170 lines
5.6 KiB
XML
170 lines
5.6 KiB
XML
<?xml version="1.0" encoding="utf-8"?>
|
|
<!--
|
|
Copyright © 2021 Amebis
|
|
|
|
This file is part of ZRCola.
|
|
|
|
ZRCola is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
ZRCola is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with ZRCola. If not, see <http://www.gnu.org/licenses/>.
|
|
-->
|
|
<package>
|
|
<job id="Analyse">
|
|
<runtime>
|
|
<description>Unicode Combining Character Analysis - Amebis, Copyright © 2021</description>
|
|
<unnamed name="&lt;ZRCola.mdb&gt;" required="true" helpstring="ZRCola database"/>
|
|
<unnamed name="&lt;results.csv&gt;" required="true" helpstring="Output file with analysis results"/>
|
|
</runtime>
|
|
<reference object="ADODB.Connection"/>
|
|
<reference object="ADODB.Command"/>
|
|
<reference object="ADODB.Recordset"/>
|
|
<reference object="Scripting.FileSystemObject"/>
|
|
<script language="JScript"><![CDATA[
|
|
// Two unnamed (positional) arguments are required. When fewer are given,
// print the usage text declared in the <runtime> section and exit with code 1.
if (WScript.Arguments.Unnamed.Length <= 1) {
    WScript.Arguments.ShowUsage();
    WScript.Quit(1);
}
|
|
|
|
// Shared state for fromUni(): the separator between hexadecimal code points
// is a "+" sign with optional surrounding whitespace.
var parseUnicode_stat = {
    "re_separator" : new RegExp("\\s*\\+\\s*", "g")
};

// Converts a "+"-separated list of hexadecimal code points (e.g. "0041+0301")
// to the string those code points represent.
//
// str      String of hexadecimal code points separated by "+".
// returns  The decoded string; "" when str contains no code points.
//
// Code points above U+FFFF are emitted as a UTF-16 surrogate pair, since
// String.fromCharCode() alone would truncate them to 16 bits.
// Tokens that are not hexadecimal numbers are not validated: they still pass
// through String.fromCharCode() unchanged.
function fromUni(str)
{
    var tokens = str.split(parseUnicode_stat.re_separator), result = "";

    // Index loop: visits the elements only, and strictly in order.
    for (var i = 0, n = tokens.length; i < n; i++) {
        // Empty input or a stray leading/trailing "+" yields empty tokens on
        // standards-conforming engines; they carry no code point.
        if (tokens[i] === "")
            continue;

        var code = parseInt(tokens[i], 16);
        if (code > 0xFFFF && code <= 0x10FFFF) {
            // Supplementary plane: split into high and low surrogate.
            code -= 0x10000;
            result += String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
        } else
            result += String.fromCharCode(code);
    }

    return result;
}
|
|
|
|
// Converts a string to a "+"-separated list of its UTF-16 code units, each
// written as an upper-case hexadecimal number zero-padded to four digits
// (e.g. "A" followed by U+0301 becomes "0041+0301").
//
// str      String to encode.
// returns  The encoded list; "" for an empty string.
function toUni(str)
{
    var codes = [];

    for (var pos = 0; pos < str.length; pos++) {
        var hex = str.charCodeAt(pos).toString(16).toUpperCase();

        // charCodeAt() never exceeds 0xFFFF, so four digits always suffice.
        while (hex.length < 4)
            hex = "0" + hex;

        codes.push(hex);
    }

    return codes.join("+");
}
|
|
|
|
// Shared state for escapeRegExp(): matches every character that has a special
// meaning in a regular expression pattern.
var escapeRegExp_stat = {
    "re_specialChar" : new RegExp("[.*+?^${}()|[\\]\\\\]", "g")
};

// Escapes a string so that it can be used inside a regular expression and
// match itself literally.
//
// str      Text to escape.
// returns  str with a backslash inserted before each special character.
function escapeRegExp(str)
{
    var special = escapeRegExp_stat.re_specialChar;

    // "$&" stands for the matched character itself.
    var escaped = str.replace(special, "\\$&");
    return escaped;
}
|
|
|
|
// Shared state for escapeCSV(): matches every double quote.
var escapeCSV_stat = {
    "re_quote" : new RegExp("\"", "g")
};

// Escapes a value for use inside a double-quoted CSV field by doubling every
// double quote it contains.
//
// str      Field text.
// returns  str with each " replaced by "".
function escapeCSV(str)
{
    var quote = escapeCSV_stat.re_quote;
    var doubled = str.replace(quote, "\"\"");
    return doubled;
}
|
|
|
|
// Builds one replacement rule: a global regular expression that matches the
// decomposed sequence literally, plus the composed text that replaces it.
function makeRule(decomposed, composed)
{
    return {
        "decomposed"    : decomposed,
        "re_decomposed" : new RegExp(escapeRegExp(decomposed), "g"),
        "composed"      : composed
    };
}

// Applies all rules to text, strictly in array order, and returns the result.
function applyRules(text, rules)
{
    for (var k = 0, n = rules.length; k < n; k++)
        text = text.replace(rules[k].re_decomposed, rules[k].composed);
    return text;
}

// Formats one output row: every cell is double-quoted, CSV-escaped and
// followed by a tab (so the row ends with a trailing tab).
function formatRow(cells)
{
    var row = "";
    for (var k = 0, n = cells.length; k < n; k++)
        row += "\"" + escapeCSV(cells[k]) + "\"\t";
    return row;
}

// Open ZRCola database.
var dbPath = WScript.Arguments.Unnamed(0);
var outputPath = WScript.Arguments.Unnamed(1);
var db = WScript.CreateObject("ADODB.Connection");
db.Open("Driver={Microsoft Access Driver (*.mdb)};Dbq=" + dbPath + ";Uid=;Pwd=;");
try {
    // Create the output file (overwrite = true, Unicode = true).
    var
        fso = WScript.CreateObject("Scripting.FileSystemObject"),
        f = fso.CreateTextFile(outputPath, true, true);
    try {
        // Build a dictionary of all compositions and known combining characters.
        var zrcola = [], combining = [];
        var rs = WScript.CreateObject("ADODB.Recordset");
        rs.CursorLocation = adUseClient;
        rs.Open("SELECT [komb], [znak] FROM [VRS_ReplChar] ORDER BY [rang_komb], LEN([komb]) DESC", db, adOpenDynamic, adLockOptimistic, adCmdText);
        try {
            for (; !rs.EOF; rs.MoveNext()) {
                var
                    decomposed = fromUni(rs("komb").Value),
                    composed = fromUni(rs("znak").Value);

                // An empty search pattern compiles to a regular expression that
                // matches between every character, which would insert the
                // composed text all over each string. Skip such rows.
                if (!decomposed.length)
                    continue;

                zrcola.push(makeRule(decomposed, composed));

                // Decompositions that start with U+203F are also recorded,
                // without that prefix, in the list of known combining
                // characters. A lone U+203F leaves nothing to search for.
                if (decomposed.charCodeAt(0) == 0x203f && decomposed.length > 1)
                    combining.push(makeRule(decomposed.substring(1), composed));
            }
        } finally {
            rs.Close();
        }

        // Header row.
        f.WriteLine(formatRow([
            "compOrig",   "compOrigZRCOLA",
            "decompOrig", "decompOrigZRCOLA",
            "comp",       "compZRCOLA",
            "decomp",     "decompZRCOLA"]));

        // Traverse all characters in PUA and their decompositions and try to replace as many decompositions as possible with combining characters.
        rs.Open("SELECT [VRS_CharList].[znak] AS [znak], [VRS_ReplChar].[komb] AS [komb] " +
            "FROM [VRS_CharList] RIGHT JOIN [VRS_ReplChar] ON [VRS_CharList].[znak]=[VRS_ReplChar].[znak] "+
            "WHERE [VRS_CharList].[znak]>='E000' AND [VRS_CharList].[znak]<='F8FF' "+
            "ORDER BY [VRS_CharList].[znak]", db, adOpenDynamic, adLockOptimistic, adCmdText);
        try {
            for (; !rs.EOF; rs.MoveNext()) {
                var
                    compOrig = fromUni(rs("znak").Value),
                    decompOrig = fromUni(rs("komb").Value),
                    // First substitute the combining characters, ...
                    decomp = applyRules(decompOrig, combining),
                    // ... then compose the result using the full rule set.
                    comp = applyRules(decomp, zrcola);

                // Each value is written twice: as a code point list and as text.
                f.WriteLine(formatRow([
                    toUni(compOrig),   compOrig,
                    toUni(decompOrig), decompOrig,
                    toUni(comp),       comp,
                    toUni(decomp),     decomp]));
            }
        } finally {
            rs.Close();
        }
    } finally {
        f.Close();
    }
} catch (err) {
    // In case of error, delete the output file. Deletion is best effort:
    // the file may never have been created.
    try { fso.DeleteFile(outputPath); } catch (err2) {}

    throw err;
} finally {
    db.Close();
}

WScript.Quit(0);
|
|
]]></script>
|
|
</job>
|
|
</package>
|