ZRCola/bin/Combining.wsf
2021-04-02 12:58:12 +02:00

170 lines
5.6 KiB
XML

<?xml version="1.0" encoding="utf-8"?>
<!--
Copyright © 2021 Amebis
This file is part of ZRCola.
ZRCola is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
ZRCola is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ZRCola. If not, see <http://www.gnu.org/licenses/>.
-->
<package>
<job id="Analyse">
<runtime>
<description>Unicode Combining Character Analysis - Amebis, Copyright © 2021</description>
<unnamed name="&lt;ZRCola.mdb&gt;" required="true" helpstring="ZRCola database"/>
<unnamed name="&lt;results.csv&gt;" required="true" helpstring="Output file with analysis results"/>
</runtime>
<reference object="ADODB.Connection"/>
<reference object="ADODB.Command"/>
<reference object="ADODB.Recordset"/>
<reference object="Scripting.FileSystemObject"/>
<script language="JScript"><![CDATA[
// Both positional arguments (database path and output file path) are
// mandatory. When either is missing, print the usage text declared in the
// <runtime> section and terminate with a non-zero exit code.
if (!(WScript.Arguments.Unnamed.Length >= 2))
{
    WScript.Arguments.ShowUsage();
    WScript.Quit(1);
}
// Pre-compiled pattern matching the "+" separator (with optional surrounding
// white space) between hexadecimal code points.
var parseUnicode_stat = {
    "re_separator" : new RegExp("\\s*\\+\\s*", "g")
};

// Converts a "+"-separated list of hexadecimal Unicode code points
// (e.g. "0041+0301") to the string they represent.
//
// str     - Text containing hexadecimal code points separated by "+".
// returns - The decoded string. Code points in the range U+10000..U+10FFFF
//           are encoded as a UTF-16 surrogate pair; all other values are
//           passed to String.fromCharCode() unchanged.
function fromUni(str)
{
    var result = "";
    var a = str.split(parseUnicode_stat.re_separator);
    // Indexed loop: for..in would also visit any enumerable properties added
    // to Array.prototype and does not guarantee element order.
    for (var i = 0, n = a.length; i < n; i++) {
        var cp = parseInt(a[i], 16);
        if (cp > 0xFFFF && cp <= 0x10FFFF) {
            // String.fromCharCode() keeps only the low 16 bits of its
            // argument, so supplementary-plane characters have to be split
            // into a high and a low surrogate explicitly.
            cp -= 0x10000;
            result += String.fromCharCode(0xD800 + (cp >> 10), 0xDC00 + (cp & 0x3FF));
        } else
            result += String.fromCharCode(cp);
    }
    return result;
}
// Formats a string as a "+"-separated list of its UTF-16 code units, each
// written as an upper-case hexadecimal number zero-padded to at least four
// digits (e.g. "A" followed by U+0301 yields "0041+0301").
//
// str     - The string to format.
// returns - The formatted list; an empty string for empty input.
function toUni(str)
{
    var parts = [];
    for (var pos = 0; pos < str.length; pos++) {
        var hex = str.charCodeAt(pos).toString(16).toUpperCase();
        // Left-pad with zeros up to four hexadecimal digits.
        while (hex.length < 4)
            hex = "0" + hex;
        parts.push(hex);
    }
    return parts.join("+");
}
// Pre-compiled pattern matching every character that has a special meaning
// inside a regular expression.
var escapeRegExp_stat = {
    "re_specialChar" : new RegExp("[.*+?^${}()|[\\]\\\\]", "g")
};

// Escapes a string so it can be embedded in a regular expression and match
// itself literally.
//
// str     - The text to escape.
// returns - The text with a backslash inserted in front of each regular
//           expression metacharacter.
function escapeRegExp(str)
{
    var prefixWithBackslash = function (specialChar) {
        return "\\" + specialChar;
    };
    return str.replace(escapeRegExp_stat.re_specialChar, prefixWithBackslash);
}
// Pre-compiled pattern matching every double-quote character.
var escapeCSV_stat = {
    "re_quote" : new RegExp("\"", "g")
};

// Escapes a string for use inside a double-quoted CSV field by doubling
// every double-quote character.
//
// str     - The field text to escape.
// returns - The escaped text (without the enclosing quotes).
function escapeCSV(str)
{
    var doubledQuote = "\"\"";
    var escaped = str.replace(escapeCSV_stat.re_quote, doubledQuote);
    return escaped;
}
// Open ZRCola database.
var dbPath = WScript.Arguments.Unnamed(0);
var outputPath = WScript.Arguments.Unnamed(1);
var db = WScript.CreateObject("ADODB.Connection");
db.Open("Driver={Microsoft Access Driver (*.mdb)};Dbq=" + dbPath + ";Uid=;Pwd=;");
try {
    // Create the output file, overwriting an existing one. The third
    // argument asks FileSystemObject for a Unicode text file.
    var
        fso = WScript.CreateObject("Scripting.FileSystemObject"),
        f = fso.CreateTextFile(outputPath, true, true);
    try {
        // Build a dictionary of all compositions and known combining characters.
        var zrcola = [], combining = [];
        var rs = WScript.CreateObject("ADODB.Recordset");
        rs.CursorLocation = adUseClient;
        rs.Open("SELECT [komb], [znak] FROM [VRS_ReplChar] ORDER BY [rang_komb], LEN([komb]) DESC", db, adOpenDynamic, adLockOptimistic, adCmdText);
        try {
            for (; !rs.EOF; rs.MoveNext()) {
                var
                    decomposed = fromUni(rs("komb").Value),
                    composed = fromUni(rs("znak").Value);

                // An empty pattern matches at every position of the input, so
                // a global replace would insert the composed character between
                // all characters. Skip such rows.
                if (!decomposed.length)
                    continue;

                zrcola.push({
                    "decomposed" : decomposed,
                    "re_decomposed" : new RegExp(escapeRegExp(decomposed), "g"),
                    "composed" : composed
                });

                // Sequences that start with U+203F are additionally registered
                // without that prefix in the list of combining characters.
                // A lone U+203F is excluded: its remainder is empty and would
                // again yield a match-everywhere pattern.
                if (decomposed.charCodeAt(0) == 0x203f && decomposed.length > 1) {
                    var remainder = decomposed.substring(1);
                    combining.push({
                        "decomposed" : remainder,
                        "re_decomposed" : new RegExp(escapeRegExp(remainder), "g"),
                        "composed" : composed
                    });
                }
            }
        } finally {
            rs.Close();
        }

        // Write the header row.
        f.WriteLine(
            "\"compOrig\"" + "\t" + "\"compOrigZRCOLA\"" + "\t" +
            "\"decompOrig\"" + "\t" + "\"decompOrigZRCOLA\"" + "\t" +
            "\"comp\"" + "\t" + "\"compZRCOLA\"" + "\t" +
            "\"decomp\"" + "\t" + "\"decompZRCOLA\"" + "\t");

        // Traverse all characters in PUA and their decompositions and try to
        // replace as many decompositions as possible with combining characters.
        rs.Open("SELECT [VRS_CharList].[znak] AS [znak], [VRS_ReplChar].[komb] AS [komb] " +
            "FROM [VRS_CharList] RIGHT JOIN [VRS_ReplChar] ON [VRS_CharList].[znak]=[VRS_ReplChar].[znak] "+
            "WHERE [VRS_CharList].[znak]>='E000' AND [VRS_CharList].[znak]<='F8FF' "+
            "ORDER BY [VRS_CharList].[znak]", db, adOpenDynamic, adLockOptimistic, adCmdText);
        try {
            var i, n;
            for (; !rs.EOF; rs.MoveNext()) {
                var
                    compOrig = fromUni(rs("znak").Value),
                    decompOrig = fromUni(rs("komb").Value),
                    decomp = decompOrig;

                // The replacements must be applied in the order the entries
                // were read from the database, hence indexed loops rather than
                // for..in, whose enumeration order is not guaranteed.
                for (i = 0, n = combining.length; i < n; i++)
                    decomp = decomp.replace(combining[i].re_decomposed, combining[i].composed);

                // Re-compose the result using the complete composition table.
                var comp = decomp;
                for (i = 0, n = zrcola.length; i < n; i++)
                    comp = comp.replace(zrcola[i].re_decomposed, zrcola[i].composed);

                // Each value is written twice: as a list of hexadecimal code
                // units and as the raw text.
                f.WriteLine(
                    "\"" + escapeCSV(toUni(compOrig  )) + "\"\t\"" + escapeCSV(compOrig  ) + "\"\t"+
                    "\"" + escapeCSV(toUni(decompOrig)) + "\"\t\"" + escapeCSV(decompOrig) + "\"\t"+
                    "\"" + escapeCSV(toUni(comp      )) + "\"\t\"" + escapeCSV(comp      ) + "\"\t"+
                    "\"" + escapeCSV(toUni(decomp    )) + "\"\t\"" + escapeCSV(decomp    ) + "\"\t");
            }
        } finally {
            rs.Close();
        }
    } finally {
        f.Close();
    }
} catch (err) {
    // In case of error, delete the output file. This is best effort only:
    // the file may not have been created yet, and a failure to delete it must
    // not mask the original error.
    try { fso.DeleteFile(outputPath); } catch (err2) {}
    throw err;
} finally {
    db.Close();
}
WScript.Quit(0);
]]></script>
</job>
</package>