Introduce Unicode Combining Characters evaluation script
Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
parent
fa2fb03cf8
commit
72f8b179d7
169
bin/Combining.wsf
Normal file
169
bin/Combining.wsf
Normal file
@ -0,0 +1,169 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!--
|
||||
Copyright © 2021 Amebis
|
||||
|
||||
This file is part of ZRCola.
|
||||
|
||||
ZRCola is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
ZRCola is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with ZRCola. If not, see <http://www.gnu.org/licenses/>.
|
||||
-->
|
||||
<package>
|
||||
<job id="Analyse">
|
||||
<runtime>
    <description>Unicode Combining Character Analysis - Amebis, Copyright © 2021</description>
    <!-- Angle brackets in the argument names are escaped: a raw "<" inside an attribute value is not well-formed XML. -->
    <unnamed name="&lt;ZRCola.mdb&gt;" required="true" helpstring="ZRCola database"/>
    <unnamed name="&lt;results.csv&gt;" required="true" helpstring="Output file with analysis results"/>
</runtime>
|
||||
<reference object="ADODB.Connection"/>
|
||||
<reference object="ADODB.Command"/>
|
||||
<reference object="ADODB.Recordset"/>
|
||||
<reference object="Scripting.FileSystemObject"/>
|
||||
<script language="JScript"><![CDATA[
|
||||
// Both positional arguments (database path and output file path) are mandatory.
// When fewer than two are given, print the usage text and exit with code 1.
if (2 > WScript.Arguments.Unnamed.Length) {
    WScript.Arguments.ShowUsage();
    WScript.Quit(1);
}
|
||||
|
||||
// Shared state of fromUni(): the separator between code points in the
// "XXXX+XXXX+..." notation is a plus sign with optional surrounding whitespace.
var parseUnicode_stat = {
    "re_separator" : new RegExp("\\s*\\+\\s*", "g")
};

/**
 * Converts a "+"-separated list of hexadecimal code points (e.g. "0041+0301")
 * to the string those code points represent.
 *
 * Tokens that do not parse as a hexadecimal number in the range 0..10FFFF
 * (including the empty token produced by an empty input or a trailing "+")
 * are skipped instead of being turned into U+0000. Code points above U+FFFF
 * are encoded as a UTF-16 surrogate pair.
 *
 * @param str  Code point list; null/undefined is treated as an empty list.
 * @return     The decoded string ("" when no valid code point was found).
 */
function fromUni(str)
{
    var result = "";
    if (str == null)
        return result;

    var tokens = String(str).split(parseUnicode_stat.re_separator);
    // Indexed loop: for...in would also visit enumerable properties added to Array.prototype.
    for (var i = 0, n = tokens.length; i < n; i++) {
        var cp = parseInt(tokens[i], 16);
        if (isNaN(cp) || cp < 0 || cp > 0x10ffff)
            continue;

        if (cp > 0xffff) {
            // String.fromCharCode() truncates to 16 bits, so build the surrogate pair by hand.
            cp -= 0x10000;
            result += String.fromCharCode(0xd800 + (cp >> 10), 0xdc00 + (cp & 0x3ff));
        } else
            result += String.fromCharCode(cp);
    }
    return result;
}
|
||||
|
||||
/**
 * Converts a string to a "+"-separated list of upper-case hexadecimal code
 * points, each padded to at least four digits (e.g. "A\u0301" -> "0041+0301").
 *
 * A valid UTF-16 surrogate pair is reported as the single code point it
 * encodes (e.g. "1F600"); an unpaired surrogate is reported as-is.
 *
 * @param str  String to convert.
 * @return     Code point list ("" for an empty string).
 */
function toUni(str)
{
    var i, n = str.length, parts = [];
    for (i = 0; i < n; i++) {
        var val = str.charCodeAt(i);

        // High surrogate followed by a low surrogate: merge into one code point.
        if (val >= 0xd800 && val <= 0xdbff && i + 1 < n) {
            var low = str.charCodeAt(i + 1);
            if (low >= 0xdc00 && low <= 0xdfff) {
                val = 0x10000 + ((val - 0xd800) << 10) + (low - 0xdc00);
                i++;
            }
        }

        var hex = val.toString(16).toUpperCase();
        while (hex.length < 4)
            hex = "0" + hex;
        parts.push(hex);
    }
    return parts.join("+");
}
|
||||
|
||||
// Shared state of escapeRegExp(): matches every character that has a special
// meaning inside a regular expression pattern.
var escapeRegExp_stat = {
    "re_specialChar" : new RegExp("[.*+?^${}()|[\\]\\\\]", "g")
};

/**
 * Escapes a string so it can be used as a literal inside a RegExp pattern.
 *
 * @param str  Text to escape.
 * @return     The text with a backslash in front of every special character.
 */
function escapeRegExp(str)
{
    var special = escapeRegExp_stat.re_specialChar;
    return str.replace(special, function (ch) {
        return "\\" + ch;
    });
}
|
||||
|
||||
// Shared state of escapeCSV(): matches every double quote.
var escapeCSV_stat = {
    "re_quote" : new RegExp("\"", "g")
};

/**
 * Prepares a value for placement inside a double-quoted field by doubling
 * every double quote it contains.
 *
 * @param str  Field text.
 * @return     The text with each " replaced by "".
 */
function escapeCSV(str)
{
    var quote = escapeCSV_stat.re_quote;
    return str.replace(quote, function () {
        return "\"\"";
    });
}
|
||||
|
||||
// Positional arguments: path of the ZRCola database and path of the output file.
var dbPath = WScript.Arguments.Unnamed(0);
var outputPath = WScript.Arguments.Unnamed(1);

// Open ZRCola database.
var db = WScript.CreateObject("ADODB.Connection");
db.Open("Driver={Microsoft Access Driver (*.mdb)};Dbq=" + dbPath + ";Uid=;Pwd=;");
try {
    // Create the output file (overwriting an existing one, written as Unicode text).
    var
        fso = WScript.CreateObject("Scripting.FileSystemObject"),
        f = fso.CreateTextFile(outputPath, true, true);
    try {
        // Build a dictionary of all compositions and known combining characters.
        // Both lists keep the order returned by the query, and the replacement
        // passes below depend on that order.
        var zrcola = [], combining = [];
        var i, n;
        var rs = WScript.CreateObject("ADODB.Recordset");
        rs.CursorLocation = adUseClient;
        rs.Open("SELECT [komb], [znak] FROM [VRS_ReplChar] WHERE [komb] ORDER BY [rang_komb] DESC, LEN([komb]) DESC", db, adOpenDynamic, adLockOptimistic, adCmdText);
        try {
            for (; !rs.EOF; rs.MoveNext()) {
                var
                    decomposed = fromUni(rs("komb").Value),
                    composed   = fromUni(rs("znak").Value);

                // An empty pattern compiled with the "g" flag matches at every
                // position and would insert the composed character everywhere.
                if (!decomposed.length)
                    continue;

                zrcola.push({
                    "decomposed"    : decomposed,
                    "re_decomposed" : new RegExp(escapeRegExp(decomposed), "g"),
                    "composed"      : composed
                });

                // Entries whose decomposition starts with U+203F: the remainder
                // after that prefix is registered as a combining-character pattern.
                // A bare U+203F has no remainder and is skipped (empty pattern, see above).
                if (decomposed.charCodeAt(0) == 0x203f && decomposed.length > 1) {
                    var tail = decomposed.substring(1);
                    combining.push({
                        "decomposed"    : tail,
                        "re_decomposed" : new RegExp(escapeRegExp(tail), "g"),
                        "composed"      : composed
                    });
                }
            }
        } finally {
            rs.Close();
        }

        // Header row. Fields are quoted and tab-separated; every row ends with a tab.
        f.WriteLine(
            "\"compOrig\""   + "\t" + "\"compOrigZRCOLA\""   + "\t" +
            "\"decompOrig\"" + "\t" + "\"decompOrigZRCOLA\"" + "\t" +
            "\"comp\""       + "\t" + "\"compZRCOLA\""       + "\t" +
            "\"decomp\""     + "\t" + "\"decompZRCOLA\""     + "\t");

        // Traverse all characters and their decompositions and try to replace as many decompositions as possible with combining characters.
        rs.Open("SELECT [VRS_CharList].[znak] AS [znak], [VRS_ReplChar].[komb] AS [komb] " +
            "FROM [VRS_CharList] RIGHT JOIN [VRS_ReplChar] ON [VRS_CharList].[znak]=[VRS_ReplChar].[znak] "+
            "WHERE [VRS_CharList].[znak]>='E000' AND [VRS_CharList].[znak]<='F8FF' "+
            "ORDER BY [VRS_CharList].[znak]", db, adOpenDynamic, adLockOptimistic, adCmdText);
        try {
            for (; !rs.EOF; rs.MoveNext()) {
                var
                    compOrig   = fromUni(rs("znak").Value),
                    decompOrig = fromUni(rs("komb").Value),
                    decomp     = decompOrig;

                // Pass 1: substitute known combining sequences.
                // Indexed loops guarantee the query order; for...in does not.
                for (i = 0, n = combining.length; i < n; i++)
                    decomp = decomp.replace(combining[i].re_decomposed, combining[i].composed);

                // Pass 2: apply all compositions to the result of pass 1.
                var comp = decomp;
                for (i = 0, n = zrcola.length; i < n; i++)
                    comp = comp.replace(zrcola[i].re_decomposed, zrcola[i].composed);

                // Each value is written twice: as a code point list and as the raw characters.
                f.WriteLine(
                    "\"" + escapeCSV(toUni(compOrig  )) + "\"\t\"" + escapeCSV(compOrig  ) + "\"\t"+
                    "\"" + escapeCSV(toUni(decompOrig)) + "\"\t\"" + escapeCSV(decompOrig) + "\"\t"+
                    "\"" + escapeCSV(toUni(comp      )) + "\"\t\"" + escapeCSV(comp      ) + "\"\t"+
                    "\"" + escapeCSV(toUni(decomp    )) + "\"\t\"" + escapeCSV(decomp    ) + "\"\t");
            }
        } finally {
            rs.Close();
        }
    } finally {
        f.Close();
    }
} catch (err) {
    // In case of error, delete the output file. Best effort: the file (or even
    // fso itself) may not exist yet, so a failure here is deliberately ignored.
    try { fso.DeleteFile(outputPath); } catch (err2) {}

    throw err;
} finally {
    db.Close();
}

WScript.Quit(0);
|
||||
]]></script>
|
||||
</job>
|
||||
</package>
|
Loading…
x
Reference in New Issue
Block a user