ZRColaCompile: Generate additional Unicode Composing permutations

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2021-04-09 15:33:26 +02:00
parent e97971ffb0
commit 3d1a12c335
2 changed files with 78 additions and 58 deletions

View File

@ -598,16 +598,9 @@ int _tmain(int argc, _TCHAR *argv[])
bits_comb(FONT_MATCH_WIDTH * FONT_MATCH_HEIGHT / 8), bits_comb(FONT_MATCH_WIDTH * FONT_MATCH_HEIGHT / 8),
bits_pre (FONT_MATCH_WIDTH * FONT_MATCH_HEIGHT / 8); bits_pre (FONT_MATCH_WIDTH * FONT_MATCH_HEIGHT / 8);
map<wstring, map<wstring, pair<double, int>>> trans; map<wstring, map<wstring, pair<double, int>>> trans;
for (; !ZRCola::DBSource::IsEOF(rs2); rs2->MoveNext()) {
// Read character from the database. auto process_permutation = [&] (const wstring &comp_orig, const wstring &decomp_orig) {
ZRCola::DBSource::character chr;
if (src.GetCharacter(rs2, chr)) {
for (auto t = db_all.cbegin(), t_end = db_all.cend(); t != t_end; ++t) {
if (t->dst.str != chr.first)
continue;
// Replace ZRCola decomposition with Unicode combining characters wherever possible. // Replace ZRCola decomposition with Unicode combining characters wherever possible.
const auto &comp_orig = chr.first;
const auto &decomp_orig = t->src.str;
wstring decomp = decomp_orig; wstring decomp = decomp_orig;
for (auto i = db_combining.cbegin(), i_end = db_combining.cend(); i != i_end; ++i) for (auto i = db_combining.cbegin(), i_end = db_combining.cend(); i != i_end; ++i)
replace_all(decomp, i->src.str, i->dst.str); replace_all(decomp, i->src.str, i->dst.str);
@ -617,13 +610,14 @@ int _tmain(int argc, _TCHAR *argv[])
// Check if we got anything useful. // Check if we got anything useful.
if (comp_orig == comp || if (comp_orig == comp ||
contains_pua(comp)) contains_pua(comp))
continue; return;
// Do the Unicode C and D normalizations to get two variants: // Do the Unicode normalization.
// - Use precomposed characters as much as possible wstring comp_pre;
// - Use combining characters only if (comp.length() > 2) {
wstring comp_comb, comp_pre; NormalizeString(NormalizationC, comp.c_str(), 2, comp_pre);
comp_pre += comp.c_str() + 2;
} else
NormalizeString(NormalizationC, comp, comp_pre); NormalizeString(NormalizationC, comp, comp_pre);
NormalizeString(NormalizationD, comp_pre, comp_comb);
{ {
// Paint original character and Unicode precomposed/combining one. // Paint original character and Unicode precomposed/combining one.
dc_selector dc_selector
@ -635,14 +629,14 @@ int _tmain(int argc, _TCHAR *argv[])
FillRect(dc_comb, &bounds, brush_bg); FillRect(dc_comb, &bounds, brush_bg);
FillRect(dc_pre , &bounds, brush_bg); FillRect(dc_pre , &bounds, brush_bg);
TextOutW(dc_orig, FONT_MATCH_WIDTH/2, FONT_MATCH_HEIGHT*5/8, comp_orig.c_str(), comp_orig.length()); TextOutW(dc_orig, FONT_MATCH_WIDTH/2, FONT_MATCH_HEIGHT*5/8, comp_orig.c_str(), comp_orig.length());
TextOutW(dc_comb, FONT_MATCH_WIDTH/2, FONT_MATCH_HEIGHT*5/8, comp_comb.c_str(), comp_comb.length()); TextOutW(dc_comb, FONT_MATCH_WIDTH/2, FONT_MATCH_HEIGHT*5/8, comp .c_str(), comp .length());
TextOutW(dc_pre , FONT_MATCH_WIDTH/2, FONT_MATCH_HEIGHT*5/8, comp_pre .c_str(), comp_pre .length()); TextOutW(dc_pre , FONT_MATCH_WIDTH/2, FONT_MATCH_HEIGHT*5/8, comp_pre .c_str(), comp_pre .length());
} }
// Compare bitmaps. // Compare bitmaps.
if (!GetDIBits(dc_orig, bmp_orig, 0, FONT_MATCH_HEIGHT, bits_orig.data(), (BITMAPINFO*)&bmi, DIB_PAL_COLORS) || if (!GetDIBits(dc_orig, bmp_orig, 0, FONT_MATCH_HEIGHT, bits_orig.data(), (BITMAPINFO*)&bmi, DIB_PAL_COLORS) ||
!GetDIBits(dc_comb, bmp_comb, 0, FONT_MATCH_HEIGHT, bits_comb.data(), (BITMAPINFO*)&bmi, DIB_PAL_COLORS) || !GetDIBits(dc_comb, bmp_comb, 0, FONT_MATCH_HEIGHT, bits_comb.data(), (BITMAPINFO*)&bmi, DIB_PAL_COLORS) ||
!GetDIBits(dc_pre , bmp_pre , 0, FONT_MATCH_HEIGHT, bits_pre .data(), (BITMAPINFO*)&bmi, DIB_PAL_COLORS)) !GetDIBits(dc_pre , bmp_pre , 0, FONT_MATCH_HEIGHT, bits_pre .data(), (BITMAPINFO*)&bmi, DIB_PAL_COLORS))
continue; return;
double double
score_comb = compare_bitmaps(bits_orig.data(), bits_comb.data()), score_comb = compare_bitmaps(bits_orig.data(), bits_comb.data()),
score_pre = compare_bitmaps(bits_orig.data(), bits_pre .data()); score_pre = compare_bitmaps(bits_orig.data(), bits_pre .data());
@ -652,19 +646,45 @@ int _tmain(int argc, _TCHAR *argv[])
if (build_csv || score_pre <= FONT_MATCH_THRESHOLD) { if (build_csv || score_pre <= FONT_MATCH_THRESHOLD) {
if (hit->second.find(comp_pre) == hit->second.end()) if (hit->second.find(comp_pre) == hit->second.end())
hit->second.insert(make_pair(comp_pre, make_pair(score_pre, 1))); hit->second.insert(make_pair(comp_pre, make_pair(score_pre, 1)));
} if ((build_csv || score_comb <= FONT_MATCH_THRESHOLD) && comp_pre != comp_comb) { } if ((build_csv || score_comb <= FONT_MATCH_THRESHOLD) && comp_pre != comp) {
if (hit->second.find(comp_comb) == hit->second.end()) if (hit->second.find(comp) == hit->second.end())
hit->second.insert(make_pair(comp_comb, make_pair(score_comb, 100))); hit->second.insert(make_pair(comp, make_pair(score_comb, 100)));
} }
} else { } else {
map<wstring, pair<double, int>> v; map<wstring, pair<double, int>> v;
if (build_csv || score_pre <= FONT_MATCH_THRESHOLD) if (build_csv || score_pre <= FONT_MATCH_THRESHOLD)
v.insert(make_pair(comp_pre, make_pair(score_pre, 1))); v.insert(make_pair(comp_pre, make_pair(score_pre, 1)));
if ((build_csv || score_comb <= FONT_MATCH_THRESHOLD) && comp_pre != comp_comb) if ((build_csv || score_comb <= FONT_MATCH_THRESHOLD) && comp_pre != comp)
v.insert(make_pair(comp_comb, make_pair(score_comb, 100))); v.insert(make_pair(comp, make_pair(score_comb, 100)));
if (!v.empty()) if (!v.empty())
trans.insert(make_pair(comp_orig, std::move(v))); trans.insert(make_pair(comp_orig, std::move(v)));
} }
};
for (; !ZRCola::DBSource::IsEOF(rs2); rs2->MoveNext()) {
// Read character from the database.
ZRCola::DBSource::character chr;
if (src.GetCharacter(rs2, chr)) {
for (auto t = db_all.cbegin(), t_end = db_all.cend(); t != t_end; ++t) {
if (t->dst.str != chr.first)
continue;
// Process primary permutation.
process_permutation(chr.first, t->src.str);
// Secondary permutation(s).
auto const hit_np = db_np.find(t->norm);
if (hit_np != db_np.end()) {
for (auto perm = hit_np->second.cbegin(), perm_end = hit_np->second.cend(); perm != perm_end; ++perm) {
// Prepare permuted string.
translation_db::mapped_type::key_type str_perm;
for (auto idx = perm->cbegin(), idx_end = perm->cend(); idx != idx_end; ++idx)
str_perm += t->src.str[*idx];
// Process secondary permutation.
process_permutation(chr.first, str_perm);
}
}
} }
} else } else
has_errors = true; has_errors = true;

Binary file not shown.