Add non-PUA ZRCola translations to the ZRCola-to-Unicode translation

This allows the ZRCola-to-Unicode translation to use precomposed
Unicode characters, which gives it better coverage.

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman
2021-12-13 14:37:59 +01:00
parent a3f0662022
commit 0b68cdedda
5 changed files with 21 additions and 9 deletions

View File

@@ -380,15 +380,27 @@ int _tmain(int argc, _TCHAR *argv[])
// Parse translations and build index and data.
ZRCola::DBSource::translation trans;
trans.set = 0;
for (auto t = db_temp2.cbegin(), t_end = db_temp2.cend(); t != t_end; ++t) {
// Add translation to index and data.
trans.dst.str = t->first;
for (auto d = t->second.cbegin(), d_end = t->second.cend(); d != d_end; ++d) {
trans.set = (int)ZRCOLA_TRANSEQID_DEFAULT;
trans.dst.rank = d->second.rank_dst;
trans.src.rank = d->second.rank_src;
trans.src.str = d->first;
db_trans << trans;
// If destination contains no PUA characters, add it to the ZRCola >> Unicode transliteration too.
bool has_pua = false;
for (auto ch = trans.dst.str.cbegin(), ch_end = trans.dst.str.cend(); ch != ch_end; ++ch)
if (ZRCola::ispua(*ch)) {
has_pua = true;
break;
}
if (!has_pua) {
trans.set = (int)ZRCOLA_TRANSEQID_UNICODE;
db_trans << trans;
}
}
}
} else {