Secondary translation permutations added to support normalization

This commit is contained in:
Simon Rozman 2017-05-08 15:03:20 +02:00
parent 31b6f1a3e8
commit de6d890ac1
2 changed files with 70 additions and 7 deletions

View File

@ -133,9 +133,14 @@ public:
typedef map<wstring, map<wstring, com_translation> > translation_db; typedef map<wstring, map<wstring, com_translation> > translation_db;
/// Normalization permutation database: maps a normalization name to its ZRCola::DBSource::normperm permutation set.
typedef map<string, ZRCola::DBSource::normperm> normperm_db;
static set<ZRCola::DBSource::charseq> translate_inv(_In_ const translation_db &db_trans, _In_z_ const wchar_t *str, _Inout_ set<translation_db::key_type> &path) static set<ZRCola::DBSource::charseq> translate_inv(_In_ const translation_db &db_trans, _In_ const normperm_db &db_np, _In_z_ const wchar_t *str, _Inout_ set<translation_db::key_type> &path);
// Forward declaration: translate_inv() calls permutate_and_translate_inv(), which in turn
// calls translate_inv() and is defined after it.
static inline set<ZRCola::DBSource::charseq> permutate_and_translate_inv(_In_ const translation_db &db_trans, _In_ const normperm_db &db_np, _In_z_ const wchar_t *str, _In_z_ const char *norm, _Inout_ set<translation_db::key_type> &path);
static set<ZRCola::DBSource::charseq> translate_inv(_In_ const translation_db &db_trans, _In_ const normperm_db &db_np, _In_z_ const wchar_t *str, _Inout_ set<translation_db::key_type> &path)
{ {
set<ZRCola::DBSource::charseq> res; set<ZRCola::DBSource::charseq> res;
@ -146,7 +151,7 @@ static set<ZRCola::DBSource::charseq> translate_inv(_In_ const translation_db &d
} }
// Prepare inverse translate of the remainder string (without the first character). // Prepare inverse translate of the remainder string (without the first character).
auto res_rem = translate_inv(db_trans, str + 1, path); auto res_rem = translate_inv(db_trans, db_np, str + 1, path);
if (res_rem.empty()) if (res_rem.empty())
return res; return res;
@ -165,7 +170,9 @@ static set<ZRCola::DBSource::charseq> translate_inv(_In_ const translation_db &d
// Iterate all possible character inverse translations and combine them with the remainder string inverse translations. // Iterate all possible character inverse translations and combine them with the remainder string inverse translations.
for (auto d = hit_trans->second.cbegin(), d_end = hit_trans->second.cend(); d != d_end; ++d) { for (auto d = hit_trans->second.cbegin(), d_end = hit_trans->second.cend(); d != d_end; ++d) {
auto res_chr = translate_inv(db_trans, d->first.c_str(), path); auto res_chr = d->second.norm.empty() ?
translate_inv(db_trans, db_np, d->first.c_str(), path) :
permutate_and_translate_inv(db_trans, db_np, d->first.c_str(), d->second.norm.c_str(), path);
if (!res_chr.empty()) { if (!res_chr.empty()) {
for (auto r_chr = res_chr.cbegin(), r_chr_end = res_chr.cend(); r_chr != r_chr_end; ++r_chr) { for (auto r_chr = res_chr.cbegin(), r_chr_end = res_chr.cend(); r_chr != r_chr_end; ++r_chr) {
for (auto r_rem = res_rem.cbegin(), r_rem_end = res_rem.cend(); r_rem != r_rem_end; ++r_rem) for (auto r_rem = res_rem.cbegin(), r_rem_end = res_rem.cend(); r_rem != r_rem_end; ++r_rem)
@ -190,6 +197,31 @@ static set<ZRCola::DBSource::charseq> translate_inv(_In_ const translation_db &d
} }
///
/// Inverse-translates a string together with its secondary normalization permutations
///
/// The string is first inverse-translated as given (primary permutation). Then every
/// permutation registered in `db_np` under `norm` is applied to the string and the
/// permutated string is inverse-translated as well. Results obtained through a secondary
/// permutation are added to the result set with their rank increased by one.
///
/// \param[in]    db_trans  Translation database
/// \param[in]    db_np     Normalization permutation database, keyed by normalization name
/// \param[in]    str       Zero-terminated string to inverse translate
/// \param[in]    norm      Zero-terminated name of the normalization to look up in `db_np`
/// \param[inout] path      Forwarded as-is to translate_inv()
///
/// \returns Set of inverse translations of `str` and of its valid permutations
///
static inline set<ZRCola::DBSource::charseq> permutate_and_translate_inv(_In_ const translation_db &db_trans, _In_ const normperm_db &db_np, _In_z_ const wchar_t *str, _In_z_ const char *norm, _Inout_ set<translation_db::key_type> &path)
{
    // Primary permutation inverse translate.
    auto res = translate_inv(db_trans, db_np, str, path);

    // Secondary permutation(s): nothing more to do when the normalization is unknown.
    auto const hit_np = db_np.find(norm);
    if (hit_np == db_np.end())
        return res;

    // Permutation indexes come from the database; they must address a character of str.
    const size_t len = std::char_traits<wchar_t>::length(str);

    for (auto perm = hit_np->second.cbegin(), perm_end = hit_np->second.cend(); perm != perm_end; ++perm) {
        // Prepare permutated string.
        translation_db::mapped_type::key_type str_perm;
        bool is_valid = true;
        for (auto idx = perm->cbegin(), idx_end = perm->cend(); idx != idx_end; ++idx) {
            if (static_cast<size_t>(*idx) >= len) {
                // Index addresses the terminator or beyond: the permutation does not fit this string.
                is_valid = false;
                break;
            }
            str_perm += str[*idx];
        }
        if (!is_valid)
            continue;

        // Secondary permutation inverse translate.
        auto res_perm = translate_inv(db_trans, db_np, str_perm.c_str(), path);

        // NOTE(review): std::set elements are const, so this std::move presumably degrades to a copy - confirm.
        for (auto r = res_perm.begin(), r_end = res_perm.end(); r != r_end; ++r)
            res.insert(ZRCola::DBSource::charseq(r->rank + 1, std::move(r->str)));
    }

    return res;
}
/// ///
/// Main function /// Main function
/// ///
@ -269,6 +301,35 @@ int _tmain(int argc, _TCHAR *argv[])
streamoff dst_start = idrec::open<ZRCola::recordid_t, ZRCola::recordsize_t>(dst, ZRCOLA_DB_ID); streamoff dst_start = idrec::open<ZRCola::recordid_t, ZRCola::recordsize_t>(dst, ZRCOLA_DB_ID);
ZRCola::translation_db db_trans; ZRCola::translation_db db_trans;
// Normalization permutation sets read from the source database, keyed by normalization name.
normperm_db db_np;
{
    // Get normalization permutation sets.
    com_obj<ADORecordset> rs;
    if (src.SelectNormPermSets(rs)) {
        size_t count = src.GetRecordsetCount(rs);
        if (count < 0xffffffff) { // 4G check (-1 is reserved for error condition)
            string norm;
            ZRCola::DBSource::normperm np;

            // Parse normalization permutation sets.
            for (; !ZRCola::DBSource::IsEOF(rs); rs->MoveNext()) {
                // Read normalization permutation set from the database.
                if (src.GetNormPerm(rs, norm, np)) {
                    // Empty permutation sets are not stored.
                    // NOTE(review): np is moved-from after the insert; presumably GetNormPerm() overwrites it on the next record - confirm.
                    if (!np.empty())
                        db_np.insert(pair<string, ZRCola::DBSource::normperm>(norm, std::move(np)));
                } else
                    has_errors = true;
            }
        } else {
            // NOTE(review): error codes ZCC0008/ZCC0009 were left as found; confirm they are not shared with another loader.
            _ftprintf(stderr, wxT("%s: error ZCC0009: Error getting normalization permutation set count from database or too many normalization permutation sets.\n"), (LPCTSTR)filenameIn.c_str());
            has_errors = true;
        }
    } else {
        _ftprintf(stderr, wxT("%s: error ZCC0008: Error getting normalization permutation sets from database. Please make sure the file is ZRCola.zrc compatible.\n"), (LPCTSTR)filenameIn.c_str());
        has_errors = true;
    }
}
{ {
// Get translations. // Get translations.
@ -283,14 +344,14 @@ int _tmain(int argc, _TCHAR *argv[])
ZRCola::DBSource::translation trans; ZRCola::DBSource::translation trans;
if (src.GetTranslation(rs, trans)) { if (src.GetTranslation(rs, trans)) {
// Add translation to temporary database. // Add translation to temporary database.
pair<translation_db::mapped_type::key_type, translation_db::mapped_type::mapped_type> ctp(std::move(trans.src.str), translation_db::mapped_type::mapped_type(trans.src.rank, trans.dst.rank)); pair<translation_db::mapped_type::key_type, translation_db::mapped_type::mapped_type> ctp(std::move(trans.src.str), translation_db::mapped_type::mapped_type(trans.src.rank, trans.dst.rank, std::move(trans.norm)));
auto hit = db_temp1.find(trans.dst.str); auto hit = db_temp1.find(trans.dst.str);
if (hit != db_temp1.end()) if (hit != db_temp1.end())
hit->second.insert(std::move(ctp)); hit->second.insert(std::move(ctp));
else { else {
translation_db::mapped_type t; translation_db::mapped_type t;
t.insert(std::move(ctp)); t.insert(std::move(ctp));
db_temp1.insert(pair<translation_db::key_type, translation_db::mapped_type>(trans.dst.str, std::move(t))); db_temp1.insert(pair<translation_db::key_type, translation_db::mapped_type>(std::move(trans.dst.str), std::move(t)));
} }
} else } else
has_errors = true; has_errors = true;
@ -304,7 +365,9 @@ int _tmain(int argc, _TCHAR *argv[])
for (auto d1 = t1->second.cbegin(), d1_end = t1->second.cend(); d1 != d1_end; ++d1) { for (auto d1 = t1->second.cbegin(), d1_end = t1->second.cend(); d1 != d1_end; ++d1) {
set<translation_db::key_type> path; set<translation_db::key_type> path;
path.insert(t1->first); path.insert(t1->first);
auto res = translate_inv(db_temp1, d1->first.c_str(), path); auto res = d1->second.norm.empty() ?
translate_inv(db_temp1, db_np, d1->first.c_str(), path) :
permutate_and_translate_inv(db_temp1, db_np, d1->first.c_str(), d1->second.norm.c_str(), path);
assert(!res.empty()); assert(!res.empty());
// Add translation to temporary database. // Add translation to temporary database.
@ -315,7 +378,7 @@ int _tmain(int argc, _TCHAR *argv[])
hit->second.rank_src = std::min<int>(hit->second.rank_src, ct.rank_src); hit->second.rank_src = std::min<int>(hit->second.rank_src, ct.rank_src);
hit->second.rank_dst = std::max<int>(hit->second.rank_dst, ct.rank_dst); hit->second.rank_dst = std::max<int>(hit->second.rank_dst, ct.rank_dst);
} else } else
t2->second.insert(pair<translation_db::mapped_type::key_type, translation_db::mapped_type::mapped_type>(r->str, ct)); t2->second.insert(pair<translation_db::mapped_type::key_type, translation_db::mapped_type::mapped_type>(std::move(r->str), std::move(ct)));
} }
} }
} }

Binary file not shown.