Duplicate transformations (differing by rank only) removed from database

This commit is contained in:
Simon Rozman 2017-05-08 14:33:33 +02:00
parent 1834543564
commit 397cfe71dd
2 changed files with 56 additions and 39 deletions

View File

@ -30,20 +30,36 @@ using namespace winstd;
class com_translation class com_translation
{ {
public: public:
int rank; ///< Destination character rank int rank_src; ///< Source sequence rank
ZRCola::DBSource::charseq src; ///< Source character int rank_dst; ///< Destination character rank
inline com_translation() {}; inline com_translation()
inline com_translation(int _rank, const ZRCola::DBSource::charseq &_src) : rank(_rank), src(_src) {}; {
inline com_translation(int _rank, ZRCola::DBSource::charseq &&_src) : rank(_rank), src(std::move(_src)) {}; }
inline com_translation(const com_translation &other) : rank(other.rank), src(other.src) {}
inline com_translation(com_translation &&other) : rank(other.rank), src(std::move(other.src)) {} inline com_translation(int _rank_src, int _rank_dst) :
rank_src(_rank_src),
rank_dst(_rank_dst)
{
}
inline com_translation(const com_translation &other) :
rank_src(other.rank_src),
rank_dst(other.rank_dst)
{
}
inline com_translation(com_translation &&other) :
rank_src( other.rank_src ),
rank_dst( other.rank_dst )
{
}
inline com_translation& operator=(const com_translation &other) inline com_translation& operator=(const com_translation &other)
{ {
if (this != std::addressof(other)) { if (this != std::addressof(other)) {
rank = other.rank; rank_src = other.rank_src;
src = other.src; rank_dst = other.rank_dst;
} }
return *this; return *this;
} }
@ -51,15 +67,17 @@ public:
inline com_translation& operator=(com_translation &&other) inline com_translation& operator=(com_translation &&other)
{ {
if (this != std::addressof(other)) { if (this != std::addressof(other)) {
rank = other.rank ; rank_src = other.rank_src ;
src = std::move(other.src ); rank_dst = other.rank_dst ;
} }
return *this; return *this;
} }
inline bool operator==(_In_ const com_translation& other) const inline bool operator==(_In_ const com_translation& other) const
{ {
return rank == other.rank && src == other.src; return
rank_src == other.rank_src &&
rank_dst == other.rank_dst;
} }
inline bool operator!=(_In_ const com_translation &other) const inline bool operator!=(_In_ const com_translation &other) const
@ -69,11 +87,9 @@ public:
inline bool operator<(_In_ const com_translation& other) const inline bool operator<(_In_ const com_translation& other) const
{ {
if (src.str < other.src.str ) return true; if (rank_src < other.rank_src) return true;
else if (src.str > other.src.str ) return false; else if (rank_src > other.rank_src) return false;
else if (src.rank < other.src.rank) return true; else if (rank_dst < other.rank_dst) return true;
else if (src.rank > other.src.rank) return false;
else if (rank < other.rank ) return true;
else return false; else return false;
} }
@ -94,7 +110,7 @@ public:
}; };
typedef map<wstring, set<com_translation> > translation_db; typedef map<wstring, map<wstring, com_translation> > translation_db;
static set<wstring> translate_inv(_In_ const translation_db &db, _In_z_ const wchar_t *str, _Inout_ set<translation_db::key_type> &path) static set<wstring> translate_inv(_In_ const translation_db &db, _In_z_ const wchar_t *str, _Inout_ set<translation_db::key_type> &path)
@ -117,7 +133,7 @@ static set<wstring> translate_inv(_In_ const translation_db &db, _In_z_ const wc
return res; return res;
} }
for (auto d = t->second.cbegin(), d_end = t->second.cend(); d != d_end; ++d) { for (auto d = t->second.cbegin(), d_end = t->second.cend(); d != d_end; ++d) {
auto src = translate_inv(db, d->src.str.c_str(), path); auto src = translate_inv(db, d->first.c_str(), path);
if (!src.empty()) { if (!src.empty()) {
for (auto dd = src.cbegin(), dd_end = src.cend(); dd != dd_end; ++dd) { for (auto dd = src.cbegin(), dd_end = src.cend(); dd != dd_end; ++dd) {
for (auto r = rem.cbegin(), r_end = rem.cend(); r != r_end; ++r) for (auto r = rem.cbegin(), r_end = rem.cend(); r != r_end; ++r)
@ -237,13 +253,14 @@ int _tmain(int argc, _TCHAR *argv[])
ZRCola::DBSource::translation trans; ZRCola::DBSource::translation trans;
if (src.GetTranslation(rs, trans)) { if (src.GetTranslation(rs, trans)) {
// Add translation to temporary database. // Add translation to temporary database.
auto const t = db_temp1.find(trans.dst.str); pair<translation_db::mapped_type::key_type, translation_db::mapped_type::mapped_type> ctp(std::move(trans.src.str), translation_db::mapped_type::mapped_type(trans.src.rank, trans.dst.rank));
if (t != db_temp1.end()) auto hit = db_temp1.find(trans.dst.str);
t->second.insert(com_translation(trans.dst.rank, std::move(trans.src))); if (hit != db_temp1.end())
hit->second.insert(std::move(ctp));
else { else {
translation_db::mapped_type d; translation_db::mapped_type t;
d.insert(com_translation(trans.dst.rank, std::move(trans.src))); t.insert(std::move(ctp));
db_temp1.insert(std::move(pair<translation_db::key_type, translation_db::mapped_type>(trans.dst.str, std::move(d)))); db_temp1.insert(pair<translation_db::key_type, translation_db::mapped_type>(trans.dst.str, std::move(t)));
} }
} else } else
has_errors = true; has_errors = true;
@ -252,22 +269,22 @@ int _tmain(int argc, _TCHAR *argv[])
// Inverse translate source sequences down to non-inverse translatable characters. // Inverse translate source sequences down to non-inverse translatable characters.
translation_db db_temp2; translation_db db_temp2;
for (auto t1 = db_temp1.cbegin(), t1_end = db_temp1.cend(); t1 != t1_end; ++t1) { for (auto t1 = db_temp1.cbegin(), t1_end = db_temp1.cend(); t1 != t1_end; ++t1) {
auto t2 = db_temp2.insert(pair<translation_db::key_type, translation_db::mapped_type>(t1->first, translation_db::mapped_type())).first;
for (auto d1 = t1->second.cbegin(), d1_end = t1->second.cend(); d1 != d1_end; ++d1) { for (auto d1 = t1->second.cbegin(), d1_end = t1->second.cend(); d1 != d1_end; ++d1) {
set<translation_db::key_type> path; set<translation_db::key_type> path;
path.insert(t1->first); path.insert(t1->first);
auto str = translate_inv(db_temp1, d1->src.str.c_str(), path); auto res = translate_inv(db_temp1, d1->first.c_str(), path);
assert(!str.empty()); assert(!res.empty());
// Add translation to temporary database. // Add translation to temporary database.
auto const t2 = db_temp2.find(t1->first); for (auto r = res.begin(), r_end = res.end(); r != r_end; ++r) {
if (t2 != db_temp2.end()) { auto hit = t2->second.find(*r);
for (auto s = str.cbegin(), s_end = str.cend(); s != s_end; ++s) if (hit != t2->second.end()) {
t2->second.insert(com_translation(d1->rank, std::move(ZRCola::DBSource::charseq(d1->src.rank, s->c_str())))); hit->second.rank_src = std::min<int>(hit->second.rank_src, d1->second.rank_src);
} else { hit->second.rank_dst = std::max<int>(hit->second.rank_dst, d1->second.rank_dst);
translation_db::mapped_type d2; } else
for (auto s = str.cbegin(), s_end = str.cend(); s != s_end; ++s) t2->second.insert(pair<translation_db::mapped_type::key_type, translation_db::mapped_type::mapped_type>(*r, translation_db::mapped_type::mapped_type(d1->second.rank_src, d1->second.rank_dst)));
d2.insert(com_translation(d1->rank, std::move(ZRCola::DBSource::charseq(d1->src.rank, s->c_str()))));
db_temp2.insert(std::move(pair<translation_db::key_type, translation_db::mapped_type>(t1->first, std::move(d2))));
} }
} }
} }
@ -284,9 +301,9 @@ int _tmain(int argc, _TCHAR *argv[])
// Add translation to index and data. // Add translation to index and data.
trans.dst.str = std::move(t->first); trans.dst.str = std::move(t->first);
for (auto d = t->second.cbegin(), d_end = t->second.cend(); d != d_end; ++d) { for (auto d = t->second.cbegin(), d_end = t->second.cend(); d != d_end; ++d) {
trans.dst.rank = d->rank; trans.dst.rank = d->second.rank_dst;
trans.src.rank = d->src.rank; trans.src.rank = d->second.rank_src;
trans.src.str = std::move(d->src.str); trans.src.str = std::move(d->first);
db_trans << trans; db_trans << trans;
} }
} }

Binary file not shown.