Add support for ZRCola Composed to Unicode transliteration

The initial support compares characters in the ZRCola font's Private-Use-Area with the characters obtained using Unicode combining characters. Those that match sufficiently are arranged into a new transliteration. Because the ZRCola Composed to Unicode transliteration must be applied _after_ ZRCola composition, whereas the UI applies additional transliterations _before_ ZRCola composition, ZRCola composition was reintroduced as one of the transliterations. This allows configuring a custom transliteration sequence. Signed-off-by: Simon Rozman <simon@rozman.si>
2021-04-02 11:37:55 +02:00 · 2021-04-02 11:37:55 +02:00 · e43a5a0ef0
commit e43a5a0ef0
parent ddc8b00416
11 changed files with 416 additions and 31 deletions
--- a/ZRColaCompile/dbsource.cpp
+++ b/ZRColaCompile/dbsource.cpp
@ -729,6 +729,27 @@ bool ZRCola::DBSource::GetNormPerm(const winstd::com_obj<ADORecordset>& rs, std:
 }


+// Opens a static, read-only recordset over all rows of [VRS_ReplChar],
+// ordered by [rang_komb] and then by descending LEN([komb]).
+// Returns true when the query succeeds; otherwise reports error ZCC0040 on
+// stderr, logs the database errors and returns false.
+bool ZRCola::DBSource::SelectAllTranslations(com_obj<ADORecordset> &rs) const
+{
+    // Release whatever the caller's recordset held and instantiate a fresh one.
+    rs.free();
+    const HRESULT hr_create = ::CoCreateInstance(CLSID_CADORecordset, NULL, CLSCTX_ALL, IID_IADORecordset, (LPVOID*)&rs);
+    wxCHECK(SUCCEEDED(hr_create), false);
+
+    // Run the query; leave early on success.
+    if (SUCCEEDED(rs->Open(
+            variant(
+                L"SELECT [komb], [rang_komb], [Kano], [Kanoniziraj], [znak], [rang_znak] "
+                L"FROM [VRS_ReplChar] "
+                L"ORDER BY [rang_komb], LEN([komb]) DESC"),
+            variant(m_db),
+            adOpenStatic,
+            adLockReadOnly,
+            adCmdText)))
+        return true;
+
+    // The query failed: tell the user and dump the database error details.
+    _ftprintf(stderr, wxT("%s: error ZCC0040: Error loading translations from database. Please make sure the file is ZRCola.zrc compatible.\n"), m_filename.c_str());
+    LogErrors();
+    return false;
+}
+
+
 bool ZRCola::DBSource::SelectTranslations(com_obj<ADORecordset> &rs) const
 {
    // Create a new recordset.
@ -1239,6 +1260,30 @@ bool ZRCola::DBSource::SelectCharacters(com_obj<ADORecordset>& rs) const
 }


+// Opens a static, read-only recordset over the rows of [VRS_CharList] whose
+// [znak] lies between 'E000' and 'F8FF' and whose [aktiven] equals 1,
+// ordered by [znak].
+// Returns true when the query succeeds; otherwise reports error ZCC0120 on
+// stderr, logs the database errors and returns false.
+bool ZRCola::DBSource::SelectPUACharacters(com_obj<ADORecordset>& rs) const
+{
+    // Release whatever the caller's recordset held and instantiate a fresh one.
+    rs.free();
+    const HRESULT hr_create = ::CoCreateInstance(CLSID_CADORecordset, NULL, CLSCTX_ALL, IID_IADORecordset, (LPVOID*)&rs);
+    wxCHECK(SUCCEEDED(hr_create), false);
+
+    // Run the query; leave early on success.
+    if (SUCCEEDED(rs->Open(
+            variant(
+                L"SELECT [znak], [opis_en], [kat], [znak_v], [znak_m] "
+                L"FROM [VRS_CharList] "
+                L"WHERE "
+                L"[znak]>='E000' AND [znak]<='F8FF' AND " // Restrict to the Private-Use-Area
+                L"[aktiven]=1 "                           // Skip inactive characters
+                L"ORDER BY [znak]"),
+            variant(m_db),
+            adOpenStatic,
+            adLockReadOnly,
+            adCmdText)))
+        return true;
+
+    // The query failed: tell the user and dump the database error details.
+    _ftprintf(stderr, wxT("%s: error ZCC0120: Error loading characters from database. Please make sure the file is ZRCola.zrc compatible.\n"), m_filename.c_str());
+    LogErrors();
+    return false;
+}
+
+
 bool ZRCola::DBSource::GetCharacter(const com_obj<ADORecordset>& rs, character& chr) const
 {
    wxASSERT_MSG(rs, wxT("recordset is empty"));
--- a/ZRColaCompile/dbsource.h
+++ b/ZRColaCompile/dbsource.h
@ -132,7 +132,7 @@ namespace ZRCola {
            std::string norm;   ///< Normalization footprint
            charseq dst;        ///< Destination sequence

-            inline translation() : set(0) {}
+            inline translation() : set((int)ZRCOLA_TRANSEQID_DEFAULT) {}
        };


@ -145,7 +145,7 @@ namespace ZRCola {
            std::wstring src;   ///< Source name
            std::wstring dst;   ///< Destination name

-            inline transet() : set(0) {}
+            inline transet() : set((int)ZRCOLA_TRANSEQID_DEFAULT) {}
        };


@ -621,6 +621,17 @@ namespace ZRCola {
        ///
        bool GetNormPerm(const winstd::com_obj<ADORecordset>& rs, std::string& norm, normperm& np) const;

+        ///
+        /// Returns all character translations
+        ///
+        /// \param[out] rs  Recordset with results
+        ///
+        /// \returns
+        /// - true when query succeeds
+        /// - false otherwise
+        ///
+        bool SelectAllTranslations(winstd::com_obj<ADORecordset>& rs) const;
+
        ///
        /// Returns character translations
        ///
@ -805,6 +816,17 @@ namespace ZRCola {
        ///
        bool SelectCharacters(winstd::com_obj<ADORecordset>& rs) const;

+        ///
+        /// Returns Private-Use-Area characters
+        ///
+        /// \param[out] rs  Recordset with results
+        ///
+        /// \returns
+        /// - true when query succeeds
+        /// - false otherwise
+        ///
+        bool SelectPUACharacters(winstd::com_obj<ADORecordset>& rs) const;
+
        ///
        /// Returns character data
        ///
@ -908,7 +930,7 @@ namespace ZRCola {
 inline ZRCola::translation_db& operator<<(_Inout_ ZRCola::translation_db &db, _In_ const ZRCola::DBSource::translation &rec)
 {
    unsigned __int32 idx = db.data.size();
-    wxASSERT_MSG((int)0xffff8000 <= rec.set && rec.set <= (int)0x00007fff, wxT("translation set id out of bounds"));
+    wxASSERT_MSG((int)0xffff0000 <= rec.set && rec.set <= (int)0x0000ffff, wxT("translation set id out of bounds"));
    db.data.push_back((unsigned __int16)rec.set);
    wxASSERT_MSG((int)0xffff8000 <= rec.dst.rank && rec.dst.rank <= (int)0x00007fff, wxT("destination character rank out of bounds"));
    db.data.push_back((unsigned __int16)rec.dst.rank);
@ -932,7 +954,7 @@ inline ZRCola::translation_db& operator<<(_Inout_ ZRCola::translation_db &db, _I
 inline ZRCola::transet_db& operator<<(_Inout_ ZRCola::transet_db &db, _In_ const ZRCola::DBSource::transet &rec)
 {
    unsigned __int32 idx = db.data.size();
-    wxASSERT_MSG((int)0xffff8000 <= rec.set && rec.set <= (int)0x00007fff, wxT("translation set id out of bounds"));
+    wxASSERT_MSG((int)0xffff0000 <= rec.set && rec.set <= (int)0x0000ffff, wxT("translation set id out of bounds"));
    db.data.push_back((unsigned __int16)rec.set);
    std::wstring::size_type n = rec.src.length();
    wxASSERT_MSG(n <= 0xffff, wxT("translation set source name overflow"));
--- a/ZRColaCompile/main.cpp
+++ b/ZRColaCompile/main.cpp
@ -19,6 +19,10 @@

 #include "pch.h"

+#define FONT_MATCH_WIDTH        512 // must be a multiple of 8
+#define FONT_MATCH_HEIGHT       512
+#define FONT_MATCH_THRESHOLD    8e-2
+
 using namespace std;
 using namespace stdex;
 using namespace winstd;
@ -224,6 +228,47 @@ static inline set<ZRCola::DBSource::charseq> permutate_and_translate_inv(_In_ co
 }


+///
+/// Checks whether a string holds at least one Private-Use-Area character
+///
+/// \param[in] str  String to examine
+///
+/// \returns
+/// - true when any character of \p str lies in the U+E000..U+F8FF range
+/// - false otherwise (including for an empty string)
+///
+static bool contains_pua(_In_ const wstring &str)
+{
+    for (const wchar_t chr : str) {
+        // Outside the range when below its first or above its last code point.
+        const bool outside = chr < L'\ue000' || L'\uf8ff' < chr;
+        if (!outside)
+            return true;
+    }
+    return false;
+}
+
+
+///
+/// Replaces all occurrences of a substring within a string
+///
+/// Occurrences are located left to right. The search resumes right after each
+/// inserted replacement, so text introduced by \p to is never rescanned.
+///
+/// \param[in,out] str   String to modify in place
+/// \param[in]     from  Substring to search for. When empty, \p str is left unchanged.
+/// \param[in]     to    Replacement text
+///
+static void replace_all(_Inout_ wstring &str, _In_ const wstring &from, _In_ const wstring &to)
+{
+    // An empty pattern matches at every position, so the loop below would never
+    // terminate (and would grow str without bound when `to` is not empty).
+    if (from.empty())
+        return;
+
+    size_t start_pos = 0;
+    while ((start_pos = str.find(from, start_pos)) != wstring::npos) {
+        str.replace(start_pos, from.length(), to);
+        // Continue behind the replacement to avoid matching inside it.
+        start_pos += to.length();
+    }
+}
+
+
+///
+/// Scores the difference between two 1-bit-per-pixel bitmaps
+///
+/// Both buffers are read as FONT_MATCH_WIDTH * FONT_MATCH_HEIGHT / 8 bytes.
+/// With x being the number of bits that differ between the buffers, and
+/// b_orig and b the number of set bits in each buffer (both counted from 1),
+/// the score is x/b_orig * x/b.
+///
+/// \param[in] bits_orig  Bits of the reference bitmap
+/// \param[in] bits       Bits of the bitmap to compare against the reference
+///
+/// \returns Difference score: 0 when the buffers are identical, larger when more bits differ
+///
+static double compare_bitmaps(
+    _In_count_c_(FONT_MATCH_WIDTH * FONT_MATCH_HEIGHT / 8) const unsigned char *bits_orig,
+    _In_count_c_(FONT_MATCH_WIDTH * FONT_MATCH_HEIGHT / 8) const unsigned char *bits)
+{
+    // Lookup table giving the number of set bits for every possible byte value.
+#define B2(n) n, n + 1, n + 1, n + 2
+#define B4(n) B2(n), B2(n + 1), B2(n + 1), B2(n + 2)
+#define B6(n) B4(n), B4(n + 1), B4(n + 1), B4(n + 2)
+    static const unsigned char set_bits_in[256] = { B6(0), B6(1), B6(1), B6(2) };
+#undef B2
+#undef B4
+#undef B6
+    const size_t byte_count = FONT_MATCH_WIDTH * FONT_MATCH_HEIGHT / 8;
+
+    // Both set-bit totals start at 1 so the divisions below never divide by zero.
+    size_t total_orig  = 1;
+    size_t total_other = 1;
+    size_t differing   = 0;
+    for (size_t pos = 0; pos != byte_count; pos++) {
+        const unsigned char byte_orig  = bits_orig[pos];
+        const unsigned char byte_other = bits[pos];
+        total_orig  += set_bits_in[byte_orig];
+        total_other += set_bits_in[byte_other];
+        differing   += set_bits_in[byte_orig ^ byte_other];
+    }
+
+    // Evaluated strictly left to right: ((x / b_orig) * x) / b.
+    return (((double)differing / total_orig) * (double)differing) / total_other;
+}
+
+
 ///
 /// Main function
 ///
@ -303,6 +348,7 @@ int _tmain(int argc, _TCHAR *argv[])
    streamoff dst_start = idrec::open<ZRCola::recordid_t, ZRCola::recordsize_t>(dst, ZRCOLA_DB_ID);

    ZRCola::translation_db db_trans;
+    ZRCola::transet_db db_transset;
    normperm_db db_np;

    {
@ -334,6 +380,19 @@ int _tmain(int argc, _TCHAR *argv[])
    }

    {
+        // Build ZRCola Decomposed to ZRCola Composed translation set.
+        ZRCola::DBSource::transet ts;
+        ts.set = (int)ZRCOLA_TRANSEQID_DEFAULT;
+        ts.src = L"ZRCola Decomposed";
+        ts.dst = L"ZRCola Composed";
+        if (build_pot) {
+            pot.insert(ts.src);
+            pot.insert(ts.dst);
+        }
+
+        // Add translation set to index and data.
+        db_transset << ts;
+
        // Get translations.
        com_obj<ADORecordset> rs;
        if (src.SelectTranslations(rs)) {
@ -413,30 +472,239 @@ int _tmain(int argc, _TCHAR *argv[])
        }
    }

+    {
+        // Build ZRCola to Unicode translation set.
+        ZRCola::DBSource::transet ts;
+        ts.set = (int)ZRCOLA_TRANSEQID_UNICODE;
+        ts.src = L"ZRCola Composed";
+        ts.dst = L"Unicode";
+        if (build_pot) {
+            pot.insert(ts.src);
+            pot.insert(ts.dst);
+        }
+
+        // Add translation set to index and data.
+        db_transset << ts;
+
+        // Get all translations.
+        com_obj<ADORecordset> rs;
+        if (src.SelectAllTranslations(rs)) {
+            // Parse translations and build temporary database.
+            vector<ZRCola::DBSource::translation> db_all, db_combining;
+            for (; !ZRCola::DBSource::IsEOF(rs); rs->MoveNext()) {
+                // Read translation from the database.
+                ZRCola::DBSource::translation trans;
+                if (src.GetTranslation(rs, trans)) {
+                    // Add translation to temporary databases.
+                    db_all.push_back(trans);
+                    if (!trans.src.str.empty() && trans.src.str[0] == L'\u203f') {
+                        trans.src.str.erase(0, 1);
+                        db_combining.push_back(trans);
+                    }
+                } else
+                    has_errors = true;
+            }
+
+            com_obj<ADORecordset> rs2;
+            if (src.SelectPUACharacters(rs2)) {
+                // Parse characters and build translations.
+                static const LOGFONT lf_zrcola = {
+                    -FONT_MATCH_HEIGHT/2, 0,
+                    0, 0,
+                    FW_NORMAL,
+                    FALSE, FALSE, FALSE,
+                    ANSI_CHARSET,
+                    OUT_DEFAULT_PRECIS, CLIP_DEFAULT_PRECIS, DEFAULT_QUALITY,
+                    DEFAULT_PITCH | FF_DONTCARE,
+                    TEXT("ZRCola")
+                };
+                gdi_handle<HFONT> fnt_zrcola(::CreateFontIndirect(&lf_zrcola));
+                gdi_handle<HBRUSH> brush_bg(::CreateSolidBrush(RGB(0x00, 0x00, 0x00)));
+                gdi_handle<HBITMAP>
+                    bmp_orig(::CreateBitmap(FONT_MATCH_WIDTH, FONT_MATCH_HEIGHT, 1, 1, NULL)),
+                    bmp_comb(::CreateBitmap(FONT_MATCH_WIDTH, FONT_MATCH_HEIGHT, 1, 1, NULL)),
+                    bmp_pre (::CreateBitmap(FONT_MATCH_WIDTH, FONT_MATCH_HEIGHT, 1, 1, NULL));
+                dc
+                    dc_orig(::CreateCompatibleDC(NULL)),
+                    dc_comb(::CreateCompatibleDC(NULL)),
+                    dc_pre (::CreateCompatibleDC(NULL));
+                SetBkColor(dc_orig, RGB(0x00, 0x00, 0x00));
+                SetBkColor(dc_comb, RGB(0x00, 0x00, 0x00));
+                SetBkColor(dc_pre , RGB(0x00, 0x00, 0x00));
+                SetBkMode (dc_orig, TRANSPARENT);
+                SetBkMode (dc_comb, TRANSPARENT);
+                SetBkMode (dc_pre , TRANSPARENT);
+                SetTextColor(dc_orig, RGB(0xff, 0xff, 0xff));
+                SetTextColor(dc_comb, RGB(0xff, 0xff, 0xff));
+                SetTextColor(dc_pre , RGB(0xff, 0xff, 0xff));
+                SetTextAlign(dc_orig, TA_BASELINE | TA_CENTER | TA_NOUPDATECP);
+                SetTextAlign(dc_comb, TA_BASELINE | TA_CENTER | TA_NOUPDATECP);
+                SetTextAlign(dc_pre , TA_BASELINE | TA_CENTER | TA_NOUPDATECP);
+                dc_selector
+                    selector_font_orig(dc_orig, fnt_zrcola),
+                    selector_font_comb(dc_comb, fnt_zrcola),
+                    selector_font_pre (dc_pre , fnt_zrcola);
+                struct {
+                    BITMAPINFOHEADER bmiHeader;
+                    RGBQUAD          bmiColors[2];
+                } bmi =
+                {
+                    {
+                        sizeof(BITMAPINFOHEADER),
+                        FONT_MATCH_WIDTH,
+                        FONT_MATCH_HEIGHT,
+                        1,
+                        1,
+                        BI_RGB,
+                        0,
+                        3780, 3780,
+                        2, 0
+                    },
+                    {
+                        { 0x00, 0x00, 0x00 },
+                        { 0xff, 0xff, 0xff },
+                    }
+                };
+                vector<unsigned char>
+                    bits_orig(FONT_MATCH_WIDTH * FONT_MATCH_HEIGHT / 8),
+                    bits_comb(FONT_MATCH_WIDTH * FONT_MATCH_HEIGHT / 8),
+                    bits_pre (FONT_MATCH_WIDTH * FONT_MATCH_HEIGHT / 8);
+                map<wstring, map<wstring, pair<double, int>>> trans;
+                for (; !ZRCola::DBSource::IsEOF(rs2); rs2->MoveNext()) {
+                    // Read character from the database.
+                    ZRCola::DBSource::character chr;
+                    if (src.GetCharacter(rs2, chr)) {
+                        for (auto t = db_all.cbegin(), t_end = db_all.cend(); t != t_end; ++t) {
+                            if (t->dst.str != chr.first)
+                                continue;
+                            // Replace ZRCola decomposition with Unicode combining characters wherever possible.
+                            const auto &comp_orig = chr.first;
+                            const auto &decomp_orig = t->src.str;
+                            wstring decomp = decomp_orig;
+                            for (auto i = db_combining.cbegin(), i_end = db_combining.cend(); i != i_end; ++i)
+                                replace_all(decomp, i->src.str, i->dst.str);
+                            wstring comp = decomp;
+                            for (auto i = db_all.cbegin(), i_end = db_all.cend(); i != i_end; ++i)
+                                replace_all(comp, i->src.str, i->dst.str);
+                            // Check if we got anything useful.
+                            if (comp_orig == comp ||
+                                contains_pua(comp))
+                                continue;
+                            // Do the Unicode C and D normalizations to get two variants:
+                            // - Use precomposed characters as much as possible
+                            // - Use combining characters only
+                            wstring comp_comb, comp_pre;
+                            NormalizeString(NormalizationC, comp    , comp_pre );
+                            NormalizeString(NormalizationD, comp_pre, comp_comb);
+                            {
+                                // Paint original character and Unicode precomposed/combining one.
+                                dc_selector
+                                    selector_bmp_orig(dc_orig, bmp_orig),
+                                    selector_bmp_comb(dc_comb, bmp_comb),
+                                    selector_bmp_pre (dc_pre , bmp_pre );
+                                static const RECT bounds = { 0, 0, FONT_MATCH_WIDTH, FONT_MATCH_HEIGHT };
+                                FillRect(dc_orig, &bounds, brush_bg);
+                                FillRect(dc_comb, &bounds, brush_bg);
+                                FillRect(dc_pre , &bounds, brush_bg);
+                                TextOutW(dc_orig, FONT_MATCH_WIDTH/2, FONT_MATCH_HEIGHT*5/8, comp_orig.c_str(), comp_orig.length());
+                                TextOutW(dc_comb, FONT_MATCH_WIDTH/2, FONT_MATCH_HEIGHT*5/8, comp_comb.c_str(), comp_comb.length());
+                                TextOutW(dc_pre , FONT_MATCH_WIDTH/2, FONT_MATCH_HEIGHT*5/8, comp_pre .c_str(), comp_pre .length());
+                            }
+                            // Compare bitmaps.
+                            if (!GetDIBits(dc_orig, bmp_orig, 0, FONT_MATCH_HEIGHT, bits_orig.data(), (BITMAPINFO*)&bmi, DIB_PAL_COLORS) ||
+                                !GetDIBits(dc_comb, bmp_comb, 0, FONT_MATCH_HEIGHT, bits_comb.data(), (BITMAPINFO*)&bmi, DIB_PAL_COLORS) ||
+                                !GetDIBits(dc_pre , bmp_pre , 0, FONT_MATCH_HEIGHT, bits_pre .data(), (BITMAPINFO*)&bmi, DIB_PAL_COLORS))
+                                continue;
+                            double
+                                score_comb = compare_bitmaps(bits_orig.data(), bits_comb.data()),
+                                score_pre  = compare_bitmaps(bits_orig.data(), bits_pre .data());
+                            // Add results to a temporary database.
+                            auto hit = trans.find(comp_orig);
+                            if (hit != trans.end()) {
+                                if (score_pre <= FONT_MATCH_THRESHOLD) {
+                                    if (hit->second.find(comp_pre) == hit->second.end())
+                                        hit->second.insert(make_pair(comp_pre, make_pair(score_pre, 1)));
+                                } if (score_comb <= FONT_MATCH_THRESHOLD && comp_pre != comp_comb) {
+                                    if (hit->second.find(comp_comb) == hit->second.end())
+                                        hit->second.insert(make_pair(comp_comb, make_pair(score_comb, 100)));
+                                }
+                            } else {
+                                map<wstring, pair<double, int>> v;
+                                if (score_pre <= FONT_MATCH_THRESHOLD)
+                                    v.insert(make_pair(comp_pre, make_pair(score_pre, 1)));
+                                if (score_comb <= FONT_MATCH_THRESHOLD && comp_pre != comp_comb)
+                                    v.insert(make_pair(comp_comb, make_pair(score_comb, 100)));
+                                if (!v.empty())
+                                    trans.insert(make_pair(comp_orig, std::move(v)));
+                            }
+                        }
+                    } else
+                        has_errors = true;
+                }
+
+                // Preallocate memory.
+                size_t reserve = db_trans.idxSrc.size() + trans.size();
+                db_trans.idxSrc.reserve(reserve);
+                db_trans.idxDst.reserve(reserve);
+                db_trans.data  .reserve(reserve*5);
+
+                ZRCola::DBSource::translation t;
+                t.set = (int)ZRCOLA_TRANSEQID_UNICODE;
+                t.dst.rank = 1;
+                vector<pair<double, pair<wstring, int>>> results;
+                for (auto i = trans.cbegin(), i_end = trans.cend(); i != i_end; ++i) {
+                    // Sort results by score.
+                    results.clear();
+                    results.reserve(i->second.size());
+                    for (auto j = i->second.cbegin(), j_end = i->second.cend(); j != j_end; ++j)
+                        results.push_back(make_pair(j->second.first, make_pair(j->first, j->second.second)));
+                    sort(results.begin(), results.end(), [] (pair<double, pair<wstring, int>> const& a, pair<double, pair<wstring, int>> const& b) { return a.first < b.first; });
+                    int rank_comb = 0, rank_pre = 0;
+                    for (auto j = results.cbegin(), j_end = results.cend(); j != j_end; ++j) {
+                        t.src.str  = i->first;
+                        t.src.rank = j->second.second + (j->second.second >= 100 ? rank_comb++ : rank_pre++);
+                        t.dst.str  = j->second.first;
+                        db_trans << t;
+                    }
+                }
+            } else {
+                _ftprintf(stderr, wxT("%s: error ZCC0016: Error getting characters from database. Please make sure the file is ZRCola.zrc compatible.\n"), (LPCTSTR)filenameIn.c_str());
+                has_errors = true;
+            }
+        } else {
+            _ftprintf(stderr, wxT("%s: error ZCC0003: Error getting translations from database. Please make sure the file is ZRCola.zrc compatible.\n"), (LPCTSTR)filenameIn.c_str());
+            has_errors = true;
+        }
+    }
+
    {
        // Get translation sets.
        com_obj<ADORecordset> rs;
        if (src.SelectTranlationSets(rs)) {
            size_t count = src.GetRecordsetCount(rs);
            if (count < 0xffffffff) { // 4G check (-1 is reserved for error condition)
-                ZRCola::transet_db db;
-
                // Preallocate memory.
-                db.idxTranSet.reserve((count+1));
-                db.data      .reserve((count+1)*4);
+                db_transset.idxTranSet.reserve((count+2));
+                db_transset.data      .reserve((count+2)*4);

                // Parse translation sets and build index and data.
                for (; !ZRCola::DBSource::IsEOF(rs); rs->MoveNext()) {
                    // Read translation set from the database.
                    ZRCola::DBSource::transet ts;
                    if (src.GetTranslationSet(rs, ts)) {
+                        if (ts.set <= (int)ZRCOLA_TRANSEQID_DEFAULT) {
+                            _ftprintf(stderr, wxT("%s: error ZCC0008: Translation set is using reserved ID %i.\n"), (LPCTSTR)filenameIn.c_str(), ts.set);
+                            has_errors = true;
+                            continue;
+                        }
+
                        if (build_pot) {
                            pot.insert(ts.src);
                            pot.insert(ts.dst);
                        }

                        // Add translation set to index and data.
-                        db << ts;
+                        db_transset << ts;

                        // Get translations.
                        com_obj<ADORecordset> rs_tran;
@ -464,12 +732,6 @@ int _tmain(int argc, _TCHAR *argv[])
                    } else
                        has_errors = true;
                }
-
-                // Sort indices.
-                db.idxTranSet.sort();
-
-                // Write translation sets to file.
-                dst << ZRCola::transet_rec(db);
            } else {
                _ftprintf(stderr, wxT("%s: error ZCC0009: Error getting translation set count from database or too many translation sets.\n"), (LPCTSTR)filenameIn.c_str());
                has_errors = true;
@ -480,6 +742,12 @@ int _tmain(int argc, _TCHAR *argv[])
        }
    }

+    // Sort indices.
+    db_transset.idxTranSet.sort();
+
+    // Write translation sets to file.
+    dst << ZRCola::transet_rec(db_transset);
+
    // Sort indices.
    db_trans.idxSrc.sort();
    db_trans.idxDst.sort();
--- a/ZRColaCompile/pch.h
+++ b/ZRColaCompile/pch.h
@ -37,6 +37,7 @@
 #include <stdex/idrec.h>

 #include <WinStd/Common.h>
+#include <WinStd/GDI.h>

 #include <initguid.h> // GUID helper to prevent LNK2001 errors (unresolved external symbol IID_IADO...)
 #pragma warning(push)
@ -45,6 +46,7 @@
 #include <adoid.h>
 #pragma warning(pop)

+#include <memory.h>
 #include <process.h>
 #include <tchar.h>

@ -56,4 +58,5 @@
 #include <fstream>
 #include <memory>
 #include <set>
+#include <utility>
 #include <vector>
--- a/lib/WinStd
+++ b/lib/WinStd
@ -1 +1 @@
-Subproject commit 51b262b38223fbce3b9b43812ad0ff5ef88bdeb4
+Subproject commit b8816476e5f6f4f8465add75ac7bba5c742a2db6
--- a/lib/libZRCola/include/zrcola/translate.h
+++ b/lib/libZRCola/include/zrcola/translate.h
@ -37,12 +37,17 @@
 ///
 /// Translation disabled/ZRCola (De)Composition
 ///
-#define ZRCOLA_TRANSEQID_DEFAULT    ((ZRCola::transeqid_t)0)
+#define ZRCOLA_TRANSEQID_DEFAULT    ((ZRCola::transeqid_t)0x0000)

 ///
 /// Custom translation sequence ID
 ///
-#define ZRCOLA_TRANSEQID_CUSTOM     ((ZRCola::transeqid_t)-1)
+#define ZRCOLA_TRANSEQID_CUSTOM     ((ZRCola::transeqid_t)0xffff)
+
+///
+/// ZRCola to Unicode translation sequence ID
+///
+#define ZRCOLA_TRANSEQID_UNICODE    ((ZRCola::transeqid_t)0xfffe)


 namespace ZRCola {
--- a/output/data/ZRCola.zrcdb
+++ b/output/data/ZRCola.zrcdb
--- a/output/locale/ZRCola-zrcdb.pot
+++ b/output/locale/ZRCola-zrcdb.pot
@ -694,9 +694,18 @@ msgstr ""
 msgid "Ukrainian"
 msgstr ""

+msgid "Unicode"
+msgstr ""
+
 msgid "Units"
 msgstr ""

+msgid "ZRCola Composed"
+msgstr ""
+
+msgid "ZRCola Decomposed"
+msgstr ""
+
 msgid "l+j l|j"
 msgstr ""

--- a/output/locale/de_DE/ZRCola-zrcdb.po
+++ b/output/locale/de_DE/ZRCola-zrcdb.po
@ -1,14 +1,17 @@
-# 
+#
 msgid ""
 msgstr ""
 "Project-Id-Version: ZRCola.zrcdb\n"
+"POT-Creation-Date: \n"
+"PO-Revision-Date: \n"
+"Last-Translator: Simon Rozman <simon.rozman@amebis.si>\n"
 "Language-Team: German (Germany) (https://www.transifex.com/amebis/teams/91592/de_DE/)\n"
+"Language: de_DE\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"Language: de_DE\n"
 "Plural-Forms: nplurals=2; plural=(n != 1);\n"
-"X-Generator: ZRColaCompile\n"
+"X-Generator: Poedit 2.4.2\n"

 msgid "Albanian"
 msgstr ""
@ -697,9 +700,18 @@ msgstr ""
 msgid "Ukrainian"
 msgstr ""

+msgid "Unicode"
+msgstr "Unicode"
+
 msgid "Units"
 msgstr ""

+msgid "ZRCola Composed"
+msgstr ""
+
+msgid "ZRCola Decomposed"
+msgstr ""
+
 msgid "l+j l|j"
 msgstr ""

--- a/output/locale/ru_RU/ZRCola-zrcdb.po
+++ b/output/locale/ru_RU/ZRCola-zrcdb.po
@ -1,17 +1,19 @@
 # Translators:
 # Simon Rozman <simon@rozman.si>, 2018
-# 
+#
 msgid ""
 msgstr ""
 "Project-Id-Version: ZRCola.zrcdb\n"
-"Last-Translator: Simon Rozman <simon@rozman.si>, 2018\n"
+"POT-Creation-Date: \n"
+"PO-Revision-Date: \n"
+"Last-Translator: Simon Rozman <simon.rozman@amebis.si>\n"
 "Language-Team: Russian (Russia) (https://www.transifex.com/amebis/teams/91592/ru_RU/)\n"
+"Language: ru_RU\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"Language: ru_RU\n"
 "Plural-Forms: nplurals=4; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<12 || n%100>14) ? 1 : n%10==0 || (n%10>=5 && n%10<=9) || (n%100>=11 && n%100<=14)? 2 : 3);\n"
-"X-Generator: ZRColaCompile\n"
+"X-Generator: Poedit 2.4.2\n"

 msgid "Albanian"
 msgstr "албанский"
@ -170,8 +172,7 @@ msgid "Greek (Modern)"
 msgstr "греческий алфавит (новогреческий)"

 msgid "Greek (Modern) » Greek (Old)"
-msgstr ""
-"греческий алфавит (новогреческий) » греческий алфавит (древнегреческий)"
+msgstr "греческий алфавит (новогреческий) » греческий алфавит (древнегреческий)"

 msgid "Greek (Old)"
 msgstr "греческий алфавит (древнегреческий)"
@ -701,9 +702,18 @@ msgstr "турецкий"
 msgid "Ukrainian"
 msgstr "украинский"

+msgid "Unicode"
+msgstr "Юникод"
+
 msgid "Units"
 msgstr "Единицы"

+msgid "ZRCola Composed"
+msgstr "ZRCola составленное"
+
+msgid "ZRCola Decomposed"
+msgstr "ZRCola разобранное"
+
 msgid "l+j l|j"
 msgstr "l+j l|j"

--- a/output/locale/sl_SI/ZRCola-zrcdb.po
+++ b/output/locale/sl_SI/ZRCola-zrcdb.po
@ -1,17 +1,19 @@
 # Translators:
 # Simon Rozman <simon@rozman.si>, 2018
-# 
+#
 msgid ""
 msgstr ""
 "Project-Id-Version: ZRCola.zrcdb\n"
-"Last-Translator: Simon Rozman <simon@rozman.si>, 2018\n"
+"POT-Creation-Date: \n"
+"PO-Revision-Date: \n"
+"Last-Translator: Simon Rozman <simon.rozman@amebis.si>\n"
 "Language-Team: Slovenian (Slovenia) (https://www.transifex.com/amebis/teams/91592/sl_SI/)\n"
+"Language: sl_SI\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"Language: sl_SI\n"
 "Plural-Forms: nplurals=4; plural=(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3);\n"
-"X-Generator: ZRColaCompile\n"
+"X-Generator: Poedit 2.4.2\n"

 msgid "Albanian"
 msgstr "albanščina"
@ -700,9 +702,18 @@ msgstr "turščina"
 msgid "Ukrainian"
 msgstr "ukrajinščina"

+msgid "Unicode"
+msgstr "Unicode"
+
 msgid "Units"
 msgstr "Enote"

+msgid "ZRCola Composed"
+msgstr "ZRCola sestavljeno"
+
+msgid "ZRCola Decomposed"
+msgstr "ZRCola razstavljeno"
+
 msgid "l+j l|j"
 msgstr "l+j l|j"