Characters are represented as UTF-16 sequences instead of a single UTF-16 character

2017-03-20 23:44:49 +01:00 · 2017-03-20 23:44:49 +01:00 · 806aa550a5
commit 806aa550a5
parent ca306345c2
22 changed files with 1179 additions and 483 deletions
--- a/ZRCola/zrcolachrcatpnl.cpp
+++ b/ZRCola/zrcolachrcatpnl.cpp
@ -57,7 +57,7 @@ wxZRColaCharacterCatalogPanel::wxZRColaCharacterCatalogPanel(wxWindow* parent) :
        for (size_t i = 0, n = m_cg_db.idxRnk.size(); i < n; i++) {
            const ZRCola::chrgrp_db::chrgrp &cg = m_cg_db.idxRnk[i];
            wxString
-                label(cg.data, cg.name_len),
+                label(cg.name(), cg.name_len()),
                label_tran2(wxGetTranslation(label, wxT("ZRCola-zrcdb")));
            m_choice->Insert(label_tran2, i);
        }
@ -155,17 +155,19 @@ void wxZRColaCharacterCatalogPanel::Update()

    if (m_show_all->GetValue()) {
        m_grid->SetCharacters(
-            wxString(cg.get_chars(), cg.char_len),
-            wxArrayShort((const short*)cg.get_char_shown(), (const short*)cg.get_char_shown() + (cg.char_len + 15)/16));
+            wxString(cg.chrlst(), cg.chrlst_end()),
+            wxArrayShort(reinterpret_cast<const short*>(cg.chrshow()), reinterpret_cast<const short*>(cg.chrshow_end())));
    } else {
        // Select frequently used characters only.
-        const wchar_t *src = cg.get_chars();
-        const unsigned __int16 *shown = cg.get_char_shown();
-        wxString chars;
-        for (unsigned __int16 i = 0, j = 0; i < cg.char_len; j++) {
-            for (unsigned __int16 k = 0, mask = shown[j]; k < 16 && i < cg.char_len; k++, mask >>= 1, i++) {
+        const wchar_t *src = cg.chrlst();
+        const unsigned __int16 *shown = cg.chrshow();
+        wxArrayString chars;
+        for (size_t i = 0, i_end = cg.chrlst_len(), j = 0; i < i_end; j++) {
+            for (unsigned __int16 k = 0, mask = shown[j]; k < 16 && i < i_end; k++, mask >>= 1) {
+                size_t len = wcsnlen(src + i, i_end - i);
                if (mask & 1)
-                    chars += src[i];
+                    chars.Add(wxString(src + i, len));
+                i += len + 1;
            }
        }
        m_grid->SetCharacters(chars);
--- a/ZRCola/zrcolachrgrid.cpp
+++ b/ZRCola/zrcolachrgrid.cpp
@ -68,6 +68,24 @@ void wxZRColaCharGrid::Init()


 void wxZRColaCharGrid::SetCharacters(const wxString &chars)
+{
+    m_chars.Clear();
+    const wxCStrData chr = chars.GetData();
+    for (size_t i = 0, i_end = chars.Length(), i_next; i < i_end; i = i_next + 1) { // Walk the zero-delimited list; the + 1 steps over the delimiter.
+        i_next = i + _tcsnlen(chr + i, i_end - i); // End of the current sequence: the next zero, or the end of the string.
+        m_chars.Add(wxString(chr + i, chr + i_next)); // One array element per character sequence.
+    };
+    m_relevance.Clear(); // This overload carries no relevance information.
+    m_regenerate = true;
+
+    // Invoke OnSize(), which will populate the grid.
+    wxSizeEvent e(GetSize(), m_windowId);
+    e.SetEventObject(this);
+    HandleWindowEvent(e);
+}
+
+
+void wxZRColaCharGrid::SetCharacters(const wxArrayString &chars)
 {
    m_chars = chars;
    m_relevance.Clear();
@ -82,7 +100,12 @@ void wxZRColaCharGrid::SetCharacters(const wxString &chars)

 void wxZRColaCharGrid::SetCharacters(const wxString &chars, const wxArrayShort &relevance)
 {
-    m_chars      = chars;
+    m_chars.Clear();
+    const wxCStrData chr = chars.GetData();
+    for (size_t i = 0, i_end = chars.Length(), i_next; i < i_end; i = i_next + 1) {
+        i_next = i + _tcsnlen(chr + i, i_end - i);
+        m_chars.Add(wxString(chr + i, chr + i_next));
+    };
    m_relevance  = relevance;
    m_regenerate = true;

@ -95,22 +118,23 @@ void wxZRColaCharGrid::SetCharacters(const wxString &chars, const wxArrayShort &

 wxString wxZRColaCharGrid::GetToolTipText(int idx)
 {
-    wxASSERT_MSG(idx < (int)m_chars.Length(), wxT("index out of bounds"));
+    wxASSERT_MSG(idx < (int)m_chars.GetCount(), wxT("index out of bounds"));

    auto app = dynamic_cast<ZRColaApp*>(wxTheApp);
+    const auto &chr = m_chars[idx];

    // See if this character has a key sequence registered. The search key is a keyseq built in a raw buffer sized for the record plus the character sequence.
-    char ks[sizeof(ZRCola::keyseq_db::keyseq)] = {};
-    ((ZRCola::keyseq_db::keyseq*)ks)->chr = m_chars[idx];
+    std::unique_ptr<ZRCola::keyseq_db::keyseq> ks((ZRCola::keyseq_db::keyseq*)new char[sizeof(ZRCola::keyseq_db::keyseq) + sizeof(wchar_t)*chr.length()]); // NOTE(review): buffer comes from new char[] but unique_ptr's default deleter applies scalar delete to a keyseq* - confirm, or use a matching deleter.
+    ks->ZRCola::keyseq_db::keyseq::keyseq(NULL, 0, chr.data(), chr.length()); // NOTE(review): explicit constructor call is a compiler extension; standard C++ would use placement new - confirm the supported toolchains.
    ZRCola::keyseq_db::indexKey::size_type start;
-    if (app->m_ks_db.idxChr.find(*(ZRCola::keyseq_db::keyseq*)ks, start)) {
+    if (app->m_ks_db.idxChr.find(*ks, start)) {
        ZRCola::keyseq_db::keyseq &seq = app->m_ks_db.idxChr[start];
        wxString ks_str;
-        if (ZRCola::keyseq_db::GetSequenceAsText(seq.seq, seq.seq_len, ks_str))
-            return wxString::Format(wxT("U+%04X (%s)"), (int)m_chars[idx], ks_str.c_str());
+        if (ZRCola::keyseq_db::GetSequenceAsText(seq.seq(), seq.seq_len(), ks_str))
+            return wxString::Format(wxT("U+%s (%s)"), ZRCola::GetUnicodeDump(chr.data(), chr.length(), _T("+")).c_str(), ks_str.c_str()); // Code dump followed by the key sequence text in parentheses.
    }

-    return wxString::Format(wxT("U+%04X"), (int)m_chars[idx]);
+    return wxString::Format(wxT("U+%s"), ZRCola::GetUnicodeDump(chr.data(), chr.length(), _T("+")).c_str()); // No key sequence text available: code dump only.
 }


@ -126,17 +150,17 @@ void wxZRColaCharGrid::OnSize(wxSizeEvent& event)
    // Calculate initial estimate of columns and rows.
    wxSize size(event.GetSize());
    size_t
-        char_len = m_chars.Length();
+        char_count = m_chars.GetCount();
    int
-        width    = size.GetWidth() - m_rowLabelWidth - m_extraWidth,
-        cols     = std::max<int>(width / wxZRColaCharacterGridColumnWidth, 1),
-        rows     = std::max<int>((char_len + cols - 1) / cols, 1);
+        width      = size.GetWidth() - m_rowLabelWidth - m_extraWidth,
+        cols       = std::max<int>(width / wxZRColaCharacterGridColumnWidth, 1),
+        rows       = std::max<int>((char_count + cols - 1) / cols, 1);

    if (m_colLabelHeight + rows*wxZRColaCharacterGridRowHeight + m_extraHeight > size.GetHeight()) {
        // Vertical scrollbar will be shown. Adjust the width and recalculate layout to avoid horizontal scrollbar.
        width = std::max<int>(width - wxSystemSettings::GetMetric(wxSYS_VSCROLL_X, this), 0);
        cols  = std::max<int>(width / wxZRColaCharacterGridColumnWidth, 1);
-        rows  = std::max<int>((char_len + cols - 1) / cols, 1);
+        rows  = std::max<int>((char_count + cols - 1) / cols, 1);
    }

    BeginBatch();
@ -146,14 +170,14 @@ void wxZRColaCharGrid::OnSize(wxSizeEvent& event)
        wxGridStringTable *table = new wxGridStringTable(rows, cols);
        for (int r = 0, i = 0; r < rows; r++)
            for (int c = 0; c < cols; c++, i++)
-                table->SetValue(r, c, i < (int)char_len ? wxString(1, m_chars[i]) : wxEmptyString);
+                table->SetValue(r, c, i < (int)char_count ? m_chars[i] : wxEmptyString);
        SetTable(table, true);
        if (!m_relevance.IsEmpty()) {
            const wxColour colour_def;
            const wxColour colour_irr = wxSystemSettings::GetColour(wxSYS_COLOUR_BTNHIGHLIGHT);
            for (int r = 0, i = 0; r < rows; r++)
                for (int c = 0; c < cols; c++, i++)
-                    SetCellBackgroundColour(r, c, i >= (int)char_len || ((unsigned short)(m_relevance[i/16]) & (1<<(i%16))) ? colour_def : colour_irr);
+                    SetCellBackgroundColour(r, c, i >= (int)char_count || ((unsigned short)(m_relevance[i/16]) & (1<<(i%16))) ? colour_def : colour_irr);
        } else {
            for (int r = 0, i = 0; r < rows; r++)
                for (int c = 0; c < cols; c++, i++)
@ -217,7 +241,7 @@ void wxZRColaCharGrid::OnMotion(wxMouseEvent& event)
        return;

    size_t toolTipIdx = row*m_numCols + col;
-    if (toolTipIdx >= m_chars.Length()) {
+    if (toolTipIdx >= m_chars.GetCount()) {
        // Index out of range.
        m_toolTipIdx = (size_t)-1;
        m_timerToolTip.Stop();
@ -241,7 +265,7 @@ void wxZRColaCharGrid::OnTooltipTimer(wxTimerEvent& event)
 {
    event.Skip();

-    if (m_toolTipIdx >= m_chars.Length())
+    if (m_toolTipIdx >= m_chars.GetCount())
        return;

    GetGridWindow()->SetToolTip(GetToolTipText(m_toolTipIdx));
--- a/ZRCola/zrcolachrgrid.h
+++ b/ZRCola/zrcolachrgrid.h
@ -54,14 +54,21 @@ public:
    ///
    /// Sets new array of characters to display
    ///
-    /// \param[in] chars  The string containing characters to display
+    /// \param[in] chars  The string containing characters to display (zero delimited)
    ///
    void SetCharacters(const wxString &chars);

    ///
    /// Sets new array of characters to display
    ///
-    /// \param[in] chars      The string containing characters to display
+    /// \param[in] chars  The array of characters to display
+    ///
+    void SetCharacters(const wxArrayString &chars);
+
+    ///
+    /// Sets new array of characters to display
+    ///
+    /// \param[in] chars      The string containing characters to display (zero delimited)
    /// \param[in] relevance  Bit-array of \p chars relevance (1=more relevant, 0=less relevant)
    ///
    void SetCharacters(const wxString &chars, const wxArrayShort &relevance);
@ -71,7 +78,7 @@ public:
    ///
    /// \returns  The string containing displayed characters
    ///
-    inline wxString GetCharacters() const
+    inline const wxArrayString& GetCharacters() const
    {
        return m_chars; // Returned by const reference: one array element per displayed character sequence.
    }
@ -83,10 +90,14 @@ public:
    ///
    /// \returns Grid coordinates of selected character or (-1, -1) if character not found.
    ///
-    inline wxGridCellCoords GetCharacterCoords(wchar_t c) const
+    inline wxGridCellCoords GetCharacterCoords(const wxString &c) const
    {
-        int i = m_chars.Find(c);
-        return i != wxNOT_FOUND ? wxGridCellCoords(i / m_numCols, i % m_numCols) : wxGridCellCoords(-1, -1);
+        // Linear search for the first entry equal to c.
+        const size_t count = m_chars.GetCount();
+        size_t pos = 0;
+        while (pos < count && !(m_chars[pos] == c))
+            pos++;
+
+        // Translate the flat index to (row, column); (-1, -1) reports that c is not displayed.
+        return pos < count ? wxGridCellCoords(pos / m_numCols, pos % m_numCols) : wxGridCellCoords(-1, -1);
    }

 protected:
@ -102,7 +113,7 @@ private:
    void Init();                // common part of all ctors

 protected:
-    wxString m_chars;           ///< Array of Unicode characters to display in the grid
+    wxArrayString m_chars;      ///< Array of Unicode characters to display in the grid
    wxArrayShort m_relevance;   ///< Bit-array of `m_chars` relevance

 private:
--- a/ZRCola/zrcolachrslct.cpp
+++ b/ZRCola/zrcolachrslct.cpp
@ -20,6 +20,174 @@
 #include "stdafx.h"


+//////////////////////////////////////////////////////////////////////
+// wxZRColaUTF16CharValidator
+//////////////////////////////////////////////////////////////////////
+
+wxIMPLEMENT_DYNAMIC_CLASS(wxZRColaUTF16CharValidator, wxValidator);
+
+
+wxZRColaUTF16CharValidator::wxZRColaUTF16CharValidator(wchar_t *val) :
+    m_val(val), // Variable exchanged with the control; may be NULL, in which case TransferToWindow() leaves the control untouched.
+    wxValidator()
+{
+}
+
+
+wxObject* wxZRColaUTF16CharValidator::Clone() const
+{
+    // Hand back a heap-allocated copy of this validator.
+    wxZRColaUTF16CharValidator *copy = new wxZRColaUTF16CharValidator(*this);
+    return copy;
+}
+
+
+bool wxZRColaUTF16CharValidator::Validate(wxWindow *parent)
+{
+    wxASSERT(GetWindow()->IsKindOf(CLASSINFO(wxTextCtrl)));
+    wxTextCtrl *text = (wxTextCtrl*)GetWindow();
+
+    // Only an enabled control has its content checked; a disabled one always passes.
+    if (text->IsEnabled()) {
+        wxString input(text->GetValue());
+        return Parse(input, 0, input.Length(), text, parent);
+    }
+
+    return true;
+}
+
+
+bool wxZRColaUTF16CharValidator::TransferToWindow()
+{
+    wxASSERT(GetWindow()->IsKindOf(CLASSINFO(wxTextCtrl)));
+
+    // Without a bound variable there is nothing to display.
+    if (!m_val)
+        return true;
+
+    // Show the value as a zero-padded, four-digit hexadecimal number.
+    ((wxTextCtrl*)GetWindow())->SetValue(wxString::Format(wxT("%04X"), *m_val));
+    return true;
+}
+
+
+bool wxZRColaUTF16CharValidator::TransferFromWindow()
+{
+    wxASSERT(GetWindow()->IsKindOf(CLASSINFO(wxTextCtrl)));
+    wxTextCtrl *text = (wxTextCtrl*)GetWindow();
+
+    // Parse the whole text; the result goes to the bound variable and no parent window is supplied for messages.
+    wxString input(text->GetValue());
+    const size_t input_len = input.Length();
+    return Parse(input, 0, input_len, text, NULL, m_val);
+}
+
+
+///
+/// Parses up to four hexadecimal digits as a single UTF-16 code unit
+///
+/// \param[in]  val_in   Text to parse
+/// \param[in]  i_start  Index of the first character of \p val_in to parse
+/// \param[in]  i_end    Index one past the last character of \p val_in to parse
+/// \param[in]  ctrl     Text control that gets the focus and the selection when parsing fails
+/// \param[in]  parent   Parent window of the error message box
+/// \param[out] val_out  Receives the parsed value on success; may be NULL
+///
+/// \returns \c true when the range holds at most four hexadecimal digits (an empty range yields 0); \c false after the error was reported
+///
+bool wxZRColaUTF16CharValidator::Parse(const wxString &val_in, size_t i_start, size_t i_end, wxTextCtrl *ctrl, wxWindow *parent, wchar_t *val_out)
+{
+    const wxStringCharType *text = val_in;
+    wchar_t code = 0;
+
+    for (size_t pos = i_start; pos < i_end; pos++) {
+        if (pos >= i_start + 4) {
+            // Maximum characters exceeded.
+            ctrl->SetFocus();
+            ctrl->SetSelection(pos, i_end);
+            wxMessageBox(_("Too many digits in Unicode."), _("Validation conflict"), wxOK | wxICON_EXCLAMATION, parent);
+            return false;
+        }
+
+        // Translate the hexadecimal digit to its numeric value.
+        int digit;
+        if (_T('0') <= text[pos] && text[pos] <= _T('9'))
+            digit = text[pos] - _T('0');
+        else if (_T('A') <= text[pos] && text[pos] <= _T('F'))
+            digit = text[pos] - _T('A') + 10;
+        else if (_T('a') <= text[pos] && text[pos] <= _T('f'))
+            digit = text[pos] - _T('a') + 10;
+        else {
+            // Invalid character found.
+            ctrl->SetFocus();
+            ctrl->SetSelection(pos, pos + 1);
+            wxMessageBox(wxString::Format(_("Invalid character in Unicode found: %c"), text[pos]), _("Validation conflict"), wxOK | wxICON_EXCLAMATION, parent);
+            return false;
+        }
+
+        code = (code << 4) | digit;
+    }
+
+    // End of Unicode found.
+    if (val_out) *val_out = code;
+    return true;
+}
+
+
+//////////////////////////////////////////////////////////////////////
+// wxZRColaUnicodeDumpValidator
+//////////////////////////////////////////////////////////////////////
+
+wxIMPLEMENT_DYNAMIC_CLASS(wxZRColaUnicodeDumpValidator, wxValidator);
+
+
+wxZRColaUnicodeDumpValidator::wxZRColaUnicodeDumpValidator(wxString *val) :
+    m_val(val), // String exchanged with the control; may be NULL, in which case TransferToWindow() leaves the control untouched.
+    wxValidator()
+{
+}
+
+
+wxObject* wxZRColaUnicodeDumpValidator::Clone() const
+{
+    // Hand back a heap-allocated copy of this validator.
+    wxZRColaUnicodeDumpValidator *copy = new wxZRColaUnicodeDumpValidator(*this);
+    return copy;
+}
+
+
+bool wxZRColaUnicodeDumpValidator::Validate(wxWindow *parent)
+{
+    wxASSERT(GetWindow()->IsKindOf(CLASSINFO(wxTextCtrl)));
+    wxTextCtrl *text = (wxTextCtrl*)GetWindow();
+
+    // Only an enabled control has its content checked; a disabled one always passes.
+    if (text->IsEnabled()) {
+        wxString input(text->GetValue());
+        return Parse(input, 0, input.Length(), text, parent);
+    }
+
+    return true;
+}
+
+
+bool wxZRColaUnicodeDumpValidator::TransferToWindow()
+{
+    wxASSERT(GetWindow()->IsKindOf(CLASSINFO(wxTextCtrl)));
+
+    // Without a bound string there is nothing to display.
+    if (!m_val)
+        return true;
+
+    // Show the bound string as a dump with "+" between the elements.
+    ((wxTextCtrl*)GetWindow())->SetValue(ZRCola::GetUnicodeDumpW(m_val->c_str(), m_val->length(), L"+"));
+    return true;
+}
+
+
+bool wxZRColaUnicodeDumpValidator::TransferFromWindow()
+{
+    wxASSERT(GetWindow()->IsKindOf(CLASSINFO(wxTextCtrl)));
+    wxTextCtrl *text = (wxTextCtrl*)GetWindow();
+
+    // Parse the whole text; the result goes to the bound string and no parent window is supplied for messages.
+    wxString input(text->GetValue());
+    const size_t input_len = input.Length();
+    return Parse(input, 0, input_len, text, NULL, m_val);
+}
+
+
+///
+/// Parses a Unicode dump: hexadecimal UTF-16 code units separated by "+"
+///
+/// Every element is parsed by wxZRColaUTF16CharValidator::Parse(). Elements that evaluate to zero
+/// (including empty ones produced by a leading, trailing or doubled separator) are skipped.
+///
+/// \param[in]  val_in   Text to parse
+/// \param[in]  i_start  Index of the first character of \p val_in to parse
+/// \param[in]  i_end    Index one past the last character of \p val_in to parse
+/// \param[in]  ctrl     Text control that gets the focus and the selection when parsing fails
+/// \param[in]  parent   Parent window of the error message box
+/// \param[out] val_out  Receives the parsed string on success; may be NULL
+///
+/// \returns \c true when all elements parsed; \c false after the error was reported
+///
+bool wxZRColaUnicodeDumpValidator::Parse(const wxString &val_in, size_t i_start, size_t i_end, wxTextCtrl *ctrl, wxWindow *parent, wxString *val_out)
+{
+    const wxStringCharType *buf = val_in;
+
+    wxString str;
+    for (size_t i = i_start;;) {
+        const wxStringCharType *buf_next;
+        wchar_t chr;
+        if ((buf_next = wmemchr(buf + i, L'+', i_end - i)) != NULL) {
+            // Unicode dump separator found.
+            if (!wxZRColaUTF16CharValidator::Parse(val_in, i, buf_next - buf, ctrl, parent, &chr))
+                return false;
+            // Skip zero elements here too, so an empty element never embeds U+0000 in the result.
+            if (chr) str += chr;
+            i = buf_next - buf + 1;
+        } else if (wxZRColaUTF16CharValidator::Parse(val_in, i, i_end, ctrl, parent, &chr)) {
+            // The rest of the Unicode dump parsed successfully.
+            if (chr) str += chr;
+            if (val_out) *val_out = str;
+            return true;
+        } else
+            return false;
+    }
+}
+
+
 //////////////////////////////////////////////////////////////////////////
 // wxZRColaCharSelect
 //////////////////////////////////////////////////////////////////////////
@ -30,7 +198,6 @@ wxDEFINE_EVENT(wxEVT_SEARCH_COMPLETE, wxThreadEvent);
 wxZRColaCharSelect::wxZRColaCharSelect(wxWindow* parent) :
    m_searchChanged(false),
    m_unicodeChanged(false),
-    m_char(0),
    m_searchThread(NULL),
    wxZRColaCharSelectBase(parent)
 {
@ -46,13 +213,13 @@ wxZRColaCharSelect::wxZRColaCharSelect(wxWindow* parent) :

    m_search_more->SetLabel(_(L"▸ Search Options"));

-    m_unicode->SetValidator(wxHexValidator<wchar_t>(&m_char, wxNUM_VAL_DEFAULT, 4));
+    m_unicode->SetValidator(wxZRColaUnicodeDumpValidator(&m_char));

    // Fill categories.
    auto app = dynamic_cast<ZRColaApp*>(wxTheApp);
    for (size_t i = 0, n = app->m_cc_db.idxRnk.size(); i < n; i++) {
        const auto &cc = app->m_cc_db.idxRnk[i];
-        int idx = m_categories->Insert(wxGetTranslation(wxString(cc.name, cc.name_len), wxT("ZRCola-zrcdb")), i);
+        int idx = m_categories->Insert(wxGetTranslation(wxString(cc.name(), cc.name_len()), wxT("ZRCola-zrcdb")), i);
        m_categories->Check(idx);
        m_ccOrder.insert(std::make_pair(cc.id, idx));
    }
@ -84,24 +251,24 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event)
        if (m_unicode->GetValidator()->TransferFromWindow()) {
            auto app = dynamic_cast<ZRColaApp*>(wxTheApp);

-            m_gridPreview->SetCellValue(wxString(1, m_char), 0, 0);
+            m_gridPreview->SetCellValue(m_char, 0, 0);

-            char chr[sizeof(ZRCola::character_db::character)] = {};
-            ((ZRCola::character_db::character*)chr)->chr = m_char;
+            std::unique_ptr<ZRCola::character_db::character> chr((ZRCola::character_db::character*)new char[sizeof(ZRCola::character_db::character) + sizeof(wchar_t)*m_char.length()]);
+            chr->ZRCola::character_db::character::character(m_char.data(), m_char.length());
            size_t start;
-            if (app->m_chr_db.idxChr.find(*(ZRCola::character_db::character*)chr, start)) {
+            if (app->m_chr_db.idxChr.find(*chr, start)) {
                const auto &chr = app->m_chr_db.idxChr[start];
                // Update character description.
-                m_description->SetValue(wxString(chr.data, chr.desc_len));
+                m_description->SetValue(wxString(chr.desc(), chr.desc_len()));
                {
                    // See if this character has a key sequence registered.
-                    char ks[sizeof(ZRCola::keyseq_db::keyseq)] = {};
-                    ((ZRCola::keyseq_db::keyseq*)ks)->chr = m_char;
+                    std::unique_ptr<ZRCola::keyseq_db::keyseq> ks((ZRCola::keyseq_db::keyseq*)new char[sizeof(ZRCola::keyseq_db::keyseq) + sizeof(wchar_t)*m_char.length()]);
+                    ks->ZRCola::keyseq_db::keyseq::keyseq(NULL, 0, m_char.data(), m_char.length());
                    ZRCola::keyseq_db::indexKey::size_type start;
-                    if (app->m_ks_db.idxChr.find(*(ZRCola::keyseq_db::keyseq*)ks, start)) {
+                    if (app->m_ks_db.idxChr.find(*ks, start)) {
                        ZRCola::keyseq_db::keyseq &seq = app->m_ks_db.idxChr[start];
                        wxString ks_str;
-                        if (ZRCola::keyseq_db::GetSequenceAsText(seq.seq, seq.seq_len, ks_str))
+                        if (ZRCola::keyseq_db::GetSequenceAsText(seq.seq(), seq.seq_len(), ks_str))
                            m_shortcut->SetValue(ks_str);
                        else
                            m_shortcut->SetValue(wxEmptyString);
@ -115,12 +282,12 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event)
                    // Update character category.
                    if (app->m_cc_db.idxChrCat.find(*((ZRCola::chrcat_db::chrcat*)cc), start)) {
                        const auto &cat = app->m_cc_db.idxChrCat[start];
-                        m_category->SetValue(wxGetTranslation(wxString(cat.name, cat.name_len), wxT("ZRCola-zrcdb")));
+                        m_category->SetValue(wxGetTranslation(wxString(cat.name(), cat.name_len()), wxT("ZRCola-zrcdb")));
                    } else
                        m_category->SetValue(wxEmptyString);
                }
                // Update related characters.
-                m_gridRelated->SetCharacters(wxString(chr.data + chr.desc_len, chr.rel_len));
+                m_gridRelated->SetCharacters(wxString(chr.rel(), chr.rel_end()));
            } else {
                m_description->SetValue(wxEmptyString);
                m_shortcut->SetValue(wxEmptyString);
@ -130,9 +297,10 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event)

            // Find character tags.
            std::list<std::wstring> tag_names;
-            ZRCola::chrtag_db::chrtag ct = { m_char };
+            std::unique_ptr<ZRCola::chrtag_db::chrtag> ct((ZRCola::chrtag_db::chrtag*)new char[sizeof(ZRCola::chrtag_db::chrtag) + sizeof(wchar_t)*m_char.length()]);
+            ct->ZRCola::chrtag_db::chrtag::chrtag(m_char.data(), m_char.length());
            size_t end;
-            if (app->m_ct_db.idxChr.find(ct, start, end)) {
+            if (app->m_ct_db.idxChr.find(*ct, start, end)) {
                for (size_t i = start; i < end; i++) {
                    const ZRCola::chrtag_db::chrtag &ct = app->m_ct_db.idxChr[i];

@ -149,9 +317,9 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event)
                            for (auto name = tag_names.cbegin(), name_end = tag_names.cend();; ++name) {
                                if (name == name_end) {
                                    // Add name to the list.
-                                    tag_names.push_back(std::wstring(tn.name, tn.name + tn.name_len));
+                                    tag_names.push_back(std::wstring(tn.name(), tn.name_end()));
                                    break;
-                                } else if (ZRCola::tagname_db::tagname::CompareName(m_locale, name->data(), (unsigned __int16)name->length(), tn.name, tn.name_len) == 0)
+                                } else if (ZRCola::tagname_db::tagname::CompareName(m_locale, name->data(), (unsigned __int16)name->length(), tn.name(), tn.name_len()) == 0)
                                    // Name is already on the list.
                                    break;
                            }
@ -284,10 +452,10 @@ void wxZRColaCharSelect::OnSearchComplete(wxThreadEvent& event)

    if (m_searchThread) {
        // Display results.
-        wxString chars;
+        wxArrayString chars;
        chars.reserve(m_searchThread->m_hits.size());
        for (auto i = m_searchThread->m_hits.cbegin(), i_end = m_searchThread->m_hits.cend(); i != i_end; ++i)
-            chars += i->second;
+            chars.Add(i->second);
        m_gridResults->SetCharacters(chars);

        m_searchThread->Delete();
@ -304,7 +472,7 @@ void wxZRColaCharSelect::OnResultSelectCell(wxGridEvent& event)

    wxString val(m_gridResults->GetCellValue(event.GetRow(), event.GetCol()));
    if (!val.IsEmpty())
-        NavigateTo(val[0]);
+        NavigateTo(val);
 }


@ -314,7 +482,7 @@ void wxZRColaCharSelect::OnResultCellDClick(wxGridEvent& event)

    wxString val(m_gridResults->GetCellValue(event.GetRow(), event.GetCol()));
    if (!val.IsEmpty()) {
-        NavigateTo(val[0]);
+        NavigateTo(val);
        wxCommandEvent e(wxEVT_COMMAND_BUTTON_CLICKED, wxID_OK);
        m_sdbSizerButtonsOK->GetEventHandler()->ProcessEvent(e);
    }
@ -328,7 +496,7 @@ void wxZRColaCharSelect::OnResultsKeyDown(wxKeyEvent& event)
    case WXK_NUMPAD_ENTER:
        wxString val(m_gridResults->GetCellValue(m_gridResults->GetCursorRow(), m_gridResults->GetCursorColumn()));
        if (!val.IsEmpty()) {
-            NavigateTo(val[0]);
+            NavigateTo(val);
            wxCommandEvent e(wxEVT_COMMAND_BUTTON_CLICKED, wxID_OK);
            m_sdbSizerButtonsOK->GetEventHandler()->ProcessEvent(e);

@ -347,7 +515,7 @@ void wxZRColaCharSelect::OnRecentSelectCell(wxGridEvent& event)

    wxString val(m_gridRecent->GetCellValue(event.GetRow(), event.GetCol()));
    if (!val.IsEmpty())
-        NavigateTo(val[0]);
+        NavigateTo(val);
 }


@ -357,7 +525,7 @@ void wxZRColaCharSelect::OnRecentCellDClick(wxGridEvent& event)

    wxString val(m_gridRecent->GetCellValue(event.GetRow(), event.GetCol()));
    if (!val.IsEmpty()) {
-        NavigateTo(val[0]);
+        NavigateTo(val);
        wxCommandEvent e(wxEVT_COMMAND_BUTTON_CLICKED, wxID_OK);
        m_sdbSizerButtonsOK->GetEventHandler()->ProcessEvent(e);
    }
@ -371,7 +539,7 @@ void wxZRColaCharSelect::OnRecentKeyDown(wxKeyEvent& event)
    case WXK_NUMPAD_ENTER:
        wxString val(m_gridRecent->GetCellValue(m_gridRecent->GetCursorRow(), m_gridRecent->GetCursorColumn()));
        if (!val.IsEmpty()) {
-            NavigateTo(val[0]);
+            NavigateTo(val);
            wxCommandEvent e(wxEVT_COMMAND_BUTTON_CLICKED, wxID_OK);
            m_sdbSizerButtonsOK->GetEventHandler()->ProcessEvent(e);

@ -431,7 +599,7 @@ void wxZRColaCharSelect::OnRelatedSelectCell(wxGridEvent& event)

    wxString val(m_gridRelated->GetCellValue(event.GetRow(), event.GetCol()));
    if (!val.IsEmpty())
-        NavigateTo(val[0]);
+        NavigateTo(val);
 }


@ -439,15 +607,15 @@ void wxZRColaCharSelect::OnOKButtonClick(wxCommandEvent& event)
 {
    event.Skip();

-    wxString
-        recent(m_gridRecent->GetCharacters()),
-        val(1, m_char);
-    for (size_t i = 0, n = recent.Length(); i < n; i++) {
-        const wxStringCharType c = recent[i];
+    const wxArrayString &recent = m_gridRecent->GetCharacters();
+    wxArrayString val;
+    val.reserve(recent.GetCount() + 1);
+    val.Add(m_char);
+    for (size_t i = 0, n = recent.GetCount(); i < n; i++) {
+        const wxString &c = recent[i];
        if (c != m_char)
-            val += c;
+            val.Add(c);
    }
-
    m_gridRecent->SetCharacters(val);
 }

@ -457,13 +625,13 @@ void wxZRColaCharSelect::ResetResults()
    // Fill the results.
    auto app = dynamic_cast<ZRColaApp*>(wxTheApp);
    size_t i, n = app->m_chr_db.idxChr.size();
-    wxString val;
+    wxArrayString val;
    val.reserve(n);
    for (i = 0; i < n; i++) {
        const auto &chr = app->m_chr_db.idxChr[i];
        auto idx = m_ccOrder.find(chr.cat);
        if (idx == m_ccOrder.end() || m_categories->IsChecked(idx->second))
-            val += chr.chr;
+            val.Add(wxString(chr.chr(), chr.chr_len()));
    }
    m_gridResults->SetCharacters(val);
 }
@ -507,7 +675,7 @@ void wxZRColaCharSelect::NavigateBy(int offset)
 }


-void wxZRColaCharSelect::NavigateTo(wchar_t c)
+void wxZRColaCharSelect::NavigateTo(const wxString &c)
 {
    if (m_char != c) {
        // Update history state
@ -548,7 +716,7 @@ wxZRColaCharSelect::SearchThread::SearchThread(wxZRColaCharSelect *parent) :
 wxThread::ExitCode wxZRColaCharSelect::SearchThread::Entry()
 {
    auto app = dynamic_cast<ZRColaApp*>(wxTheApp);
-    std::map<wchar_t, ZRCola::charrank_t> hits;
+    std::map<std::wstring, ZRCola::charrank_t> hits;

    if (TestDestroy()) return (wxThread::ExitCode)1;

@ -561,7 +729,7 @@ wxThread::ExitCode wxZRColaCharSelect::SearchThread::Entry()

    {
        // Search by description and merge results.
-        std::map<wchar_t, ZRCola::charrank_t> hits_sub;
+        std::map<std::wstring, ZRCola::charrank_t> hits_sub;
        if (!app->m_chr_db.Search(m_search.c_str(), m_cats, hits, hits_sub, TestDestroyS, this)) return (wxThread::ExitCode)1;
        for (auto i = hits_sub.cbegin(), i_end = hits_sub.cend(); i != i_end; ++i) {
            if (TestDestroy()) return (wxThread::ExitCode)1;
@ -589,7 +757,7 @@ wxThread::ExitCode wxZRColaCharSelect::SearchThread::Entry()
        if (i->second > rank_threshold)
            m_hits.push_back(std::make_pair(i->second, i->first));
    }
-    std::qsort(m_hits.data(), m_hits.size(), sizeof(std::pair<ZRCola::charrank_t, wchar_t>), CompareHits);
+    std::qsort(m_hits.data(), m_hits.size(), sizeof(std::pair<ZRCola::charrank_t, std::wstring>), CompareHits);

    // Signal the event handler that this thread is going to be destroyed.
    // NOTE: here we assume that using the m_parent pointer is safe,
@ -637,7 +805,17 @@ void wxPersistentZRColaCharSelect::Save() const
    auto wnd = static_cast<const wxZRColaCharSelect*>(GetWindow()); // dynamic_cast is not reliable as we are typically called late in the wxTopLevelWindowMSW destructor.
    auto app = dynamic_cast<ZRColaApp*>(wxTheApp);

-    SaveValue(wxT("recentChars"), wnd->m_gridRecent->GetCharacters());
+    wxString val;
+    auto &recent = wnd->m_gridRecent->GetCharacters();
+    for (size_t i = 0, n = recent.GetCount(); i < n; i++) {
+        if (i) val += wxT('|');
+        auto &chr = recent[i];
+        for (size_t j = 0, m = chr.Length(); j < m; j++) {
+            if (j) val += wxT('+');
+            val += wxString::Format(wxT("%04X"), chr[j]);
+        }
+    }
+    SaveValue(wxT("recentChars2"), val);

    for (size_t i = 0, n = app->m_cc_db.idxRnk.size(); i < n; i++) {
        const auto &cc = app->m_cc_db.idxRnk[i];
@ -656,8 +834,38 @@ bool wxPersistentZRColaCharSelect::Restore()
    auto app = dynamic_cast<ZRColaApp*>(wxTheApp);

    wxString recent;
-    if (RestoreValue(wxT("recentChars"), &recent))
-        wnd->m_gridRecent->SetCharacters(recent);
+    if (RestoreValue(wxT("recentChars2"), &recent)) {
+        // Native format found.
+        wxArrayString val;
+        wxString chr;
+        wchar_t c = 0;
+        for (size_t i = 0, n = recent.Length();; i++) {
+            if (i >= n) {
+                if (c)              { chr += c;     c = 0;       }
+                if (!chr.IsEmpty()) { val.Add(chr); chr.Clear(); }
+                break;
+            } else {
+                wxStringCharType r = recent[i];
+                     if (wxT('0') <= r && r <= wxT('9')) c = (c << 4) | (r - wxT('0')     );
+                else if (wxT('A') <= r && r <= wxT('F')) c = (c << 4) | (r - wxT('A') + 10);
+                else if (wxT('a') <= r && r <= wxT('f')) c = (c << 4) | (r - wxT('a') + 10);
+                else if (r == wxT('+')) {
+                    if (c)              { chr += c;     c = 0;       }
+                } else if (r == wxT('|')) {
+                    if (c)              { chr += c;     c = 0;       }
+                    if (!chr.IsEmpty()) { val.Add(chr); chr.Clear(); }
+                } else
+                    break;
+            }
+        }
+        wnd->m_gridRecent->SetCharacters(val);
+    } else if (RestoreValue(wxT("recentChars"), &recent)) {
+        // Legacy value found.
+        wxArrayString val;
+        for (size_t i = 0, n = recent.Length(); i < n; i++)
+            val.Add(wxString(1, recent[i]));
+        wnd->m_gridRecent->SetCharacters(val);
+    }

    for (size_t i = 0, n = app->m_cc_db.idxRnk.size(); i < n; i++) {
        const auto &cc = app->m_cc_db.idxRnk[i];
--- a/ZRCola/zrcolachrslct.h
+++ b/ZRCola/zrcolachrslct.h
@ -28,15 +28,106 @@ class wxPersistentZRColaCharSelect;

 #include "zrcolagui.h"
 #include <zrcola/character.h>
-#include <wxex/valhex.h>
+#include <wx/validate.h>
 #include <wxex/persist/dialog.h>
 #include <wx/event.h>
 #include <wx/thread.h>
 #include <list>
 #include <map>
+#include <string>
 #include <vector>


+///
+/// Validator for Unicode character
+///
+class WXEXTEND_API wxZRColaUTF16CharValidator : public wxValidator
+{
+public:
+    ///
+    /// Construct the validator with a value to store data
+    ///
+    wxZRColaUTF16CharValidator(wchar_t *val = NULL);
+
+    ///
+    /// Copies this validator
+    ///
+    virtual wxObject* Clone() const;
+
+    ///
+    /// Validates the value
+    ///
+    virtual bool Validate(wxWindow *parent);
+
+    ///
+    /// Transfers the value to the window
+    ///
+    virtual bool TransferToWindow();
+
+    ///
+    /// Transfers the value from the window
+    ///
+    virtual bool TransferFromWindow();
+
+    ///
+    /// Parses Unicode character value
+    ///
+    static bool Parse(const wxString &val_in, size_t i_start, size_t i_end, wxTextCtrl *ctrl, wxWindow *parent, wchar_t *val_out = NULL);
+
+protected:
+    wchar_t *m_val; ///< Pointer to variable to receive control's parsed value
+
+private:
+    wxDECLARE_DYNAMIC_CLASS(wxZRColaUTF16CharValidator);
+    wxDECLARE_NO_ASSIGN_CLASS(wxZRColaUTF16CharValidator);
+};
+
+
+///
+/// Validator for Unicode dump
+///
+class wxZRColaUnicodeDumpValidator : public wxValidator
+{
+public:
+    ///
+    /// Construct the validator with a value to store data
+    ///
+    wxZRColaUnicodeDumpValidator(wxString *val = NULL);
+
+    ///
+    /// Copies this validator
+    ///
+    virtual wxObject* Clone() const;
+
+    ///
+    /// Validates the value
+    ///
+    virtual bool Validate(wxWindow *parent);
+
+    ///
+    /// Transfers the value to the window
+    ///
+    virtual bool TransferToWindow();
+
+    ///
+    /// Transfers the value from the window
+    ///
+    virtual bool TransferFromWindow();
+
+    ///
+    /// Parses Unicode dump value
+    ///
+    static bool Parse(const wxString &val_in, size_t i_start, size_t i_end, wxTextCtrl *ctrl, wxWindow *parent, wxString *val_out = NULL);
+
+protected:
+    wxString *m_val; ///< Pointer to variable to receive control's parsed value
+
+private:
+    wxDECLARE_DYNAMIC_CLASS(wxZRColaUnicodeDumpValidator);
+    wxDECLARE_NO_ASSIGN_CLASS(wxZRColaUnicodeDumpValidator);
+};
+
+
 wxDECLARE_EVENT(wxEVT_SEARCH_COMPLETE, wxThreadEvent);


@ -76,10 +167,10 @@ protected:

    void ResetResults();
    void NavigateBy(int offset);
-    void NavigateTo(wchar_t c);
+    void NavigateTo(const wxString &c);

 public:
-    wchar_t m_char;                                 ///< Currently selected character (0 when none)
+    wxString m_char;                                ///< Currently selected character (empty when none)

 protected:
    LCID m_locale;                                  ///< Locale for tag lookup
@ -104,7 +195,7 @@ protected:
    public:
        std::wstring m_search;                      ///< Search phrase
        std::set<ZRCola::chrcatid_t> m_cats;        ///< Search categories
-        std::vector<std::pair<ZRCola::charrank_t, wchar_t> > m_hits; ///< Search results
+        std::vector<std::pair<ZRCola::charrank_t, std::wstring> > m_hits; ///< Search results

    protected:
        wxZRColaCharSelect *m_parent;               ///< Thread owner
@ -116,7 +207,7 @@ protected:
    ///
    struct NavigationState
    {
-        wchar_t m_char;
+        std::wstring m_char;
        struct {
            wxGridCellCoords m_selected;
        } m_related;
--- a/ZRCola/zrcolafrm.cpp
+++ b/ZRCola/zrcolafrm.cpp
@ -213,7 +213,7 @@ void wxZRColaFrame::OnForwardEvent(wxCommandEvent& event)

 void wxZRColaFrame::OnInsertCharacter(wxCommandEvent& event)
 {
-    if (m_chrSelect->ShowModal() == wxID_OK && m_chrSelect->m_char) {
+    if (m_chrSelect->ShowModal() == wxID_OK && !m_chrSelect->m_char.empty()) {
        m_panel->m_decomposed->WriteText(m_chrSelect->m_char);
        m_panel->m_decomposed->SetFocus();
    }
--- a/ZRCola/zrcolakeyhndlr.cpp
+++ b/ZRCola/zrcolakeyhndlr.cpp
@ -96,13 +96,9 @@ bool wxZRColaKeyHandler::ProcessEvent(wxEvent& event)
                    (e.AltDown()     ? ZRCola::keyseq_db::keyseq::ALT   : 0);
                m_seq.push_back(key);

-                auto n = m_seq.size();
-                ZRCola::keyseq_db::keyseq *ks = (ZRCola::keyseq_db::keyseq*)new char[sizeof(ZRCola::keyseq_db::keyseq) + sizeof(ZRCola::keyseq_db::keyseq::key_t)*n];
-                ks->chr = 0;
-                ks->seq_len = n;
-                memcpy(ks->seq, m_seq.data(), sizeof(ZRCola::keyseq_db::keyseq::key_t)*n);
+                std::unique_ptr<ZRCola::keyseq_db::keyseq> ks((ZRCola::keyseq_db::keyseq*)new char[sizeof(ZRCola::keyseq_db::keyseq) + sizeof(ZRCola::keyseq_db::keyseq::key_t)*m_seq.size()]);
+                ks->ZRCola::keyseq_db::keyseq::keyseq(m_seq.data(), m_seq.size());
                found = app->m_ks_db.idxKey.find(*ks, start);
-                delete ks;
            }

            if (found) {
@ -116,14 +112,14 @@ bool wxZRColaKeyHandler::ProcessEvent(wxEvent& event)
                wxObject *obj = event.GetEventObject();
                if (obj && obj->IsKindOf(wxCLASSINFO(wxTextCtrl))) {
                    // Push text to source control.
-                    ((wxTextCtrl*)obj)->WriteText(ks.chr);
+                    ((wxTextCtrl*)obj)->WriteText(wxString(ks.chr(), ks.chr_len()));

                    // Event is fully processed now.
                    event.StopPropagation();
                    return true;
                }
            } else if (start < app->m_ks_db.idxKey.size() &&
-                ZRCola::keyseq_db::keyseq::CompareSequence(m_seq.data(), m_seq.size(), app->m_ks_db.idxKey[start].seq, std::min<unsigned __int16>(app->m_ks_db.idxKey[start].seq_len, m_seq.size())) == 0)
+                ZRCola::keyseq_db::keyseq::CompareSequence(m_seq.data(), m_seq.size(), app->m_ks_db.idxKey[start].seq(), std::min<size_t>(app->m_ks_db.idxKey[start].seq_len(), m_seq.size())) == 0)
            {
                // The sequence is a partial match. Continue watching.
                if (pFrame && pFrame->GetStatusBar())
--- a/ZRCola/zrcolasettings.cpp
+++ b/ZRCola/zrcolasettings.cpp
@ -34,7 +34,7 @@ wxZRColaSettings::wxZRColaSettings(wxWindow* parent) :
    for (size_t i = 0, n = app->m_lang_db.idxLng.size(); i < n; i++) {
        const auto &lang = app->m_lang_db.idxLng[i];
        wxString
-            label(lang.name, lang.name_len),
+            label(lang.name(), lang.name_len()),
            label_tran(wxGetTranslation(label, wxT("ZRCola-zrcdb")));
        m_languages->Insert(label_tran, i);
    }
--- a/ZRColaCompile/dbsource.cpp
+++ b/ZRColaCompile/dbsource.cpp
@ -28,35 +28,19 @@ using namespace winstd;
 // ZRCola::DBSource::character_bank
 //////////////////////////////////////////////////////////////////////////

-ZRCola::DBSource::character_bank::character_bank() : vector<unique_ptr<ZRCola::DBSource::character> >()
-{
-    resize(0x10000);
-}
-
-
 void ZRCola::DBSource::character_bank::build_related()
 {
-    // Initialize ignore list.
-    m_ignore.insert(L"letter");
-    m_ignore.insert(L"modifier");
-    m_ignore.insert(L"symbol");
-    m_ignore.insert(L"accent");
-    m_ignore.insert(L"with");
-    m_ignore.insert(L"and");
-    m_ignore.insert(L"capital");
-    m_ignore.insert(L"small");
-    m_ignore.insert(L"combining");
-
    SYSTEM_INFO si;
    GetSystemInfo(&si);

    // Launch workers.
    build_related_worker **workers = new build_related_worker*[si.dwNumberOfProcessors];
-    size_type from = 0, to;
-    for (DWORD i = 0; i < si.dwNumberOfProcessors; i++) {
-        to = MulDiv(i + 1, 0x10000, si.dwNumberOfProcessors);
-        workers[i] = new build_related_worker(this, from, to);
-        from = to;
+    size_type from = 0, total = size();
+    iterator chr_from = begin(), chr_to;
+    for (DWORD i = 0; i < si.dwNumberOfProcessors; i++, chr_from = chr_to) {
+        size_type to = MulDiv(i + 1, total, si.dwNumberOfProcessors);
+        for (chr_to = chr_from; from < to; from++, ++chr_to);
+        workers[i] = new build_related_worker(this, chr_from, chr_to);
    }

    // Wait for workers.
@ -71,7 +55,7 @@ void ZRCola::DBSource::character_bank::build_related()
 }


-ZRCola::DBSource::character_bank::build_related_worker::build_related_worker(_In_ const character_bank *cb, _In_ size_type from, _In_ size_type to) :
+ZRCola::DBSource::character_bank::build_related_worker::build_related_worker(_In_ const character_bank *cb, _In_ iterator from, _In_ iterator to) :
    win_handle((HANDLE)_beginthreadex(NULL, 0, process, this, CREATE_SUSPENDED, NULL)),
    m_heap(HeapCreate(0, 0, 0)),
    m_cb(cb),
@ -86,40 +70,39 @@ ZRCola::DBSource::character_bank::build_related_worker::build_related_worker(_In
 unsigned int ZRCola::DBSource::character_bank::build_related_worker::process()
 {
    heap_allocator<wchar_t> al(m_heap);
-    basic_string<wchar_t, char_traits<wchar_t>, heap_allocator<wchar_t> > rel(al);
+    vector<wchar_t, heap_allocator<wchar_t> > rel(al);
    set<wstring, less<wstring>, heap_allocator<wstring> > matching(less<wstring>(), al);

-    for (size_type i = m_from; i < m_to; i++) {
-        auto &chr = *(m_cb->at(i).get());
-        if (&chr == NULL) continue;
-
+    for (auto c = m_from; c != m_to; c++) {
        rel.clear();

-        // Remove all unexisting, inactive, or self related characters.
-        for (auto j = chr.rel.length(); j--;) {
-            wchar_t c = chr.rel[j];
-            if (m_cb->at(c) && (wchar_t)j != c)
-                rel += c;
+        // Skip all nonexistent or self-related characters.
+        auto m_cb_end = m_cb->cend();
+        for (std::vector<wchar_t>::const_pointer c_rel = c->second.rel.data(), c_rel_end = c_rel + c->second.rel.size(), c_rel_next = c_rel_end; c_rel < c_rel_end; c_rel = c_rel_next) {
+            c_rel_next = c_rel + wcsnlen(c_rel, c_rel_end - c_rel) + 1;
+            if (m_cb->find(c_rel) != m_cb_end && c->first.compare(c_rel) != 0)
+                rel.insert(rel.end(), c_rel, c_rel_next);
        }

-        // Add all characters that share enought keywords.
-        for (size_type j = 0, j_end = m_cb->size(); j < j_end; j++) {
-            if (i == j || rel.find((wchar_t)j) != wstring::npos)
+        // Add all characters that share enough keywords.
+        for (auto c2 = m_cb->cbegin(), c2_end = m_cb->cend(); c2 != c2_end; ++c2) {
+            if (c == c2)
                continue;
-            const auto &chr2 = *(m_cb->at(j).get());
-            if (&chr2 == NULL)
+            bool already_present = false;
+            for (std::vector<wchar_t>::const_pointer c_rel = rel.data(), c_rel_end = c_rel + rel.size(), c_rel_next = c_rel_end; c_rel < c_rel_end; c_rel = c_rel_next) {
+                c_rel_next = c_rel + wcsnlen(c_rel, c_rel_end - c_rel) + 1;
+                if (c2->first.compare(c_rel) == 0) {
+                    already_present = true;
+                    break;
+                }
+            }
+            if (already_present)
                continue;

            set<wstring>::size_type comparisons = 0;
            matching.clear();
-            for (auto term = chr.terms.cbegin(), term_end = chr.terms.cend(); term != term_end; ++term) {
-                // Test for ignored word(s).
-                if (m_cb->m_ignore.find(*term) != m_cb->m_ignore.cend())
-                    continue;
-                for (auto term2 = chr2.terms.cbegin(), term2_end = chr2.terms.cend(); term2 != term2_end; ++term2) {
-                    // Test for ignored word(s).
-                    if (m_cb->m_ignore.find(*term2) != m_cb->m_ignore.cend())
-                        continue;
+            for (auto term = c->second.terms_rel.cbegin(), term_end = c->second.terms_rel.cend(); term != term_end; ++term) {
+                for (auto term2 = c2->second.terms_rel.cbegin(), term2_end = c2->second.terms_rel.cend(); term2 != term2_end; ++term2) {
                    comparisons++;
                    if (*term == *term2)
                        matching.insert(*term);
@ -130,11 +113,11 @@ unsigned int ZRCola::DBSource::character_bank::build_related_worker::process()
                // If 1/2 terms match, assume related.
                auto hits = matching.size();
                if (hits*hits*2 >= comparisons)
-                    rel += chr2.chr;
+                    rel.insert(rel.end(), c2->first.data(), c2->first.data() + c2->first.length() + 1);
            }
        }

-        chr.rel.assign(rel.c_str(), rel.length());
+        c->second.rel.assign(rel.cbegin(), rel.cend());
    }

    return 0;
@ -197,7 +180,7 @@ void ZRCola::DBSource::character_desc_idx::parse_keywords(const wchar_t *str, se
 }


-void ZRCola::DBSource::character_desc_idx::add_keywords(const set<wstring> &terms, wchar_t chr, size_t sub)
+void ZRCola::DBSource::character_desc_idx::add_keywords(const set<wstring> &terms, const wstring &chr, size_t sub)
 {
    for (auto term = terms.cbegin(), term_end = terms.cend(); term != term_end; ++term) {
        if (sub) {
@ -250,6 +233,16 @@ void ZRCola::DBSource::character_desc_idx::save(ZRCola::textindex<wchar_t, wchar

 ZRCola::DBSource::DBSource()
 {
+    // Initialize ignore list.
+    m_terms_ignore.insert(L"letter");
+    m_terms_ignore.insert(L"modifier");
+    m_terms_ignore.insert(L"symbol");
+    m_terms_ignore.insert(L"accent");
+    m_terms_ignore.insert(L"with");
+    m_terms_ignore.insert(L"and");
+    m_terms_ignore.insert(L"capital");
+    m_terms_ignore.insert(L"small");
+    m_terms_ignore.insert(L"combining");
 }


@ -434,29 +427,31 @@ bool ZRCola::DBSource::GetUnicodeString(const com_obj<ADOField>& f, wstring& str

    variant v;
    wxVERIFY(SUCCEEDED(f->get_Value(&v)));
-    wxCHECK(SUCCEEDED(v.change_type(VT_BSTR)), false);
-
-    // Parse the field. Must be "xxxx+xxxx+xxxx..." sequence.
    str.clear();
-    for (UINT i = 0, n = ::SysStringLen(V_BSTR(&v)); i < n && V_BSTR(&v)[i];) {
-        // Parse Unicode code.
-        UINT j = 0;
-        wchar_t c = 0;
-        for (; i < n && V_BSTR(&v)[i]; i++, j++) {
-                 if (L'0' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'9') c = c*0x10 + (V_BSTR(&v)[i] - L'0');
-            else if (L'A' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'F') c = c*0x10 + (V_BSTR(&v)[i] - L'A' + 10);
-            else if (L'a' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'f') c = c*0x10 + (V_BSTR(&v)[i] - L'a' + 10);
-            else break;
-        }
-        if (j <= 0 || 4 < j) {
-            bstr fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname)));
-            _ftprintf(stderr, wxT("%s: error ZCC0020: Syntax error in \"%.*ls\" field (\"%.*ls\"). Unicode code must be one to four hexadecimal characters long.\n"), m_filename.c_str(), fieldname.length(), (BSTR)fieldname, n, V_BSTR(&v));
-            return false;
-        }
-        str += c;
+    if (V_VT(&v) != VT_NULL) {
+        wxCHECK(SUCCEEDED(v.change_type(VT_BSTR)), false);

-        // Skip delimiter(s) and whitespace.
-        for (; i < n && V_BSTR(&v)[i] && (V_BSTR(&v)[i] == L'+' || _iswspace_l(V_BSTR(&v)[i], m_locale)); i++);
+        // Parse the field. Must be "xxxx+xxxx+xxxx..." sequence.
+        for (UINT i = 0, n = ::SysStringLen(V_BSTR(&v)); i < n && V_BSTR(&v)[i];) {
+            // Parse Unicode code.
+            UINT j = 0;
+            wchar_t c = 0;
+            for (; i < n && V_BSTR(&v)[i]; i++, j++) {
+                     if (L'0' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'9') c = c*0x10 + (V_BSTR(&v)[i] - L'0');
+                else if (L'A' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'F') c = c*0x10 + (V_BSTR(&v)[i] - L'A' + 10);
+                else if (L'a' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'f') c = c*0x10 + (V_BSTR(&v)[i] - L'a' + 10);
+                else break;
+            }
+            if (j <= 0 || 4 < j) {
+                bstr fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname)));
+                _ftprintf(stderr, wxT("%s: error ZCC0020: Syntax error in \"%.*ls\" field (\"%.*ls\"). Unicode code must be one to four hexadecimal characters long.\n"), m_filename.c_str(), fieldname.length(), (BSTR)fieldname, n, V_BSTR(&v));
+                return false;
+            }
+            str += c;
+
+            // Skip delimiter(s) and whitespace.
+            for (; i < n && V_BSTR(&v)[i] && (V_BSTR(&v)[i] == L'+' || _iswspace_l(V_BSTR(&v)[i], m_locale)); i++);
+        }
    }

    return true;
@ -615,19 +610,19 @@ bool ZRCola::DBSource::GetTranslation(const com_obj<ADORecordset>& rs, ZRCola::D
    {
        com_obj<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"komb"), &f)));
-        wxCHECK(GetUnicodeString(f, t.decomp.str), false);
+        wxCHECK(GetUnicodeString(f, t.dec.str), false);
    }

    {
        com_obj<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak"), &f)));
-        wxCHECK(GetUnicodeString(f, t.chr), false);
+        wxCHECK(GetUnicodeString(f, t.com), false);
    }

    {
        com_obj<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"rang_znak"), &f)));
-        wxCHECK(GetValue(f, t.decomp.rank), false);
+        wxCHECK(GetValue(f, t.dec.rank), false);
    }

    return true;
@ -665,7 +660,7 @@ bool ZRCola::DBSource::GetKeySequence(const com_obj<ADORecordset>& rs, ZRCola::D
    {
        com_obj<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"Znak"), &f)));
-        wxCHECK(GetUnicodeCharacter(f, ks.chr), false);
+        wxCHECK(GetUnicodeString(f, ks.chr), false);
    }

    int modifiers;
@ -884,11 +879,11 @@ bool ZRCola::DBSource::GetCharacterGroup(const com_obj<ADORecordset>& rs, chrgrp
        com_obj<ADOField> f_char, f_show;
        wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"Znak"  ), &f_char)));
        wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"pogost"), &f_show)));
-        for (VARIANT_BOOL eof = VARIANT_TRUE; SUCCEEDED(rs_chars->get_EOF(&eof)) && !eof; rs_chars->MoveNext()) {
-            wchar_t c;
-            wxCHECK(GetUnicodeCharacter(f_char, c), false);
-            size_t n = cg.chars.length();
-            cg.chars += c;
+        size_t n = 0;
+        for (VARIANT_BOOL eof = VARIANT_TRUE; SUCCEEDED(rs_chars->get_EOF(&eof)) && !eof; rs_chars->MoveNext(), n++) {
+            wstring c;
+            wxCHECK(GetUnicodeString(f_char, c), false);
+            cg.chars.insert(cg.chars.end(), c.data(), c.data() + c.length() + 1);
            bool show;
            wxCHECK(GetValue(f_show, show), false);
            if ((n % 16) == 0)
@ -932,42 +927,49 @@ bool ZRCola::DBSource::GetCharacter(const com_obj<ADORecordset>& rs, character&

    com_obj<ADOFields> flds;
    wxVERIFY(SUCCEEDED(rs->get_Fields(&flds)));
-    wchar_t c;
-    chr.rel.clear();
+    wstring c;
+    chr.second.terms.clear();
+    chr.second.terms_rel.clear();
+    chr.second.rel.clear();

    {
        com_obj<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak"), &f)));
-        wxCHECK(GetUnicodeCharacter(f, chr.chr), false);
+        wxCHECK(GetUnicodeString(f, chr.first), false);
    }

    {
        com_obj<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak_v"), &f)));
-        wxCHECK(GetUnicodeCharacter(f, c), false);
-        if (c && c != chr.chr)
-            chr.rel += c;
+        wxCHECK(GetUnicodeString(f, c), false);
+        if (!c.empty() && c != chr.first)
+            chr.second.rel.insert(chr.second.rel.end(), c.data(), c.data() + c.length() + 1);
    }

    {
        com_obj<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak_m"), &f)));
-        wxCHECK(GetUnicodeCharacter(f, c), false);
-        if (c && c != chr.chr)
-            chr.rel += c;
+        wxCHECK(GetUnicodeString(f, c), false);
+        if (!c.empty() && c != chr.first)
+            chr.second.rel.insert(chr.second.rel.end(), c.data(), c.data() + c.length() + 1);
    }

    {
        com_obj<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"opis_en"), &f)));
-        wxCHECK(GetValue(f, chr.desc), false);
+        wxCHECK(GetValue(f, chr.second.desc), false);
+        ZRCola::DBSource::character_desc_idx::parse_keywords(chr.second.desc.c_str(), chr.second.terms);
+        for (auto term = chr.second.terms.cbegin(), term_end = chr.second.terms.cend(); term != term_end; ++term) {
+            if (m_terms_ignore.find(*term) != m_terms_ignore.cend())
+                continue;
+            chr.second.terms_rel.insert(*term);
+        }
    }
-    ZRCola::DBSource::character_desc_idx::parse_keywords(chr.desc.c_str(), chr.terms);

    {
        com_obj<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"kat"), &f)));
-        wxCHECK(GetChrCat(f, chr.cat), false);
+        wxCHECK(GetChrCat(f, chr.second.cat), false);
    }

    return true;
@ -1058,7 +1060,7 @@ bool ZRCola::DBSource::GetCharacterTag(const winstd::com_obj<ADORecordset>& rs,
    {
        com_obj<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak"), &f)));
-        wxCHECK(GetUnicodeCharacter(f, ct.chr), false);
+        wxCHECK(GetUnicodeString(f, ct.chr), false);
    }

    {
--- a/ZRColaCompile/dbsource.h
+++ b/ZRColaCompile/dbsource.h
@ -79,8 +79,8 @@ namespace ZRCola {
        ///
        class translation {
        public:
-            std::wstring chr;           ///< Composed character
-            charseq decomp;             ///< Decomposed sequence
+            charseq dec;                ///< Decomposed sequence
+            std::wstring com;           ///< Composed character
        };


@ -114,7 +114,7 @@ namespace ZRCola {
            };

        public:
-            wchar_t chr;                ///< Character
+            std::wstring chr;           ///< Character
            std::vector<keycode> seq;   ///< Key sequence
        };

@ -147,69 +147,58 @@ namespace ZRCola {
            int id;                             ///< Character group ID
            int rank;                           ///< Character group rank
            std::wstring name;                  ///< Character group name
-            std::wstring chars;                 ///< Character group characters
+            std::vector<wchar_t> chars;         ///< Character group characters
            std::vector<unsigned __int16> show; ///< Bit vector if particular character is displayed initially
        };


+        ///
+        /// Character data
+        ///
+        class character_data {
+        public:
+            inline character_data()
+            {
+                cat.data[0] = 0;
+                cat.data[1] = 0;
+            }
+
+            inline character_data(_In_ const character_data &othr) :
+                cat      (othr.cat),
+                desc     (othr.desc),
+                terms    (othr.terms),
+                terms_rel(othr.terms_rel),
+                rel      (othr.rel)
+            {
+            }
+
+            ZRCola::chrcatid_t cat;             ///< Category ID
+            std::wstring desc;                  ///< Character description
+            std::set<std::wstring> terms;       ///< Search terms
+            std::set<std::wstring> terms_rel;   ///< Relevant terms for relating characters
+            std::vector<wchar_t> rel;           ///< Related characters
+        };
+
+
        ///
        /// Character
        ///
-        class character {
-        public:
-            inline character()
-            {
-                chr = 0;
-                cat.data[0] = 0;
-                cat.data[1] = 0;
-            }
-
-            inline character(_In_ const character &othr) :
-                chr  (othr.chr),
-                cat  (othr.cat),
-                desc (othr.desc),
-                terms(othr.terms),
-                rel  (othr.rel)
-            {
-            }
-
-            inline bool operator==(_In_ const character &othr) const
-            {
-                return
-                    chr   == othr.chr   &&
-                    cat   == othr.cat   &&
-                    desc  == othr.desc  &&
-                    terms == othr.terms &&
-                    rel   == othr.rel;
-            }
-
-            inline bool operator!=(_In_ const character &othr) const
-            {
-                return !operator==(othr);
-            }
-
-            wchar_t chr;                    ///< Character
-            ZRCola::chrcatid_t cat;         ///< Category ID
-            std::wstring desc;              ///< Character description
-            std::set<std::wstring> terms;   ///< Search terms
-            std::wstring rel;               ///< Related characters
-        };
+        typedef std::pair<std::wstring, character_data> character;


        ///
        /// Character bank
        ///
-        class character_bank : public std::vector<std::unique_ptr<character> >
+        class character_bank : public std::map<std::wstring, character_data>
        {
        public:
-            character_bank();
            void build_related();

        protected:
            class build_related_worker : public winstd::win_handle
            {
            public:
-                build_related_worker(_In_ const character_bank *cb, _In_ size_type from, _In_ size_type to);
+                build_related_worker(_In_ const character_bank *cb, _In_ iterator from, _In_ iterator to);

                inline void join()
                {
@ -230,12 +219,9 @@ namespace ZRCola {

            protected:
                const character_bank *m_cb;
-                size_type m_from, m_to;
+                iterator m_from, m_to;
                winstd::heap m_heap;
            };
-
-        protected:
-            std::set<std::wstring> m_ignore;
        };


@ -266,8 +252,8 @@ namespace ZRCola {
        {
        public:
            static void parse_keywords(const wchar_t *str, std::set<std::wstring> &terms);
-            void add_keywords(const std::set<std::wstring> &terms, wchar_t chr, size_t sub = 0);
-            inline void add_keywords(const wchar_t *str, wchar_t chr, size_t sub = 0)
+            void add_keywords(const std::set<std::wstring> &terms, const std::wstring &chr, size_t sub = 0);
+            inline void add_keywords(const wchar_t *str, const std::wstring &chr, size_t sub = 0)
            {
                std::set<std::wstring> terms;
                parse_keywords(str, terms);
@ -277,21 +263,21 @@ namespace ZRCola {
            void save(ZRCola::textindex<wchar_t, wchar_t, unsigned __int32> &idx) const;

        protected:
-            inline void add_keyword(const std::wstring &term, wchar_t chr)
+            inline void add_keyword(const std::wstring &term, const std::wstring &chr)
            {
                iterator idx = find(term);
                if (idx == end()) {
                    // New keyword.
-                    insert(std::make_pair(term, std::vector<wchar_t>(1, chr)));
+                    insert(std::make_pair(term, mapped_type(chr.data(), chr.data() + chr.length() + 1)));
                } else {
                    // Append to existing keyword.
-                    std::vector<wchar_t> &val = idx->second;
-                    for (auto i = val.cbegin(), i_end = val.cend(); ; ++i) {
-                        if (i == i_end) {
+                    auto &val = idx->second;
+                    for (mapped_type::size_type i = 0, n = val.size(); ; i += wcsnlen(val.data() + i, n - i) + 1) {
+                        if (i >= n) {
                            // End-of-values reached. Append character.
-                            val.push_back(chr);
+                            val.insert(val.end(), chr.data(), chr.data() + chr.length() + 1);
                            break;
-                        } else if (*i == chr) {
+                        } else if (chr.compare(val.data() + i) == 0) {
                            // Character already among the values.
                            break;
                        }
@ -317,7 +303,7 @@ namespace ZRCola {
        ///
        class chrtag {
        public:
-            wchar_t chr;                ///> Character
+            std::wstring chr;           ///< Character
            int tag;                    ///< Tag ID
        };

@ -697,10 +683,12 @@ namespace ZRCola {

    protected:
        std::basic_string<TCHAR> m_filename;    ///< Database filename
-        winstd::com_obj<ADOConnection> m_db;       ///< Database
+        winstd::com_obj<ADOConnection> m_db;    ///< Database
        _locale_t m_locale;                     ///< Database locale

        winstd::com_obj<ADOCommand> m_comCharacterGroup;   ///< ADO Command for GetCharacterGroup subquery
        winstd::com_obj<ADOParameter> m_pCharacterGroup1;  ///< \c m_comCharacterGroup parameter
+
+        std::set<std::wstring> m_terms_ignore;  ///< Terms to ignore when comparing characters
    };
 };
--- a/ZRColaCompile/main.cpp
+++ b/ZRColaCompile/main.cpp
@ -165,13 +165,13 @@ int _tmain(int argc, _TCHAR *argv[])
                    ZRCola::DBSource::translation trans;
                    if (src.GetTranslation(rs, trans)) {
                        // Add translation to temporary database.
-                        auto const t = db_temp1.find(trans.chr);
+                        auto const t = db_temp1.find(trans.com);
                        if (t != db_temp1.end())
-                            t->second.insert(std::move(trans.decomp));
+                            t->second.insert(std::move(trans.dec));
                        else {
                            translation_db::mapped_type d;
-                            d.insert(std::move(trans.decomp));
-                            db_temp1.insert(std::move(pair<translation_db::key_type, translation_db::mapped_type>(trans.chr, std::move(d))));
+                            d.insert(std::move(trans.dec));
+                            db_temp1.insert(std::move(pair<translation_db::key_type, translation_db::mapped_type>(trans.com, std::move(d))));
                        }
                    } else
                        has_errors = true;
@ -214,16 +214,14 @@ int _tmain(int argc, _TCHAR *argv[])
                        unsigned __int32 idx = db.data.size();
                        wxASSERT_MSG((int)0xffff8000 <= d->rank && d->rank <= (int)0x00007fff, wxT("transformation rank out of bounds"));
                        db.data.push_back((unsigned __int16)d->rank);
-                        wstring::size_type n_com = t->first.length();
-                        wxASSERT_MSG(n_com <= 0xffff, wxT("composition string too long"));
-                        db.data.push_back((unsigned __int16)n_com);
-                        wstring::size_type n_dec = d->str.length();
-                        wxASSERT_MSG(n_com + n_dec <= 0xffff, wxT("decomposition string too long"));
-                        db.data.push_back((unsigned __int16)(n_com + n_dec));
-                        for (wstring::size_type i = 0; i < n_com; i++)
-                            db.data.push_back(t->first[i]);
-                        for (wstring::size_type i = 0; i < n_dec; i++)
-                            db.data.push_back(d->str[i]);
+                        wstring::size_type n = t->first.length();
+                        wxASSERT_MSG(n <= 0xffff, wxT("composition overflow"));
+                        db.data.push_back((unsigned __int16)n);
+                        n += d->str.length();
+                        wxASSERT_MSG(n <= 0xffff, wxT("decomposition overflow"));
+                        db.data.push_back((unsigned __int16)n);
+                        db.data.insert(db.data.end(), t->first.cbegin(), t->first.cend());
+                        db.data.insert(db.data.end(), d->str  .cbegin(), d->str  .cend());
                        db.idxComp  .push_back(idx);
                        db.idxDecomp.push_back(idx);
                    }
@ -265,17 +263,19 @@ int _tmain(int argc, _TCHAR *argv[])
                    if (src.GetKeySequence(rs, ks)) {
                        // Add key sequence to index and data.
                        unsigned __int32 idx = db.data.size();
-                        db.data.push_back(ks.chr);
-                        vector<ZRCola::DBSource::keyseq::keycode>::size_type n = ks.seq.size();
-                        wxASSERT_MSG(n <= 0xffff, wxT("key sequence too long"));
+                        wstring::size_type n = ks.chr.length();
+                        wxASSERT_MSG(n <= 0xffff, wxT("character overflow"));
                        db.data.push_back((unsigned __int16)n);
-                        for (vector<ZRCola::DBSource::keyseq::keycode>::size_type i = 0; i < n; i++) {
-                            const ZRCola::DBSource::keyseq::keycode &kc = ks.seq[i];
-                            db.data.push_back(kc.key);
+                        n += ks.seq.size() * sizeof(ZRCola::keyseq_db::keyseq::key_t) / sizeof(wchar_t);
+                        wxASSERT_MSG(n <= 0xffff, wxT("key sequence overflow"));
+                        db.data.push_back((unsigned __int16)n);
+                        db.data.insert(db.data.end(), ks.chr.cbegin(), ks.chr.cend());
+                        for (auto kc = ks.seq.cbegin(), kc_end = ks.seq.cend(); kc != kc_end; ++kc) {
+                            db.data.push_back(kc->key);
                            db.data.push_back(
-                                (kc.shift ? ZRCola::keyseq_db::keyseq::SHIFT : 0) |
-                                (kc.ctrl  ? ZRCola::keyseq_db::keyseq::CTRL  : 0) |
-                                (kc.alt   ? ZRCola::keyseq_db::keyseq::ALT   : 0));
+                                (kc->shift ? ZRCola::keyseq_db::keyseq::SHIFT : 0) |
+                                (kc->ctrl  ? ZRCola::keyseq_db::keyseq::CTRL  : 0) |
+                                (kc->alt   ? ZRCola::keyseq_db::keyseq::ALT   : 0));
                        }
                        db.idxChr.push_back(idx);
                        db.idxKey.push_back(idx);
@ -293,10 +293,14 @@ int _tmain(int argc, _TCHAR *argv[])
                        &ks1 = db.idxKey[i - 1],
                        &ks2 = db.idxKey[i    ];

-                    if (ZRCola::keyseq_db::keyseq::CompareSequence(ks1.seq, ks1.seq_len, ks2.seq, ks2.seq_len) == 0) {
+                    if (ZRCola::keyseq_db::keyseq::CompareSequence(ks1.seq(), ks1.seq_len(), ks2.seq(), ks2.seq_len()) == 0) {
                        wxString seq_str;
-                        ZRCola::keyseq_db::GetSequenceAsText(ks1.seq, ks1.seq_len, seq_str);
-                        _ftprintf(stderr, wxT("%s: warning ZCC0007: Duplicate key sequence (%ls => %04X or %04X). The keyboard behaviour will be unpredictable.\n"), (LPCTSTR)filenameIn.c_str(), seq_str.c_str(), ks1.chr, ks2.chr);
+                        ZRCola::keyseq_db::GetSequenceAsText(ks1.seq(), ks1.seq_len(), seq_str);
+                        _ftprintf(stderr, wxT("%s: warning ZCC0007: Duplicate key sequence (%ls => %s or %s). The keyboard behaviour will be unpredictable.\n"),
+                            (LPCTSTR)filenameIn.c_str(),
+                            seq_str.c_str(),
+                            ZRCola::GetUnicodeDump(ks1.chr(), ks1.chr_len()).c_str(),
+                            ZRCola::GetUnicodeDump(ks2.chr(), ks2.chr_len()).c_str());
                    }
                }

@ -334,13 +338,11 @@ int _tmain(int argc, _TCHAR *argv[])

                        // Add language to index and data.
                        unsigned __int32 idx = db.data.size();
-                        for (wstring::size_type i = 0; i < sizeof(ZRCola::langid_t)/sizeof(unsigned __int16); i++)
-                            db.data.push_back(((const unsigned __int16*)lang.id.data)[i]);
+                        db.data.insert(db.data.end(), reinterpret_cast<const unsigned __int16*>(&lang.id), reinterpret_cast<const unsigned __int16*>(&lang.id + 1));
                        wstring::size_type n = lang.name.length();
-                        wxASSERT_MSG(n <= 0xffff, wxT("language name too long"));
+                        wxASSERT_MSG(n <= 0xffff, wxT("language name overflow"));
                        db.data.push_back((unsigned __int16)n);
-                        for (wstring::size_type i = 0; i < n; i++)
-                            db.data.push_back(lang.name[i]);
+                        db.data.insert(db.data.end(), lang.name.cbegin(), lang.name.cend());
                        db.idxLng.push_back(idx);
                    } else
                        has_errors = true;
@ -383,13 +385,11 @@ int _tmain(int argc, _TCHAR *argv[])
                    if (src.GetLanguageCharacter(rs, lc)) {
                        // Add language characters to index and data.
                        unsigned __int32 idx = db.data.size();
-                        for (wstring::size_type i = 0; i < sizeof(ZRCola::langid_t)/sizeof(unsigned __int16); i++)
-                            db.data.push_back(((const unsigned __int16*)lc.lang.data)[i]);
+                        db.data.insert(db.data.end(), reinterpret_cast<const unsigned __int16*>(&lc.lang), reinterpret_cast<const unsigned __int16*>(&lc.lang + 1));
                        wstring::size_type n = lc.chr.length();
-                        wxASSERT_MSG(n <= 0xffff, wxT("character string too long"));
+                        wxASSERT_MSG(n <= 0xffff, wxT("character overflow"));
                        db.data.push_back((unsigned __int16)n);
-                        for (wstring::size_type i = 0; i < n; i++)
-                            db.data.push_back(lc.chr[i]);
+                        db.data.insert(db.data.end(), lc.chr.cbegin(), lc.chr.cend());
                        db.idxChr.push_back(idx);
 #ifdef ZRCOLA_LANGCHAR_LANG_IDX
                        db.idxLng.push_back(idx);
@ -447,18 +447,15 @@ int _tmain(int argc, _TCHAR *argv[])
                        db.data.push_back((unsigned __int16)cg.id);
                        wxASSERT_MSG((int)0xffff8000 <= cg.rank && cg.rank <= (int)0x00007fff, wxT("character group rank out of bounds"));
                        db.data.push_back((unsigned __int16)cg.rank);
-                        wstring::size_type n_name = cg.name.length();
-                        wxASSERT_MSG(n_name <= 0xffff, wxT("character group name too long"));
-                        db.data.push_back((unsigned __int16)n_name);
-                        wstring::size_type n_char = cg.chars.length();
-                        wxASSERT_MSG(n_char <= 0xffff, wxT("too many character group characters"));
-                        db.data.push_back((unsigned __int16)n_char);
-                        for (wstring::size_type i = 0; i < n_name; i++)
-                            db.data.push_back(cg.name[i]);
-                        for (wstring::size_type i = 0; i < n_char; i++)
-                            db.data.push_back(cg.chars[i]);
-                        for (std::vector<unsigned __int16>::size_type i = 0, n = cg.show.size(); i < n; i++)
-                            db.data.push_back(cg.show[i]);
+                        wstring::size_type n = cg.name.length();
+                        wxASSERT_MSG(n <= 0xffff, wxT("character group name overflow"));
+                        db.data.push_back((unsigned __int16)n);
+                        n += cg.chars.size();
+                        wxASSERT_MSG(n <= 0xffff, wxT("character group characters overflow"));
+                        db.data.push_back((unsigned __int16)n);
+                        db.data.insert(db.data.end(), cg.name .cbegin(), cg.name .cend());
+                        db.data.insert(db.data.end(), cg.chars.cbegin(), cg.chars.cend());
+                        db.data.insert(db.data.end(), cg.show .cbegin(), cg.show .cend());
                        db.idxRnk.push_back(idx);
                    } else
                        has_errors = true;
@ -490,15 +487,14 @@ int _tmain(int argc, _TCHAR *argv[])
                ZRCola::DBSource::character_desc_idx idxChrDsc, idxChrDscSub;

                ZRCola::DBSource::character_bank chrs;
+                ZRCola::DBSource::character chr;

                // Phase 1: Parse characters and build indexes.
                for (; !ZRCola::DBSource::IsEOF(rs); rs->MoveNext()) {
                    // Read character from the database.
-                    unique_ptr<ZRCola::DBSource::character> c(new ZRCola::DBSource::character);
-                    if (src.GetCharacter(rs, *c)) {
-                        const auto &chr = *c.get();
-                        chrs[chr.chr].swap(c);
-                    } else
+                    if (src.GetCharacter(rs, chr))
+                        chrs[chr.first] = std::move(chr.second);
+                    else
                        has_errors = true;
                }

@ -512,33 +508,30 @@ int _tmain(int argc, _TCHAR *argv[])
                db.data  .reserve(count*4);

                // Phase 3: Parse characters and build index and data.
-                for (size_t i = 0, i_end = chrs.size(); i < i_end; i++) {
-                    const auto &chr = *(chrs[i].get());
-                    if (&chr == NULL) continue;
-
+                for (auto chr = chrs.cbegin(), chr_end = chrs.cend(); chr != chr_end; ++chr) {
                    // Add character to index and data.
                    unsigned __int32 idx = db.data.size();
-                    db.data.push_back((unsigned __int16)chr.chr);
-                    for (wstring::size_type i = 0; i < sizeof(ZRCola::chrcatid_t)/sizeof(unsigned __int16); i++)
-                        db.data.push_back(((const unsigned __int16*)chr.cat.data)[i]);
-                    wstring::size_type n_desc = chr.desc.length();
-                    wxASSERT_MSG(n_desc <= 0xffff, wxT("character description too long"));
-                    db.data.push_back((unsigned __int16)n_desc);
-                    wstring::size_type n_rel = chr.rel.length();
-                    wxASSERT_MSG(n_rel <= 0xffff, wxT("too many related characters"));
-                    db.data.push_back((unsigned __int16)n_rel);
-                    for (wstring::size_type i = 0; i < n_desc; i++)
-                        db.data.push_back(chr.desc[i]);
-                    for (wstring::size_type i = 0; i < n_rel; i++)
-                        db.data.push_back(chr.rel[i]);
+                    db.data.insert(db.data.end(), reinterpret_cast<const unsigned __int16*>(&chr->second.cat), reinterpret_cast<const unsigned __int16*>(&chr->second.cat + 1));
+                    wstring::size_type n = chr->first.length();
+                    wxASSERT_MSG(n <= 0xffff, wxT("character overflow"));
+                    db.data.push_back((unsigned __int16)n);
+                    n += chr->second.desc.length();
+                    wxASSERT_MSG(n <= 0xffff, wxT("character description overflow"));
+                    db.data.push_back((unsigned __int16)n);
+                    n += chr->second.rel.size();
+                    wxASSERT_MSG(n <= 0xffff, wxT("related characters overflow"));
+                    db.data.push_back((unsigned __int16)n);
+                    db.data.insert(db.data.end(), chr->first      .cbegin(), chr->first      .cend());
+                    db.data.insert(db.data.end(), chr->second.desc.cbegin(), chr->second.desc.cend());
+                    db.data.insert(db.data.end(), chr->second.rel .cbegin(), chr->second.rel .cend());
                    db.idxChr.push_back(idx);

                    // Add description (and keywords) to index.
-                    idxChrDsc   .add_keywords(chr.terms, chr.chr, 0);
-                    idxChrDscSub.add_keywords(chr.terms, chr.chr, 3);
+                    idxChrDsc   .add_keywords(chr->second.terms, chr->first, 0);
+                    idxChrDscSub.add_keywords(chr->second.terms, chr->first, 3);

                    // Mark category used.
-                    categories_used.insert(chr.cat);
+                    categories_used.insert(chr->second.cat);
                }

                // Sort indices.
@ -588,15 +581,13 @@ int _tmain(int argc, _TCHAR *argv[])

                        // Add character category to index and data.
                        unsigned __int32 idx = db.data.size();
-                        for (wstring::size_type i = 0; i < sizeof(ZRCola::chrcatid_t)/sizeof(unsigned __int16); i++)
-                            db.data.push_back(((const unsigned __int16*)cc.id.data)[i]);
+                        db.data.insert(db.data.end(), reinterpret_cast<const unsigned __int16*>(&cc.id), reinterpret_cast<const unsigned __int16*>(&cc.id + 1));
                        wxASSERT_MSG((int)0xffff8000 <= cc.rank && cc.rank <= (int)0x00007fff, wxT("character category rank out of bounds"));
                        db.data.push_back((unsigned __int16)cc.rank);
-                        wstring::size_type n_name = cc.name.length();
-                        wxASSERT_MSG(n_name <= 0xffff, wxT("character category name too long"));
-                        db.data.push_back((unsigned __int16)n_name);
-                        for (wstring::size_type i = 0; i < n_name; i++)
-                            db.data.push_back(cc.name[i]);
+                        wstring::size_type n = cc.name.length();
+                        wxASSERT_MSG(n <= 0xffff, wxT("character category name overflow"));
+                        db.data.push_back((unsigned __int16)n);
+                        db.data.insert(db.data.end(), cc.name.cbegin(), cc.name.cend());
                        db.idxChrCat.push_back(idx);
                        db.idxRnk   .push_back(idx);
                    } else
@ -639,9 +630,12 @@ int _tmain(int argc, _TCHAR *argv[])
                    if (src.GetCharacterTag(rs, ct)) {
                        // Add characters tags to index and data.
                        unsigned __int32 idx = db.data.size();
-                        db.data.push_back(ct.chr);
                        wxASSERT_MSG((int)0xffff8000 <= ct.tag && ct.tag <= (int)0x00007fff, wxT("tag out of bounds"));
                        db.data.push_back((unsigned __int16)ct.tag);
+                        wstring::size_type n = ct.chr.length();
+                        wxASSERT_MSG(n <= 0xffff, wxT("character overflow"));
+                        db.data.push_back((unsigned __int16)n);
+                        db.data.insert(db.data.end(), ct.chr.cbegin(), ct.chr.cend());
                        db.idxChr.push_back(idx);
                        db.idxTag.push_back(idx);
                    } else
@ -691,10 +685,9 @@ int _tmain(int argc, _TCHAR *argv[])
                                db.data.push_back(LOWORD(ln->first));
                                db.data.push_back(HIWORD(ln->first));
                                wstring::size_type n = nm->length();
-                                wxASSERT_MSG(n <= 0xffff, wxT("tag name too long"));
+                                wxASSERT_MSG(n <= 0xffff, wxT("tag name overflow"));
                                db.data.push_back((unsigned __int16)n);
-                                for (wstring::size_type i = 0; i < n; i++)
-                                    db.data.push_back(nm->at(i));
+                                db.data.insert(db.data.end(), nm->cbegin(), nm->cend());
                                db.idxName.push_back(idx);
                                db.idxTag .push_back(idx);
                            }
--- a/lib/libZRCola/include/zrcola/character.h
+++ b/lib/libZRCola/include/zrcola/character.h
@ -41,6 +41,8 @@ namespace ZRCola {
    ///
    typedef double charrank_t;

+#pragma pack(push)
+#pragma pack(2)
    ///
    /// Character category ID type
    /// Two letter abbreviation, non-terminated
@ -66,6 +68,7 @@ namespace ZRCola {
        ///
        static const chrcatid_t blank;
    };
+#pragma pack(pop)


    ///
@ -180,11 +183,66 @@ namespace ZRCola {
        /// Character data
        ///
        struct character {
-            wchar_t chr;                ///> Character
+        public:
            chrcatid_t cat;             ///> Category ID
-            unsigned __int16 desc_len;  ///< Character description length in \c data
-            unsigned __int16 rel_len;   ///< Related character count in \c data
-            wchar_t data[];             ///< Character description and list of related characters
+
+        protected:
+            unsigned __int16 chr_to;    ///< Character end in \c data
+            unsigned __int16 desc_to;   ///< Character description end in \c data
+            unsigned __int16 rel_to;    ///< Related characters end in \c data
+            wchar_t data[];             ///< Character, character description, and list of related characters
+
+        private:
+            inline character(_In_ const character &other);
+            inline character& operator=(_In_ const character &other);
+
+        public:
+            ///
+            /// Constructs the character
+            ///
+            /// \param[in] chr       Character
+            /// \param[in] chr_len   Number of UTF-16 characters in \p chr
+            /// \param[in] cat       Category
+            /// \param[in] desc      Description
+            /// \param[in] desc_len  Number of UTF-16 characters in \p desc
+            /// \param[in] rel       Related characters list (zero delimited)
+            /// \param[in] rel_len   Number of UTF-16 characters in \p rel (including zero delimiters)
+            ///
+            inline character(
+                _In_opt_z_count_(chr_len)  const wchar_t    *chr      = NULL,
+                _In_opt_                         size_t      chr_len  = 0,
+                _In_opt_                         chrcatid_t  cat      = chrcatid_t::blank,
+                _In_opt_z_count_(desc_len) const wchar_t    *desc     = NULL,
+                _In_opt_                         size_t      desc_len = 0,
+                _In_opt_z_count_(rel_len)  const wchar_t    *rel      = NULL,
+                _In_opt_                         size_t      rel_len  = 0)
+            {
+                this->cat = cat;
+                this->chr_to = static_cast<unsigned __int16>(chr_len);
+                if (chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len);
+                this->desc_to = static_cast<unsigned __int16>(this->chr_to + desc_len);
+                if (desc_len) memcpy(this->data + this->chr_to, desc, sizeof(wchar_t)*desc_len);
+                this->rel_to = static_cast<unsigned __int16>(this->desc_to + rel_len);
+                if (rel_len) memcpy(this->data + this->desc_to, rel, sizeof(wchar_t)*rel_len);
+            }
+
+            inline const wchar_t*         chr    () const { return data;          };
+            inline       wchar_t*         chr    ()       { return data;          };
+            inline const wchar_t*         chr_end() const { return data + chr_to; };
+            inline       wchar_t*         chr_end()       { return data + chr_to; };
+            inline       unsigned __int16 chr_len() const { return chr_to;        };
+
+            inline const wchar_t*         desc    () const { return data + chr_to;    };
+            inline       wchar_t*         desc    ()       { return data + chr_to;    };
+            inline const wchar_t*         desc_end() const { return data + desc_to;   };
+            inline       wchar_t*         desc_end()       { return data + desc_to;   };
+            inline       unsigned __int16 desc_len() const { return desc_to - chr_to; };
+
+            inline const wchar_t*         rel    () const { return data + desc_to;   };
+            inline       wchar_t*         rel    ()       { return data + desc_to;   };
+            inline const wchar_t*         rel_end() const { return data + rel_to;    };
+            inline       wchar_t*         rel_end()       { return data + rel_to;    };
+            inline       unsigned __int16 rel_len() const { return rel_to - desc_to; };
        };
 #pragma pack(pop)

@ -214,8 +272,8 @@ namespace ZRCola {
            ///
            virtual int compare(_In_ const character &a, _In_ const character &b) const
            {
-                     if (a.chr < b.chr) return -1;
-                else if (a.chr > b.chr) return  1;
+                int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len());
+                if (r != 0) return r;

                return 0;
            }
@ -252,23 +310,25 @@ namespace ZRCola {
        /// \param[in   ] fn_abort  Pointer to function to periodically test for search cancellation
        /// \param[in   ] cookie    Cookie for \p fn_abort call
        ///
-        bool Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<wchar_t, charrank_t> &hits, _Inout_ std::map<wchar_t, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
+        bool Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::wstring, charrank_t> &hits, _Inout_ std::map<std::wstring, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;

        ///
        /// Get character category
        ///
-        /// \param[in] c  Character
+        /// \param[in] chr  Character
+        /// \param[in] len  Number of UTF-16 characters in \p chr
        ///
        /// \returns
        /// - Character category if character found
        /// - `ZRCola::chrcatid_t::blank` otherwise
        ///
-        inline chrcatid_t GetCharCat(wchar_t c) const
+        inline chrcatid_t GetCharCat(_In_z_count_(len) const wchar_t *chr, _In_ const size_t len) const
        {
-            char _chr[sizeof(character)];
-            ((character *)_chr)->chr = c;
+            assert(len <= 0xffff);
+            std::unique_ptr<char[]> buf(new char[sizeof(character) + sizeof(wchar_t)*len]); // array form: the storage is released with delete[], matching new[]
+            const character *c = new(buf.get()) character(chr, len);                        // placement new builds the search key (header + trailing data) in the buffer
            indexChar::size_type start;
-            return idxChr.find(*((character *)_chr), start) ? idxChr[start].cat : chrcatid_t::blank;
+            return idxChr.find(*c, start) ? idxChr[start].cat : chrcatid_t::blank;
        }
    };

@ -287,10 +347,44 @@ namespace ZRCola {
        /// Character category data
        ///
        struct chrcat {
-            chrcatid_t id;              ///< Character category ID
-            unsigned __int16 rank;      ///< Character category rank
-            unsigned __int16 name_len;  ///< \c name length (in characters)
-            wchar_t name[];             ///< Character category name
+        public:
+            chrcatid_t id;                              ///< Character category ID
+            unsigned __int16 rank;                      ///< Character category rank
+
+        protected:
+            unsigned __int16 name_to;                   ///< Character category name end in \c data
+            wchar_t data[];                             ///< Character category name
+
+        private:
+            inline chrcat(_In_ const chrcat &other);
+            inline chrcat& operator=(_In_ const chrcat &other);
+
+        public:
+            ///
+            /// Constructs the character category
+            ///
+            /// \param[in] id        Character category ID
+            /// \param[in] rank      Character category rank
+            /// \param[in] name      Character category name
+            /// \param[in] name_len  Number of UTF-16 characters in \p name
+            ///
+            inline chrcat(
+                _In_opt_                         chrcatid_t        id       = chrcatid_t::blank,
+                _In_opt_                         unsigned __int16  rank     = 0,
+                _In_opt_z_count_(name_len) const wchar_t          *name     = NULL,
+                _In_opt_                         size_t            name_len = 0)
+            {
+                this->id   = id;
+                this->rank = rank;
+                this->name_to = static_cast<unsigned __int16>(name_len);
+                if (name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len);
+            }
+
+            inline const wchar_t*         name    () const { return data;           };
+            inline       wchar_t*         name    ()       { return data;           };
+            inline const wchar_t*         name_end() const { return data + name_to; };
+            inline       wchar_t*         name_end()       { return data + name_to; };
+            inline       unsigned __int16 name_len() const { return name_to;        };
        };
 #pragma pack(pop)

@ -374,10 +468,13 @@ namespace ZRCola {
                     if (a.rank < b.rank) return -1;
                else if (a.rank > b.rank) return +1;

-                int r = _wcsncoll(a.name, b.name, std::min<unsigned __int16>(a.name_len, b.name_len));
+                unsigned __int16
+                    a_name_len = a.name_len(),
+                    b_name_len = b.name_len();
+                int r = _wcsncoll(a.name(), b.name(), std::min<unsigned __int16>(a_name_len, b_name_len));
                if (r != 0) return r;
-                     if (a.name_len < b.name_len) return -1;
-                else if (a.name_len > b.name_len) return +1;
+                     if (a_name_len < b_name_len) return -1;
+                else if (a_name_len > b_name_len) return +1;

                return 0;
            }
--- a/lib/libZRCola/include/zrcola/common.h
+++ b/lib/libZRCola/include/zrcola/common.h
@ -19,6 +19,7 @@

 #pragma once

+#include <sal.h>
 #include <istream>
 #include <ostream>
 #include <utility>
@ -58,11 +59,11 @@ namespace ZRCola {
    typedef unsigned __int32 recordsize_t;


+#pragma pack(push)
+#pragma pack(2)
    ///
    /// Key-value index pair for mappings
    ///
-#pragma pack(push)
-#pragma pack(2)
    template <class T>
    struct mappair_t
    {
@ -72,6 +73,8 @@ namespace ZRCola {
 #pragma pack(pop)


+#pragma pack(push)
+#pragma pack(2)
    ///
    /// Language ID type
    /// Three letter abbreviation, zero terminated
@ -105,6 +108,7 @@ namespace ZRCola {
        ///
        static const langid_t blank;
    };
+#pragma pack(pop)


    ///
@ -228,11 +232,11 @@ namespace ZRCola {
    ///
    /// Memory index
    ///
-    template <class T, class T_idx = unsigned __int32, class T_data = T>
+    template <class T_data, class T_idx = unsigned __int32, class T_el = T_data>
    class index : public std::vector<T_idx>
    {
    protected:
-        std::vector<T> &host;  ///< Reference to host data
+        std::vector<T_data> &host;  ///< Reference to host data

    public:
        ///
@ -240,7 +244,7 @@ namespace ZRCola {
        ///
        /// \param[in] h  Reference to vector holding the data
        ///
-        index(_In_ std::vector<T> &h) : host(h) {}
+        index(_In_ std::vector<T_data> &h) : host(h) {}


        ///
@ -250,9 +254,9 @@ namespace ZRCola {
        ///
        /// \returns Data reference
        ///
-        inline const T_data& at(size_type pos) const
+        inline const T_el& at(size_type pos) const
        {
-            return (const T_data&)host.at(std::vector<T_idx>::at(pos));
+            return *reinterpret_cast<const T_el*>(&host[std::vector<T_idx>::at(pos)]);
        }


@ -263,9 +267,9 @@ namespace ZRCola {
        ///
        /// \returns Data reference
        ///
-        inline T_data& at(size_type pos)
+        inline T_el& at(size_type pos)
        {
-            return (T_data&)host.at(std::vector<T_idx>::at(pos));
+            return *reinterpret_cast<T_el*>(&host[std::vector<T_idx>::at(pos)]);
        }


@ -276,9 +280,9 @@ namespace ZRCola {
        ///
        /// \returns Data reference
        ///
-        inline const T_data& operator[](size_type pos) const
+        inline const T_el& operator[](size_type pos) const
        {
-            return (const T_data&)host[std::vector<T_idx>::at(pos)];
+            return *reinterpret_cast<const T_el*>(&host[std::vector<T_idx>::operator[](pos)]);
        }


@ -289,9 +293,9 @@ namespace ZRCola {
        ///
        /// \returns Data reference
        ///
-        inline T_data& operator[](size_type pos)
+        inline T_el& operator[](size_type pos)
        {
-            return (T_data&)host[std::vector<T_idx>::at(pos)];
+            return *reinterpret_cast<T_el*>(&host[std::vector<T_idx>::operator[](pos)]);
        }


@ -315,7 +319,7 @@ namespace ZRCola {
        /// - =0 when a == b
        /// - >0 when a >  b
        ///
-        virtual int compare(_In_ const T_data &a, _In_ const T_data &b) const = 0;
+        virtual int compare(_In_ const T_el &a, _In_ const T_el &b) const = 0;


        ///
@ -329,7 +333,7 @@ namespace ZRCola {
        /// - =0 when a == b
        /// - >0 when a >  b
        ///
-        virtual int compare_sort(_In_ const T_data &a, _In_ const T_data &b) const
+        virtual int compare_sort(_In_ const T_el &a, _In_ const T_el &b) const
        {
            // Revert to `compare()` by default.
            return compare(a, b);
@ -348,7 +352,7 @@ namespace ZRCola {
        /// - \c true if found
        /// - \c false otherwise
        ///
-        bool find(_In_ const T_data &el, _Out_ size_type &start, _Out_ size_type &end) const
+        bool find(_In_ const T_el &el, _Out_ size_type &start, _Out_ size_type &end) const
        {
            // Start with the full search area.
            for (start = 0, end = size(); start < end; ) {
@ -388,7 +392,7 @@ namespace ZRCola {
        /// - \c true if found
        /// - \c false otherwise
        ///
-        bool find(_In_ const T_data &el, _Out_ size_type &start) const
+        bool find(_In_ const T_el &el, _Out_ size_type &start) const
        {
            // Start with the full search area.
            size_t end;
@ -415,8 +419,11 @@ namespace ZRCola {
    private:
        static int __cdecl compare_s(void *p, const void *a, const void *b)
        {
-            const index<T, T_idx, T_data> *t = (const index<T, T_idx, T_data>*)p;
-            return t->compare_sort((const T_data&)t->host[*(const T_idx*)a], (const T_data&)t->host[*(const T_idx*)b]);
+            const index<T_data, T_idx, T_el> *_this = reinterpret_cast<const index<T_data, T_idx, T_el>*>(p);
+            const T_data *data = _this->host.data();
+            return _this->compare_sort(
+                *reinterpret_cast<const T_el*>(data + *reinterpret_cast<const T_idx*>(a)),
+                *reinterpret_cast<const T_el*>(data + *reinterpret_cast<const T_idx*>(b)));
        }
    };

@ -556,10 +563,10 @@ namespace ZRCola {
    ///
    /// Binary compares two strings
    ///
-    /// \param[in] str_a      First string
-    /// \param[in] str_a_end  First string end
-    /// \param[in] str_b      Second string
-    /// \param[in] str_b_end  Second string end
+    /// \param[in] str_a    First string
+    /// \param[in] count_a  Number of characters in string \p str_a
+    /// \param[in] str_b    Second string
+    /// \param[in] count_b  Number of characters in string \p str_b
    ///
    /// \returns
    /// - <0 when str_a <  str_b
@ -568,18 +575,70 @@ namespace ZRCola {
    ///
    /// \note
    /// The function does not treat \\0 characters as terminators for performance reasons.
-    /// Therefore \p str_a_end and \p str_b_end must represent exact string ends.
+    /// Therefore \p count_a and \p count_b must represent exact string lengths.
    ///
-    inline int CompareString(const wchar_t *str_a, const wchar_t *str_a_end, const wchar_t *str_b, const wchar_t *str_b_end)
+    inline int CompareString(_In_ const wchar_t *str_a, _In_ size_t count_a, _In_ const wchar_t *str_b, _In_ size_t count_b)
    {
-        for (; ; str_a++, str_b++) {
-                    if (str_a >= str_a_end && str_b >= str_b_end) return  0;
-            else if (str_a >= str_a_end && str_b <  str_b_end) return -1;
-            else if (str_a <  str_a_end && str_b >= str_b_end) return +1;
-            else if (*str_a < *str_b) return -1;
-            else if (*str_a > *str_b) return +1;
+        for (size_t i = 0; ; i++) {
+                 if (i >= count_a && i >= count_b) return  0;
+            else if (i >= count_a && i <  count_b) return -1;
+            else if (i <  count_a && i >= count_b) return +1;
+            else if (str_a[i] < str_b[i]) return -1;
+            else if (str_a[i] > str_b[i]) return +1;
        }
    }
+
+    ///
+    /// Generates and returns Unicode representation of the string using hexadecimal codes.
+    ///
+    /// \param[in] str    Unicode string
+    /// \param[in] count  Number of characters in string \p str
+    /// \param[in] sep    Separator inserted between consecutive codes
+    ///
+    /// \returns Hexadecimal codes (zero-padded to at least four digits) of the characters, delimited by \p sep
+    ///
+    /// \note
+    /// The dump stops at the first \\0 character or after \p count characters, whichever comes first.
+    ///
+    inline std::string GetUnicodeDumpA(_In_ const wchar_t *str, _In_ size_t count, _In_opt_z_ const char *sep = "+")
+    {
+        std::string out;
+        // The separator is annotated as optional: treat NULL as an empty separator instead of passing it to strlen().
+        if (!sep) sep = "";
+        // One formatted item holds the separator, four hexadecimal digits and the terminator.
+        size_t dump_len_max = strlen(sep) + 4 + 1;
+        // The buffer is allocated with new[], so it is owned by the array form of unique_ptr, which releases it with delete[].
+        std::unique_ptr<char[]> dump_obj(new char[dump_len_max]);
+        char *dump = dump_obj.get();
+        if (count && str[0]) {
+            size_t i = 0;
+            // The first code is written without the leading separator.
+            out.insert(out.end(), dump, dump + _snprintf(dump, dump_len_max, "%04X", str[i++]));
+            while (i < count && str[i])
+                out.insert(out.end(), dump, dump + _snprintf(dump, dump_len_max, "%s%04X", sep, str[i++]));
+        }
+
+        return out;
+    }
+
+    ///
+    /// Generates and returns Unicode representation of the string using hexadecimal codes.
+    ///
+    /// \param[in] str    Unicode string
+    /// \param[in] count  Number of characters in string \p str
+    /// \param[in] sep    Separator inserted between consecutive codes
+    ///
+    /// \returns Hexadecimal codes (zero-padded to at least four digits) of the characters, delimited by \p sep
+    ///
+    /// \note
+    /// The dump stops at the first \\0 character or after \p count characters, whichever comes first.
+    ///
+    inline std::wstring GetUnicodeDumpW(_In_ const wchar_t *str, _In_ size_t count, _In_opt_z_ const wchar_t *sep = L"+")
+    {
+        std::wstring out;
+        // The separator is annotated as optional: treat NULL as an empty separator instead of passing it to wcslen().
+        if (!sep) sep = L"";
+        // One formatted item holds the separator, four hexadecimal digits and the terminator.
+        size_t dump_len_max = wcslen(sep) + 4 + 1;
+        // The buffer is allocated with new[], so it is owned by the array form of unique_ptr, which releases it with delete[].
+        std::unique_ptr<wchar_t[]> dump_obj(new wchar_t[dump_len_max]);
+        wchar_t *dump = dump_obj.get();
+        if (count && str[0]) {
+            size_t i = 0;
+            // The first code is written without the leading separator.
+            out.insert(out.end(), dump, dump + _snwprintf(dump, dump_len_max, L"%04X", str[i++]));
+            while (i < count && str[i])
+                out.insert(out.end(), dump, dump + _snwprintf(dump, dump_len_max, L"%s%04X", sep, str[i++]));
+        }
+
+        return out;
+    }
+
+#ifdef _UNICODE
+#define GetUnicodeDump GetUnicodeDumpW
+#else
+#define GetUnicodeDump GetUnicodeDumpA
+#endif
 };


@ -591,8 +650,8 @@ namespace ZRCola {
 ///
 /// \returns The stream \p stream
 ///
-template <class T, class T_idx, class T_data>
-inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::index<T, T_idx, T_data> &idx)
+template <class T_data, class T_idx, class T_el>
+inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::index<T_data, T_idx, T_el> &idx)
 {
    // Write index count.
    auto idx_count = idx.size();
@ -623,8 +682,8 @@ inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::i
 ///
 /// \returns The stream \p stream
 ///
-template <class T, class T_idx, class T_data>
-inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::index<T, T_idx, T_data> &idx)
+template <class T_data, class T_idx, class T_el>
+inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::index<T_data, T_idx, T_el> &idx)
 {
    unsigned __int32 count;

--- a/lib/libZRCola/include/zrcola/language.h
+++ b/lib/libZRCola/include/zrcola/language.h
@ -45,9 +45,40 @@ namespace ZRCola {
        /// Character data
        ///
        struct langchar {
+        public:
            langid_t lang;              ///< Language ID
-            unsigned __int16 chr_len;   ///< \c chr length (in UTF-16 characters)
-            wchar_t chr[];              ///< Character
+
+        protected:
+            unsigned __int16 chr_to;    ///< Character end in \c data
+            wchar_t data[];             ///< Character
+
+        private:
+            inline langchar(_In_ const langchar &other);
+            inline langchar& operator=(_In_ const langchar &other);
+
+        public:
+            ///
+            /// Constructs the language character
+            ///
+            /// \param[in] lang     Character language
+            /// \param[in] chr      Character
+            /// \param[in] chr_len  Number of UTF-16 characters in \p chr
+            ///
+            inline langchar(
+                _In_opt_                        langid_t  lang    = langid_t::blank,
+                _In_opt_z_count_(chr_len) const wchar_t  *chr     = NULL,
+                _In_opt_                        size_t    chr_len = 0)
+            {
+                this->lang = lang;
+                this->chr_to = static_cast<unsigned __int16>(chr_len);
+                if (chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len);
+            }
+
+            inline const wchar_t*         chr    () const { return data;          };
+            inline       wchar_t*         chr    ()       { return data;          };
+            inline const wchar_t*         chr_end() const { return data + chr_to; };
+            inline       wchar_t*         chr_end()       { return data + chr_to; };
+            inline       unsigned __int16 chr_len() const { return chr_to;        };
        };
 #pragma pack(pop)

@ -77,7 +108,7 @@ namespace ZRCola {
            ///
            virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const
            {
-                int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len);
+                int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len());
                if (r != 0) return r;

                     if (a.lang < b.lang) return -1;
@ -118,7 +149,7 @@ namespace ZRCola {
                     if (a.lang < b.lang) return -1;
                else if (a.lang > b.lang) return  1;

-                int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len);
+                // Languages are equal: order by the character sequence. chr is an accessor now and must be called.
+                int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len());
                 if (r != 0) return r;

                return 0;
@ -178,9 +209,40 @@ namespace ZRCola {
        /// Language data
        ///
        struct language {
+        public:
            langid_t id;                ///< Language ID
-            unsigned __int16 name_len;  ///< \c name length (in UTF-16 characters)
-            wchar_t name[];             ///< Language name
+
+        protected:
+            unsigned __int16 name_to;   ///< Language name end in \c data
+            wchar_t data[];             ///< Language name
+
+        private:
+            inline language(_In_ const language &other);
+            inline language& operator=(_In_ const language &other);
+
+        public:
+            ///
+            /// Constructs the language
+            ///
+            /// \param[in] id        Language ID
+            /// \param[in] name      Language name
+            /// \param[in] name_len  Number of UTF-16 characters in \p name
+            ///
+            inline language(
+                _In_opt_                         langid_t  id       = langid_t::blank,
+                _In_opt_z_count_(name_len) const wchar_t  *name     = NULL,
+                _In_opt_                         size_t    name_len = 0)
+            {
+                this->id = id;
+                this->name_to = static_cast<unsigned __int16>(name_len);
+                if (name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len);
+            }
+
+            inline const wchar_t*         name    () const { return data;           };
+            inline       wchar_t*         name    ()       { return data;           };
+            inline const wchar_t*         name_end() const { return data + name_to; };
+            inline       wchar_t*         name_end()       { return data + name_to; };
+            inline       unsigned __int16 name_len() const { return name_to;        };
        };
 #pragma pack(pop)

--- a/lib/libZRCola/include/zrcola/tag.h
+++ b/lib/libZRCola/include/zrcola/tag.h
@ -47,8 +47,40 @@ namespace ZRCola {
        /// Character tag data
        ///
        struct chrtag {
-            wchar_t chr;    ///> Character
+        public:
            tagid_t tag;    ///< Tag ID
+
+        protected:
+            unsigned __int16 chr_to;    ///< Character end in \c data
+            wchar_t data[];             ///< Character
+
+        private:
+            inline chrtag(_In_ const chrtag &other);
+            inline chrtag& operator=(_In_ const chrtag &other);
+
+        public:
+            ///
+            /// Constructs the character tag
+            ///
+            /// \param[in] chr      Character
+            /// \param[in] chr_len  Number of UTF-16 characters in \p chr
+            /// \param[in] tag      Tag
+            ///
+            inline chrtag(
+                _In_opt_z_count_(chr_len) const wchar_t *chr      = NULL,
+                _In_opt_                        size_t   chr_len  = 0,
+                _In_opt_                        tagid_t  tag      = 0)
+            {
+                this->tag    = tag;
+                this->chr_to = static_cast<unsigned __int16>(chr_len);
+                if (chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len);
+            }
+
+            inline const wchar_t*         chr    () const { return data;          };
+            inline       wchar_t*         chr    ()       { return data;          };
+            inline const wchar_t*         chr_end() const { return data + chr_to; };
+            inline       wchar_t*         chr_end()       { return data + chr_to; };
+            inline       unsigned __int16 chr_len() const { return chr_to;        };
        };
 #pragma pack(pop)

@ -78,8 +110,8 @@ namespace ZRCola {
            ///
            virtual int compare(_In_ const chrtag &a, _In_ const chrtag &b) const
            {
-                     if (a.chr < b.chr) return -1;
-                else if (a.chr > b.chr) return  1;
+                int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len());
+                if (r != 0) return r;

                return 0;
            }
@ -97,8 +129,8 @@ namespace ZRCola {
            ///
            virtual int compare_sort(_In_ const chrtag &a, _In_ const chrtag &b) const
            {
-                     if (a.chr < b.chr) return -1;
-                else if (a.chr > b.chr) return  1;
+                int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len());
+                if (r != 0) return r;

                     if (a.tag < b.tag) return -1;
                else if (a.tag > b.tag) return  1;
@ -156,8 +188,8 @@ namespace ZRCola {
                     if (a.tag < b.tag) return -1;
                else if (a.tag > b.tag) return  1;

-                     if (a.chr < b.chr) return -1;
-                else if (a.chr > b.chr) return  1;
+                int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len());
+                if (r != 0) return r;

                return 0;
            }
@ -191,7 +223,7 @@ namespace ZRCola {
        /// \param[in   ] fn_abort  Pointer to function to periodically test for search cancellation
        /// \param[in   ] cookie    Cookie for \p fn_abort call
        ///
-        bool Search(_In_ const std::map<tagid_t, unsigned __int16> &tags, _In_ const character_db &ch_db, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<wchar_t, charrank_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
+        bool Search(_In_ const std::map<tagid_t, unsigned __int16> &tags, _In_ const character_db &ch_db, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::wstring, charrank_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
    };


@ -209,19 +241,53 @@ namespace ZRCola {
        /// Tag name data
        ///
        struct tagname {
+        public:
            tagid_t tag;                ///< Tag ID
            LCID locale;                ///< Locale ID
-            unsigned __int16 name_len;  ///< \c name length (in characters)
-            wchar_t name[];             ///< Tag localized name
+
+        protected:
+            unsigned __int16 name_to;   ///< Tag name end in \c data
+            wchar_t data[];             ///< Tag name
+
+        private:
+            inline tagname(_In_ const tagname &other);
+            inline tagname& operator=(_In_ const tagname &other);
+
+        public:
+            ///
+            /// Constructs the localized tag name
+            ///
+            /// \param[in] tag       Tag
+            /// \param[in] locale    Locale
+            /// \param[in] name      Tag name
+            /// \param[in] name_len  Number of UTF-16 characters in \p name
+            ///
+            inline tagname(
+                _In_opt_                         tagid_t  tag      = 0,
+                _In_opt_                         LCID     locale   = MAKELCID(MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL), SORT_DEFAULT),
+                _In_opt_z_count_(name_len) const wchar_t *name     = NULL,
+                _In_opt_                         size_t   name_len = 0)
+            {
+                this->tag    = tag;
+                this->locale = locale;
+                this->name_to = static_cast<unsigned __int16>(name_len);
+                if (name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len);
+            }
+
+            inline const wchar_t*         name    () const { return data; };
+            inline       wchar_t*         name    ()       { return data; };
+            inline const wchar_t*         name_end() const { return data + name_to; };
+            inline       wchar_t*         name_end()       { return data + name_to; };
+            inline       unsigned __int16 name_len() const { return name_to; };

            ///
            /// Compares two names
            ///
            /// \param[in] locale   Locale ID to use for compare
            /// \param[in] str_a    First name
-            /// \param[in] count_a  Number of characters in string \p str_a
+            /// \param[in] count_a  Number of UTF-16 characters in \p str_a
            /// \param[in] str_b    Second name
-            /// \param[in] count_b  Number of characters in string \p str_b
+            /// \param[in] count_b  Number of UTF-16 characters in \p str_b
            ///
            /// \returns
            /// - <0 when str_a <  str_b
@ -259,7 +325,7 @@ namespace ZRCola {
            indexName(_In_ std::vector<unsigned __int16> &h) : index<unsigned __int16, unsigned __int32, tagname>(h) {}

            ///
-            /// Compares two tag names by name (for searching)
+            /// Compares two tag names by locale and name (for searching)
            ///
            /// \param[in] a  Pointer to first element
            /// \param[in] b  Pointer to second element
@ -274,14 +340,14 @@ namespace ZRCola {
                     if (a.locale < b.locale) return -1;
                else if (a.locale > b.locale) return  1;

-                int r = tagname::CompareName(a.locale, a.name, a.name_len, b.name, b.name_len);
+                int r = tagname::CompareName(a.locale, a.name(), a.name_len(), b.name(), b.name_len());
                if (r != 0) return r;

                return 0;
            }

            ///
-            /// Compares two tag names by name (for sorting)
+            /// Compares two tag names by locale and name (for sorting)
            ///
            /// \param[in] a  Pointer to first element
            /// \param[in] b  Pointer to second element
@ -296,7 +362,7 @@ namespace ZRCola {
                     if (a.locale < b.locale) return -1;
                else if (a.locale > b.locale) return  1;

-                int r = tagname::CompareName(a.locale, a.name, a.name_len, b.name, b.name_len);
+                int r = tagname::CompareName(a.locale, a.name(), a.name_len(), b.name(), b.name_len());
                if (r != 0) return r;

                     if (a.tag < b.tag) return -1;
--- a/lib/libZRCola/include/zrcola/translate.h
+++ b/lib/libZRCola/include/zrcola/translate.h
@ -54,22 +54,49 @@ namespace ZRCola {
            unsigned __int16 dec_to;   ///< Decomposed string end in \c data
            wchar_t data[];            ///< Decomposed string and composed character

+        private:
+            inline translation(_In_ const translation &other);
+            inline translation& operator=(_In_ const translation &other);
+
        public:
-            inline const wchar_t*         com    () const { return data; };
-            inline       wchar_t*         com    ()       { return data; };
+            ///
+            /// Constructs the translation
+            ///
+            /// \param[in] rank     Translation rank
+            /// \param[in] com      Composed character
+            /// \param[in] com_len  Number of UTF-16 characters in \p com
+            /// \param[in] dec      Decomposed character
+            /// \param[in] dec_len  Number of UTF-16 characters in \p dec
+            ///
+            inline translation(
+                _In_opt_                        unsigned __int16  rank    = 0,
+                _In_opt_z_count_(com_len) const wchar_t          *com     = NULL,
+                _In_opt_                        size_t            com_len = 0,
+                _In_opt_z_count_(dec_len) const wchar_t          *dec     = NULL,
+                _In_opt_                        size_t            dec_len = 0)
+            {
+                this->rank = rank;
+                this->com_to = static_cast<unsigned __int16>(com_len);
+                if (com_len) memcpy(this->data, com, sizeof(wchar_t)*com_len);
+                this->dec_to = static_cast<unsigned __int16>(this->com_to + dec_len);
+                if (dec_len) memcpy(this->data + this->com_to, dec, sizeof(wchar_t)*dec_len);
+            }
+
+            inline const wchar_t*         com    () const { return data;          };
+            inline       wchar_t*         com    ()       { return data;          };
            inline const wchar_t*         com_end() const { return data + com_to; };
            inline       wchar_t*         com_end()       { return data + com_to; };
-            inline       unsigned __int16 com_len() const { return com_to; };
+            inline       unsigned __int16 com_len() const { return com_to;        };

            inline wchar_t com_at(_In_ size_t i) const
            {
                return i < com_to ? data[i] : 0;
            }

-            inline const wchar_t*         dec    () const { return data + com_to; };
-            inline       wchar_t*         dec    ()       { return data + com_to; };
-            inline const wchar_t*         dec_end() const { return data + dec_to; };
-            inline       wchar_t*         dec_end()       { return data + dec_to; };
+            inline const wchar_t*         dec    () const { return data + com_to;   };
+            inline       wchar_t*         dec    ()       { return data + com_to;   };
+            inline const wchar_t*         dec_end() const { return data + dec_to;   };
+            inline       wchar_t*         dec_end()       { return data + dec_to;   };
            inline       unsigned __int16 dec_len() const { return dec_to - com_to; };

            inline wchar_t dec_at(_In_ size_t i) const
@ -106,7 +133,7 @@ namespace ZRCola {
            ///
            virtual int compare(_In_ const translation &a, _In_ const translation &b) const
            {
-                int r = ZRCola::CompareString(a.dec(), a.dec_end(), b.dec(), b.dec_end());
+                int r = ZRCola::CompareString(a.dec(), a.dec_len(), b.dec(), b.dec_len());
                if (r != 0) return r;

                return 0;
@ -125,10 +152,10 @@ namespace ZRCola {
            ///
            virtual int compare_sort(_In_ const translation &a, _In_ const translation &b) const
            {
-                int r = ZRCola::CompareString(a.dec(), a.dec_end(), b.dec(), b.dec_end());
+                int r = ZRCola::CompareString(a.dec(), a.dec_len(), b.dec(), b.dec_len());
                if (r != 0) return r;

-                r = ZRCola::CompareString(a.com(), a.com_end(), b.com(), b.com_end());
+                r = ZRCola::CompareString(a.com(), a.com_len(), b.com(), b.com_len());
                if (r != 0) return r;

                return 0;
@ -162,7 +189,7 @@ namespace ZRCola {
            ///
            virtual int compare(_In_ const translation &a, _In_ const translation &b) const
            {
-                int r = ZRCola::CompareString(a.com(), a.com_end(), b.com(), b.com_end());
+                int r = ZRCola::CompareString(a.com(), a.com_len(), b.com(), b.com_len());
                if (r != 0) return r;

                return 0;
@ -181,13 +208,13 @@ namespace ZRCola {
            ///
            virtual int compare_sort(_In_ const translation &a, _In_ const translation &b) const
            {
-                int r = ZRCola::CompareString(a.com(), a.com_end(), b.com(), b.com_end());
+                int r = ZRCola::CompareString(a.com(), a.com_len(), b.com(), b.com_len());
                if (r != 0) return r;

                     if (a.rank < b.rank) return -1;
                else if (a.rank > b.rank) return +1;

-                r = ZRCola::CompareString(a.dec(), a.dec_end(), b.dec(), b.dec_end());
+                r = ZRCola::CompareString(a.dec(), a.dec_len(), b.dec(), b.dec_len());
                if (r != 0) return r;

                return 0;
--- a/lib/libZRCola/src/character.cpp
+++ b/lib/libZRCola/src/character.cpp
@ -23,7 +23,7 @@
 const ZRCola::chrcatid_t ZRCola::chrcatid_t::blank = {};


-bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<wchar_t, charrank_t> &hits, _Inout_ std::map<wchar_t, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const
+bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::wstring, charrank_t> &hits, _Inout_ std::map<std::wstring, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const
 {
    assert(str);

@ -76,14 +76,15 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set

            if (idxDsc.find(term.c_str(), term.size(), &data, &len)) {
                // The term was found.
-                for (size_t i = 0; i < len; i++) {
+                for (size_t i = 0, j = 0; i < len; i += j + 1) {
                    if (fn_abort && fn_abort(cookie)) return false;
-                    wchar_t c = data[i];
-                    if (cats.find(GetCharCat(c)) != cats.end()) {
+                    j = wcsnlen(data + i, len - i);
+                    if (cats.find(GetCharCat(data + i, j)) != cats.end()) {
+                        std::wstring c(data + i, j);
                        auto idx = hits.find(c);
                        if (idx == hits.end()) {
                            // New character.
-                            hits.insert(std::make_pair(data[i], 1.0/len));
+                            hits.insert(std::make_pair(std::move(c), 1.0/len));
                        } else {
                            // Increase rating of existing character.
                            idx->second += 1.0/len;
@ -94,14 +95,15 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set

            if (idxDscSub.find(term.c_str(), term.size(), &data, &len)) {
                // The term was found in the sub-term index.
-                for (size_t i = 0; i < len; i++) {
+                for (size_t i = 0, j = 0; i < len; i += j + 1) {
                     if (fn_abort && fn_abort(cookie)) return false;
-                    wchar_t c = data[i];
-                    if (cats.find(GetCharCat(c)) != cats.end()) {
+                    // Length of the current zero-delimited character sequence; the loop step skips it and its delimiter.
+                    j = wcsnlen(data + i, len - i);
+                    if (cats.find(GetCharCat(data + i, j)) != cats.end()) {
+                        std::wstring c(data + i, j);
                         auto idx = hits_sub.find(c);
                        if (idx == hits_sub.end()) {
                            // New character.
-                            hits_sub.insert(std::make_pair(data[i], 1.0/len));
+                            // The key is not used after this point, so it is moved into the map (same as for hits).
+                            hits_sub.insert(std::make_pair(std::move(c), 1.0/len));
                         } else {
                            // Increase rating of existing character.
                            idx->second += 1.0/len;
--- a/lib/libZRCola/src/language.cpp
+++ b/lib/libZRCola/src/language.cpp
@ -75,10 +75,8 @@ bool ZRCola::langchar_db::IsLocalCharacter(_In_ const wchar_t *chr, _In_ const w
 {
    size_t n = chr_end - chr;
    assert(n <= 0xffff);
-    std::unique_ptr<ZRCola::langchar_db::langchar> lc((ZRCola::langchar_db::langchar*)new char[sizeof(ZRCola::langchar_db::langchar) + sizeof(wchar_t)*n]);
-    lc->lang = lang;
-    lc->chr_len = (unsigned __int16)n;
-    memcpy(lc->chr, chr, sizeof(wchar_t)*n);
-    ZRCola::langchar_db::indexChar::size_type start;
+    // Raw storage for the variable-length search key. It is allocated with new char[], so the
+    // array form of unique_ptr owns it and releases it with the matching delete[].
+    std::unique_ptr<char[]> lc_buf(new char[sizeof(langchar) + sizeof(wchar_t)*n]);
+    langchar *lc = reinterpret_cast<langchar*>(lc_buf.get());
+    // Construct the key in place inside the buffer.
+    // NOTE(review): the explicit constructor call is kept as written; placement new would be the portable form.
+    lc->langchar::langchar(lang, chr, n);
+    indexChar::size_type start;
     return idxChr.find(*lc, start);
 }
--- a/lib/libZRCola/src/tag.cpp
+++ b/lib/libZRCola/src/tag.cpp
@ -20,23 +20,24 @@
 #include "stdafx.h"


-bool ZRCola::chrtag_db::Search(_In_ const std::map<tagid_t, unsigned __int16> &tags, _In_ const character_db &ch_db, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<wchar_t, charrank_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const
+bool ZRCola::chrtag_db::Search(_In_ const std::map<tagid_t, unsigned __int16> &tags, _In_ const character_db &ch_db, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::wstring, charrank_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const
 {
    for (auto tag = tags.cbegin(), tag_end = tags.cend(); tag != tag_end; ++tag) {
        if (fn_abort && fn_abort(cookie)) return false;

        // Search for tagged characters.
-        chrtag el = { 0, tag->first };
        size_t start, end;
-        if (idxTag.find(el, start, end)) {
+        if (idxTag.find(chrtag(NULL, 0, tag->first), start, end)) {
            for (size_t i = start; i < end; i++) {
                if (fn_abort && fn_abort(cookie)) return false;
                const chrtag &ct = idxTag[i];
-                if (cats.find(ch_db.GetCharCat(ct.chr)) != cats.end()) {
-                    auto idx = hits.find(ct.chr);
+                unsigned __int16 len = ct.chr_len();
+                if (cats.find(ch_db.GetCharCat(ct.chr(), len)) != cats.end()) {
+                    std::wstring chr(ct.chr(), len);
+                    auto idx = hits.find(chr);
                    if (idx == hits.end()) {
                        // New character.
-                        hits.insert(std::make_pair(ct.chr, tag->second));
+                        hits.insert(std::make_pair(std::move(chr), tag->second));
                    } else {
                        // Increase count for existing character.
                        idx->second += tag->second;
@ -95,8 +96,7 @@ bool ZRCola::tagname_db::Search(_In_z_ const wchar_t *str, _In_ LCID locale, _In

            // Find the name.
            std::unique_ptr<tagname> tn(reinterpret_cast<tagname*>(new char[sizeof(tagname) + sizeof(wchar_t)*name.length()]));
-            tn->locale = locale;
-            memcpy(tn->name, name.data(), sizeof(wchar_t)*(tn->name_len = (unsigned __int16)name.length()));
+            tn->tagname::tagname(0, locale, name.data(), name.length());
            size_t start, end;
            if (idxName.find(*tn, start, end)) {
                // The name was found.
--- a/lib/libZRColaUI/include/zrcolaui/chargroup.h
+++ b/lib/libZRColaUI/include/zrcolaui/chargroup.h
@ -46,33 +46,64 @@ namespace ZRCola {
        /// Character group data
        ///
        struct chrgrp {
-            unsigned __int16 id;                ///< Character group id
+        public:
+            unsigned __int16 id;                ///< Character group ID
            unsigned __int16 rank;              ///< Character group rank
-            unsigned __int16 name_len;          ///< Character group name length in \c data
-            unsigned __int16 char_len;          ///< Character list length in \c data
-            wchar_t data[];                     ///< Character group name and character list

-            inline const wchar_t* get_chars() const
+        protected:
+            unsigned __int16 name_to;           ///< Character group name end in \c data
+            unsigned __int16 chrlst_to;         ///< Character list end in \c data
+            wchar_t data[];                     ///< Character group name, character list, bit vector if particular character is displayed initially
+
+        public:
+            ///
+            /// Constructs the character group
+            ///
+            /// \param[in] id          Character group ID
+            /// \param[in] rank        Character group rank
+            /// \param[in] name        Character group name
+            /// \param[in] name_len    Number of UTF-16 characters in \p name
+            /// \param[in] chrlst      Character list (zero delimited)
+            /// \param[in] chrlst_len  Number of UTF-16 characters in \p chrlst (including zero delimiters)
+            /// \param[in] chrshow     Bit vector marking which characters are displayed initially; (\p chrlst_len + 15)/16 elements
+            ///
+            inline chrgrp(
+                _In_opt_                                     unsigned __int16  id         = 0,
+                _In_opt_                                     unsigned __int16  rank       = 0,
+                _In_opt_z_count_(name_len)             const wchar_t          *name       = NULL,
+                _In_opt_                                     size_t            name_len   = 0,
+                _In_opt_z_count_(chrlst_len)           const wchar_t          *chrlst     = NULL,
+                _In_opt_                                     size_t            chrlst_len = 0,
+                _In_opt_count_x_((chrlst_len + 15)/16) const unsigned __int16 *chrshow    = NULL)
             {
-                return data + name_len;
+                this->id   = id;
+                this->rank = rank;
+                // The name is stored first in data, followed by the character list and the bit vector.
+                this->name_to = static_cast<unsigned __int16>(name_len);
+                if (name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len);
+                this->chrlst_to = static_cast<unsigned __int16>(this->name_to + chrlst_len);
+                if (chrlst_len) {
+                    memcpy(this->data + this->name_to, chrlst, sizeof(wchar_t)*chrlst_len);
+                    // Copy whole 16-bit words only: the same element count as declared for chrshow and as reported by chrshow_len().
+                    memcpy(this->data + this->chrlst_to, chrshow, sizeof(*chrshow)*((chrlst_len + sizeof(*chrshow)*8 - 1)/(sizeof(*chrshow)*8)));
+                }
             }

-            inline wchar_t get_char(size_t index) const
-            {
-                assert(index < char_len);
-                return data[name_len + index];
-            }
+            inline const wchar_t*         name    () const { return data;           };
+            inline       wchar_t*         name    ()       { return data;           };
+            inline const wchar_t*         name_end() const { return data + name_to; };
+            inline       wchar_t*         name_end()       { return data + name_to; };
+            inline       unsigned __int16 name_len() const { return name_to;        };

-            inline const unsigned __int16* get_char_shown() const
-            {
-                return (const unsigned __int16*)(data + name_len + char_len);
-            }
+            inline const wchar_t*         chrlst    () const { return data + name_to;      };
+            inline       wchar_t*         chrlst    ()       { return data + name_to;      };
+            inline const wchar_t*         chrlst_end() const { return data + chrlst_to;    };
+            inline       wchar_t*         chrlst_end()       { return data + chrlst_to;    };
+            inline       unsigned __int16 chrlst_len() const { return chrlst_to - name_to; };

-            inline bool is_char_shown(size_t index) const
-            {
-                assert(index < char_len);
-                return (data[name_len + char_len + index / 16] & (1 << (index % 16))) ? true : false;
-            }
+            inline const unsigned __int16* chrshow    () const { return reinterpret_cast<const unsigned __int16*>(data + chrlst_to                ); };
+            inline       unsigned __int16* chrshow    ()       { return reinterpret_cast<      unsigned __int16*>(data + chrlst_to                ); };
+            inline const unsigned __int16* chrshow_end() const { return reinterpret_cast<const unsigned __int16*>(data + chrlst_to + chrshow_len()); };
+            inline       unsigned __int16* chrshow_end()       { return reinterpret_cast<      unsigned __int16*>(data + chrlst_to + chrshow_len()); };
+            inline       unsigned __int16  chrshow_len() const { return (chrlst_len() + sizeof(*data)*8 - 1)/(sizeof(*data)*8);                      };
        };
 #pragma pack(pop)

@ -124,10 +155,13 @@ namespace ZRCola {
                     if (a.rank < b.rank) return -1;
                else if (a.rank > b.rank) return +1;

-                int r = _wcsncoll(a.data, b.data, std::min<unsigned __int16>(a.name_len, b.name_len));
+                unsigned __int16
+                    a_name_len = a.name_len(),
+                    b_name_len = b.name_len();
+                int r = _wcsncoll(a.name(), b.name(), std::min<unsigned __int16>(a_name_len, b_name_len));
                if (r != 0) return r;
-                     if (a.name_len < b.name_len) return -1;
-                else if (a.name_len > b.name_len) return +1;
+                     if (a_name_len < b_name_len) return -1;
+                else if (a_name_len > b_name_len) return +1;

                return 0;
            }
--- a/lib/libZRColaUI/include/zrcolaui/keyboard.h
+++ b/lib/libZRColaUI/include/zrcolaui/keyboard.h
@ -48,39 +48,75 @@ namespace ZRCola {
        /// Key sequence data
        ///
        struct keyseq {
+        public:
            enum modifiers_t {
                SHIFT   = 1<<0,                 ///< SHIFT key was pressed
                CTRL    = 1<<1,                 ///< CTRL key was pressed
                ALT     = 1<<2,                 ///< ALT key was pressed
            };

-            wchar_t chr;                        ///< Character
-            unsigned __int16 seq_len;           ///< \c seq length
            struct key_t {
                wchar_t key;                    ///< Key
                unsigned __int16 modifiers;     ///< Modifiers (bitwise combination of SHIFT, CTRL and ALT)
-            } seq[];                            ///< Key sequence
+            };

+        protected:
+            unsigned __int16 chr_to;            ///< Character end in \c data
+            unsigned __int16 seq_to;            ///< Key sequence end in \c data
+            wchar_t data[];                     ///< Character and key sequence
+
+        public:
+            ///
+            /// Constructs the key sequence
+            ///
+            /// The character is copied to the start of \c data and the keys are copied right after it.
+            /// Both end offsets are narrowed to 16 bits without a range check.
+            ///
+            /// \note NOTE(review): \c data is a flexible array, so this presumably relies on the caller
+            ///       having provided storage behind the object for both copies - confirm at the call sites.
+            ///
+            /// \param[in] seq        Key sequence
+            /// \param[in] seq_count  Number of keys (\c key_t elements) in \p seq
+            /// \param[in] chr        Character
+            /// \param[in] chr_len    Number of UTF-16 characters in \p chr
+            ///
+            inline keyseq(
+                _In_opt_count_(seq_count) const key_t   *seq       = NULL,
+                _In_opt_                        size_t   seq_count = 0,
+                _In_opt_z_count_(chr_len) const wchar_t *chr       = NULL,
+                _In_opt_                        size_t   chr_len   = 0)
+            {
+                this->chr_to = static_cast<unsigned __int16>(chr_len); // Character occupies data[0, chr_to)
+                if (chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); // Skipped for an empty character, so a NULL chr is never read
+                this->seq_to = static_cast<unsigned __int16>(this->chr_to + seq_count * sizeof(key_t) / sizeof(*data)); // Key sequence size is converted from key_t units to data elements
+                if (seq_count) memcpy(this->data + this->chr_to, seq, sizeof(key_t)*seq_count); // Keys are stored raw, right after the character
+            }
+
+            inline const wchar_t*         chr    () const { return data;          }; ///< Start of the character; it begins at the first element of \c data
+            inline       wchar_t*         chr    ()       { return data;          }; ///< Start of the character; it begins at the first element of \c data
+            inline const wchar_t*         chr_end() const { return data + chr_to; }; ///< One past the last character element (\c data + \c chr_to)
+            inline       wchar_t*         chr_end()       { return data + chr_to; }; ///< One past the last character element (\c data + \c chr_to)
+            inline       unsigned __int16 chr_len() const { return chr_to;        }; ///< Character length in \c data elements; equals \c chr_to because the character starts at offset 0
+
+            inline const key_t*           seq    () const { return reinterpret_cast<const key_t*>(data + chr_to);     }; ///< First key; the keys begin in \c data where the character ends
+            inline       key_t*           seq    ()       { return reinterpret_cast<      key_t*>(data + chr_to);     }; ///< First key; the keys begin in \c data where the character ends
+            inline const key_t*           seq_end() const { return reinterpret_cast<const key_t*>(data + seq_to);     }; ///< One past the last key (\c data + \c seq_to)
+            inline       key_t*           seq_end()       { return reinterpret_cast<      key_t*>(data + seq_to);     }; ///< One past the last key (\c data + \c seq_to)
+            inline       unsigned __int16 seq_len() const { return (seq_to - chr_to) * sizeof(*data) / sizeof(key_t); }; ///< Number of keys: the span between \c chr_to and \c seq_to converted from \c data elements to \c key_t units

            ///
            /// Compares two key sequences
            ///
-            /// \param[in] seq_a    First key sequence
-            /// \param[in] count_a  Number of keys in sequence \p seq_a
-            /// \param[in] seq_b    Second key sequence
-            /// \param[in] count_b  Number of keys in sequence \p seq_b
+            /// \param[in] seq_a  First key sequence
+            /// \param[in] len_a  Number of keys in sequence \p seq_a
+            /// \param[in] seq_b  Second key sequence
+            /// \param[in] len_b  Number of keys in sequence \p seq_b
            ///
            /// \returns
            /// - <0 when seq_a <  seq_b
            /// - =0 when seq_a == seq_b
            /// - >0 when seq_a >  seq_b
            ///
-            static inline int CompareSequence(const key_t *seq_a, unsigned __int16 count_a, const key_t *seq_b, unsigned __int16 count_b)
+            static inline int CompareSequence(_In_ const key_t *seq_a, _In_ size_t len_a, _In_ const key_t *seq_b, _In_ size_t len_b)
            {
-                for (unsigned __int16 i = 0; ; i++) {
-                         if (i >= count_a && i >= count_b) return  0;
-                    else if (i >= count_a && i <  count_b) return -1;
-                    else if (i <  count_a && i >= count_b) return +1;
+                for (size_t i = 0; ; i++) {
+                         if (i >= len_a && i >= len_b) return  0;
+                    else if (i >= len_a && i <  len_b) return -1;
+                    else if (i <  len_a && i >= len_b) return +1;
                    else if (seq_a[i].key       < seq_b[i].key      ) return -1;
                    else if (seq_a[i].key       > seq_b[i].key      ) return +1;
                    else if (seq_a[i].modifiers < seq_b[i].modifiers) return -1;
@ -116,8 +152,8 @@ namespace ZRCola {
            ///
            virtual int compare(_In_ const keyseq &a, _In_ const keyseq &b) const
            {
-                     if (a.chr < b.chr) return -1;
-                else if (a.chr > b.chr) return +1;
+                int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len()); // Only the character ranges take part; the key sequences are ignored here
+                if (r != 0) return r; // Non-zero result of ZRCola::CompareString is passed through unchanged

                return 0;
            }
@ -135,10 +171,10 @@ namespace ZRCola {
            ///
            virtual int compare_sort(_In_ const keyseq &a, _In_ const keyseq &b) const
            {
-                     if (a.chr < b.chr) return -1;
-                else if (a.chr > b.chr) return +1;
+                int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len()); // Primary criterion: the character ranges
+                if (r != 0) return r;

-                int r = keyseq::CompareSequence(a.seq, a.seq_len, b.seq, b.seq_len);
+                r = keyseq::CompareSequence(a.seq(), a.seq_len(), b.seq(), b.seq_len()); // Secondary criterion, reached only when the characters compare equal: the key sequences
                if (r != 0) return r;

                return 0;
@ -172,7 +208,7 @@ namespace ZRCola {
            ///
            virtual int compare(_In_ const keyseq &a, _In_ const keyseq &b) const
            {
-                int r = keyseq::CompareSequence(a.seq, a.seq_len, b.seq, b.seq_len);
+                int r = keyseq::CompareSequence(a.seq(), a.seq_len(), b.seq(), b.seq_len()); // Only the key sequences take part; the characters are ignored here
                if (r != 0) return r;

                return 0;
@ -191,11 +227,11 @@ namespace ZRCola {
            ///
            virtual int compare_sort(_In_ const keyseq &a, _In_ const keyseq &b) const
            {
-                int r = keyseq::CompareSequence(a.seq, a.seq_len, b.seq, b.seq_len);
+                int r = keyseq::CompareSequence(a.seq(), a.seq_len(), b.seq(), b.seq_len()); // Primary criterion: the key sequences
                if (r != 0) return r;

-                     if (a.chr < b.chr) return -1;
-                else if (a.chr > b.chr) return +1;
+                r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len()); // Secondary criterion, reached only when the key sequences compare equal: the character ranges
+                if (r != 0) return r;

                return 0;
            }
--- a/output/data/ZRCola.zrcdb
+++ b/output/data/ZRCola.zrcdb