diff --git a/ZRCola/zrcolachrcatpnl.cpp b/ZRCola/zrcolachrcatpnl.cpp index f67f4c4..99ae94f 100644 --- a/ZRCola/zrcolachrcatpnl.cpp +++ b/ZRCola/zrcolachrcatpnl.cpp @@ -57,7 +57,7 @@ wxZRColaCharacterCatalogPanel::wxZRColaCharacterCatalogPanel(wxWindow* parent) : for (size_t i = 0, n = m_cg_db.idxRnk.size(); i < n; i++) { const ZRCola::chrgrp_db::chrgrp &cg = m_cg_db.idxRnk[i]; wxString - label(cg.data, cg.name_len), + label(cg.name(), cg.name_len()), label_tran2(wxGetTranslation(label, wxT("ZRCola-zrcdb"))); m_choice->Insert(label_tran2, i); } @@ -155,17 +155,19 @@ void wxZRColaCharacterCatalogPanel::Update() if (m_show_all->GetValue()) { m_grid->SetCharacters( - wxString(cg.get_chars(), cg.char_len), - wxArrayShort((const short*)cg.get_char_shown(), (const short*)cg.get_char_shown() + (cg.char_len + 15)/16)); + wxString(cg.chrlst(), cg.chrlst_end()), + wxArrayShort(reinterpret_cast(cg.chrshow()), reinterpret_cast(cg.chrshow_end()))); } else { // Select frequently used characters only. 
- const wchar_t *src = cg.get_chars(); - const unsigned __int16 *shown = cg.get_char_shown(); - wxString chars; - for (unsigned __int16 i = 0, j = 0; i < cg.char_len; j++) { - for (unsigned __int16 k = 0, mask = shown[j]; k < 16 && i < cg.char_len; k++, mask >>= 1, i++) { + const wchar_t *src = cg.chrlst(); + const unsigned __int16 *shown = cg.chrshow(); + wxArrayString chars; + for (size_t i = 0, i_end = cg.chrlst_len(), j = 0; i < i_end; j++) { + for (unsigned __int16 k = 0, mask = shown[j]; k < 16 && i < i_end; k++, mask >>= 1) { + size_t len = wcsnlen(src + i, i_end - i); if (mask & 1) - chars += src[i]; + chars.Add(wxString(src + i, len)); + i += len + 1; } } m_grid->SetCharacters(chars); diff --git a/ZRCola/zrcolachrgrid.cpp b/ZRCola/zrcolachrgrid.cpp index 2f6f33d..77f07c9 100644 --- a/ZRCola/zrcolachrgrid.cpp +++ b/ZRCola/zrcolachrgrid.cpp @@ -68,6 +68,24 @@ void wxZRColaCharGrid::Init() void wxZRColaCharGrid::SetCharacters(const wxString &chars) +{ + m_chars.Clear(); + const wxCStrData chr = chars.GetData(); + for (size_t i = 0, i_end = chars.Length(), i_next; i < i_end; i = i_next + 1) { + i_next = i + _tcsnlen(chr + i, i_end - i); + m_chars.Add(wxString(chr + i, chr + i_next)); + }; + m_relevance.Clear(); + m_regenerate = true; + + // Invoke OnSize(), which will populate the grid. 
+ wxSizeEvent e(GetSize(), m_windowId); + e.SetEventObject(this); + HandleWindowEvent(e); +} + + +void wxZRColaCharGrid::SetCharacters(const wxArrayString &chars) { m_chars = chars; m_relevance.Clear(); @@ -82,7 +100,12 @@ void wxZRColaCharGrid::SetCharacters(const wxString &chars) void wxZRColaCharGrid::SetCharacters(const wxString &chars, const wxArrayShort &relevance) { - m_chars = chars; + m_chars.Clear(); + const wxCStrData chr = chars.GetData(); + for (size_t i = 0, i_end = chars.Length(), i_next; i < i_end; i = i_next + 1) { + i_next = i + _tcsnlen(chr + i, i_end - i); + m_chars.Add(wxString(chr + i, chr + i_next)); + }; m_relevance = relevance; m_regenerate = true; @@ -95,22 +118,23 @@ void wxZRColaCharGrid::SetCharacters(const wxString &chars, const wxArrayShort & wxString wxZRColaCharGrid::GetToolTipText(int idx) { - wxASSERT_MSG(idx < (int)m_chars.Length(), wxT("index out of bounds")); + wxASSERT_MSG(idx < (int)m_chars.GetCount(), wxT("index out of bounds")); auto app = dynamic_cast(wxTheApp); + const auto &chr = m_chars[idx]; // See if this character has a key sequence registered. 
- char ks[sizeof(ZRCola::keyseq_db::keyseq)] = {}; - ((ZRCola::keyseq_db::keyseq*)ks)->chr = m_chars[idx]; + std::unique_ptr ks((ZRCola::keyseq_db::keyseq*)new char[sizeof(ZRCola::keyseq_db::keyseq) + sizeof(wchar_t)*chr.length()]); + ks->ZRCola::keyseq_db::keyseq::keyseq(NULL, 0, chr.data(), chr.length()); ZRCola::keyseq_db::indexKey::size_type start; - if (app->m_ks_db.idxChr.find(*(ZRCola::keyseq_db::keyseq*)ks, start)) { + if (app->m_ks_db.idxChr.find(*ks, start)) { ZRCola::keyseq_db::keyseq &seq = app->m_ks_db.idxChr[start]; wxString ks_str; - if (ZRCola::keyseq_db::GetSequenceAsText(seq.seq, seq.seq_len, ks_str)) - return wxString::Format(wxT("U+%04X (%s)"), (int)m_chars[idx], ks_str.c_str()); + if (ZRCola::keyseq_db::GetSequenceAsText(seq.seq(), seq.seq_len(), ks_str)) + return wxString::Format(wxT("U+%s (%s)"), ZRCola::GetUnicodeDump(chr.data(), chr.length(), _T("+")).c_str(), ks_str.c_str()); } - return wxString::Format(wxT("U+%04X"), (int)m_chars[idx]); + return wxString::Format(wxT("U+%s"), ZRCola::GetUnicodeDump(chr.data(), chr.length(), _T("+")).c_str()); } @@ -126,17 +150,17 @@ void wxZRColaCharGrid::OnSize(wxSizeEvent& event) // Calculate initial estimate of columns and rows. wxSize size(event.GetSize()); size_t - char_len = m_chars.Length(); + char_count = m_chars.GetCount(); int - width = size.GetWidth() - m_rowLabelWidth - m_extraWidth, - cols = std::max(width / wxZRColaCharacterGridColumnWidth, 1), - rows = std::max((char_len + cols - 1) / cols, 1); + width = size.GetWidth() - m_rowLabelWidth - m_extraWidth, + cols = std::max(width / wxZRColaCharacterGridColumnWidth, 1), + rows = std::max((char_count + cols - 1) / cols, 1); if (m_colLabelHeight + rows*wxZRColaCharacterGridRowHeight + m_extraHeight > size.GetHeight()) { // Vertical scrollbar will be shown. Adjust the width and recalculate layout to avoid horizontal scrollbar. 
width = std::max(width - wxSystemSettings::GetMetric(wxSYS_VSCROLL_X, this), 0); cols = std::max(width / wxZRColaCharacterGridColumnWidth, 1); - rows = std::max((char_len + cols - 1) / cols, 1); + rows = std::max((char_count + cols - 1) / cols, 1); } BeginBatch(); @@ -146,14 +170,14 @@ void wxZRColaCharGrid::OnSize(wxSizeEvent& event) wxGridStringTable *table = new wxGridStringTable(rows, cols); for (int r = 0, i = 0; r < rows; r++) for (int c = 0; c < cols; c++, i++) - table->SetValue(r, c, i < (int)char_len ? wxString(1, m_chars[i]) : wxEmptyString); + table->SetValue(r, c, i < (int)char_count ? m_chars[i] : wxEmptyString); SetTable(table, true); if (!m_relevance.IsEmpty()) { const wxColour colour_def; const wxColour colour_irr = wxSystemSettings::GetColour(wxSYS_COLOUR_BTNHIGHLIGHT); for (int r = 0, i = 0; r < rows; r++) for (int c = 0; c < cols; c++, i++) - SetCellBackgroundColour(r, c, i >= (int)char_len || ((unsigned short)(m_relevance[i/16]) & (1<<(i%16))) ? colour_def : colour_irr); + SetCellBackgroundColour(r, c, i >= (int)char_count || ((unsigned short)(m_relevance[i/16]) & (1<<(i%16))) ? colour_def : colour_irr); } else { for (int r = 0, i = 0; r < rows; r++) for (int c = 0; c < cols; c++, i++) @@ -217,7 +241,7 @@ void wxZRColaCharGrid::OnMotion(wxMouseEvent& event) return; size_t toolTipIdx = row*m_numCols + col; - if (toolTipIdx >= m_chars.Length()) { + if (toolTipIdx >= m_chars.GetCount()) { // Index out of range. 
m_toolTipIdx = (size_t)-1; m_timerToolTip.Stop(); @@ -241,7 +265,7 @@ void wxZRColaCharGrid::OnTooltipTimer(wxTimerEvent& event) { event.Skip(); - if (m_toolTipIdx >= m_chars.Length()) + if (m_toolTipIdx >= m_chars.GetCount()) return; GetGridWindow()->SetToolTip(GetToolTipText(m_toolTipIdx)); diff --git a/ZRCola/zrcolachrgrid.h b/ZRCola/zrcolachrgrid.h index 533542c..14ebbd4 100644 --- a/ZRCola/zrcolachrgrid.h +++ b/ZRCola/zrcolachrgrid.h @@ -54,14 +54,21 @@ public: /// /// Sets new array of characters to display /// - /// \param[in] chars The string containing characters to display + /// \param[in] chars The string containing characters to display (zero delimited) /// void SetCharacters(const wxString &chars); /// /// Sets new array of characters to display /// - /// \param[in] chars The string containing characters to display + /// \param[in] chars The array of characters to display + /// + void SetCharacters(const wxArrayString &chars); + + /// + /// Sets new array of characters to display + /// + /// \param[in] chars The string containing characters to display (zero delimited) /// \param[in] relevance Bit-array of \p chars relevance (1=more relevant, 0=less relevant) /// void SetCharacters(const wxString &chars, const wxArrayShort &relevance); @@ -71,7 +78,7 @@ public: /// /// \returns The string containing displayed characters /// - inline wxString GetCharacters() const + inline const wxArrayString& GetCharacters() const { return m_chars; } @@ -83,10 +90,14 @@ public: /// /// \returns Grid coordinates of selected character or (-1, -1) if character not found. /// - inline wxGridCellCoords GetCharacterCoords(wchar_t c) const + inline wxGridCellCoords GetCharacterCoords(const wxString &c) const { - int i = m_chars.Find(c); - return i != wxNOT_FOUND ? 
wxGridCellCoords(i / m_numCols, i % m_numCols) : wxGridCellCoords(-1, -1); + for (size_t i = 0, n = m_chars.GetCount(); ; i++) { + if (i >= n) + return wxGridCellCoords(-1, -1); + else if (m_chars[i] == c) + return wxGridCellCoords(i / m_numCols, i % m_numCols); + } } protected: @@ -102,7 +113,7 @@ private: void Init(); // common part of all ctors protected: - wxString m_chars; ///< Array of Unicode characters to display in the grid + wxArrayString m_chars; ///< Array of Unicode characters to display in the grid wxArrayShort m_relevance; ///< Bit-array of `m_chars` relevance private: diff --git a/ZRCola/zrcolachrslct.cpp b/ZRCola/zrcolachrslct.cpp index db66f73..f7e2d3c 100644 --- a/ZRCola/zrcolachrslct.cpp +++ b/ZRCola/zrcolachrslct.cpp @@ -20,6 +20,174 @@ #include "stdafx.h" +////////////////////////////////////////////////////////////////////// +// wxZRColaUTF16CharValidator +////////////////////////////////////////////////////////////////////// + +wxIMPLEMENT_DYNAMIC_CLASS(wxZRColaUTF16CharValidator, wxValidator); + + +wxZRColaUTF16CharValidator::wxZRColaUTF16CharValidator(wchar_t *val) : + m_val(val), + wxValidator() +{ +} + + +wxObject* wxZRColaUTF16CharValidator::Clone() const +{ + return new wxZRColaUTF16CharValidator(*this); +} + + +bool wxZRColaUTF16CharValidator::Validate(wxWindow *parent) +{ + wxASSERT(GetWindow()->IsKindOf(CLASSINFO(wxTextCtrl))); + wxTextCtrl *ctrl = (wxTextCtrl*)GetWindow(); + if (!ctrl->IsEnabled()) return true; + + wxString val(ctrl->GetValue()); + return Parse(val, 0, val.Length(), ctrl, parent); +} + + +bool wxZRColaUTF16CharValidator::TransferToWindow() +{ + wxASSERT(GetWindow()->IsKindOf(CLASSINFO(wxTextCtrl))); + + if (m_val) + ((wxTextCtrl*)GetWindow())->SetValue(wxString::Format(wxT("%04X"), *m_val)); + + return true; +} + + +bool wxZRColaUTF16CharValidator::TransferFromWindow() +{ + wxASSERT(GetWindow()->IsKindOf(CLASSINFO(wxTextCtrl))); + wxTextCtrl *ctrl = (wxTextCtrl*)GetWindow(); + + wxString val(ctrl->GetValue()); + 
return Parse(val, 0, val.Length(), ctrl, NULL, m_val); +} + + +bool wxZRColaUTF16CharValidator::Parse(const wxString &val_in, size_t i_start, size_t i_end, wxTextCtrl *ctrl, wxWindow *parent, wchar_t *val_out) +{ + const wxStringCharType *buf = val_in; + + wchar_t chr = 0; + for (size_t i = i_start;;) { + if (i >= i_end) { + // End of Unicode found. + if (val_out) *val_out = chr; + return true; + } else if (i >= i_start + 4) { + // Maximum characters exceeded. + ctrl->SetFocus(); + ctrl->SetSelection(i, i_end); + wxMessageBox(_("Too many digits in Unicode."), _("Validation conflict"), wxOK | wxICON_EXCLAMATION, parent); + return false; + } else if (_T('0') <= buf[i] && buf[i] <= _T('9')) { + // Digit found. + chr = (chr << 4) | (buf[i] - _T('0')); + i++; + } else if (_T('A') <= buf[i] && buf[i] <= _T('F')) { + // Capital letter found. + chr = (chr << 4) | (buf[i] - _T('A') + 10); + i++; + } else if (_T('a') <= buf[i] && buf[i] <= _T('f')) { + // Lower letter found. + chr = (chr << 4) | (buf[i] - _T('a') + 10); + i++; + } else { + // Invalid character found. 
+ ctrl->SetFocus(); + ctrl->SetSelection(i, i + 1); + wxMessageBox(wxString::Format(_("Invalid character in Unicode found: %c"), buf[i]), _("Validation conflict"), wxOK | wxICON_EXCLAMATION, parent); + return false; + } + } +} + + +////////////////////////////////////////////////////////////////////// +// wxZRColaUnicodeDumpValidator +////////////////////////////////////////////////////////////////////// + +wxIMPLEMENT_DYNAMIC_CLASS(wxZRColaUnicodeDumpValidator, wxValidator); + + +wxZRColaUnicodeDumpValidator::wxZRColaUnicodeDumpValidator(wxString *val) : + m_val(val), + wxValidator() +{ +} + + +wxObject* wxZRColaUnicodeDumpValidator::Clone() const +{ + return new wxZRColaUnicodeDumpValidator(*this); +} + + +bool wxZRColaUnicodeDumpValidator::Validate(wxWindow *parent) +{ + wxASSERT(GetWindow()->IsKindOf(CLASSINFO(wxTextCtrl))); + wxTextCtrl *ctrl = (wxTextCtrl*)GetWindow(); + if (!ctrl->IsEnabled()) return true; + + wxString val(ctrl->GetValue()); + return Parse(val, 0, val.Length(), ctrl, parent); +} + + +bool wxZRColaUnicodeDumpValidator::TransferToWindow() +{ + wxASSERT(GetWindow()->IsKindOf(CLASSINFO(wxTextCtrl))); + + if (m_val) + ((wxTextCtrl*)GetWindow())->SetValue(ZRCola::GetUnicodeDumpW(m_val->c_str(), m_val->length(), L"+")); + + return true; +} + + +bool wxZRColaUnicodeDumpValidator::TransferFromWindow() +{ + wxASSERT(GetWindow()->IsKindOf(CLASSINFO(wxTextCtrl))); + wxTextCtrl *ctrl = (wxTextCtrl*)GetWindow(); + + wxString val(ctrl->GetValue()); + return Parse(val, 0, val.Length(), ctrl, NULL, m_val); +} + + +bool wxZRColaUnicodeDumpValidator::Parse(const wxString &val_in, size_t i_start, size_t i_end, wxTextCtrl *ctrl, wxWindow *parent, wxString *val_out) +{ + const wxStringCharType *buf = val_in; + + wxString str; + for (size_t i = i_start;;) { + const wxStringCharType *buf_next; + wchar_t chr; + if ((buf_next = wmemchr(buf + i, L'+', i_end - i)) != NULL) { + // Unicode dump separator found. 
+ if (!wxZRColaUTF16CharValidator::Parse(val_in, i, buf_next - buf, ctrl, parent, &chr)) + return false; + str += chr; + i = buf_next - buf + 1; + } else if (wxZRColaUTF16CharValidator::Parse(val_in, i, i_end, ctrl, parent, &chr)) { + // The rest of the Unicode dump parsed successfully. + if (chr) str += chr; + if (val_out) *val_out = str; + return true; + } else + return false; + } +} + + ////////////////////////////////////////////////////////////////////////// // wxZRColaCharSelect ////////////////////////////////////////////////////////////////////////// @@ -30,7 +198,6 @@ wxDEFINE_EVENT(wxEVT_SEARCH_COMPLETE, wxThreadEvent); wxZRColaCharSelect::wxZRColaCharSelect(wxWindow* parent) : m_searchChanged(false), m_unicodeChanged(false), - m_char(0), m_searchThread(NULL), wxZRColaCharSelectBase(parent) { @@ -46,13 +213,13 @@ wxZRColaCharSelect::wxZRColaCharSelect(wxWindow* parent) : m_search_more->SetLabel(_(L"▸ Search Options")); - m_unicode->SetValidator(wxHexValidator(&m_char, wxNUM_VAL_DEFAULT, 4)); + m_unicode->SetValidator(wxZRColaUnicodeDumpValidator(&m_char)); // Fill categories. 
auto app = dynamic_cast(wxTheApp); for (size_t i = 0, n = app->m_cc_db.idxRnk.size(); i < n; i++) { const auto &cc = app->m_cc_db.idxRnk[i]; - int idx = m_categories->Insert(wxGetTranslation(wxString(cc.name, cc.name_len), wxT("ZRCola-zrcdb")), i); + int idx = m_categories->Insert(wxGetTranslation(wxString(cc.name(), cc.name_len()), wxT("ZRCola-zrcdb")), i); m_categories->Check(idx); m_ccOrder.insert(std::make_pair(cc.id, idx)); } @@ -84,24 +251,24 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event) if (m_unicode->GetValidator()->TransferFromWindow()) { auto app = dynamic_cast(wxTheApp); - m_gridPreview->SetCellValue(wxString(1, m_char), 0, 0); + m_gridPreview->SetCellValue(m_char, 0, 0); - char chr[sizeof(ZRCola::character_db::character)] = {}; - ((ZRCola::character_db::character*)chr)->chr = m_char; + std::unique_ptr chr((ZRCola::character_db::character*)new char[sizeof(ZRCola::character_db::character) + sizeof(wchar_t)*m_char.length()]); + chr->ZRCola::character_db::character::character(m_char.data(), m_char.length()); size_t start; - if (app->m_chr_db.idxChr.find(*(ZRCola::character_db::character*)chr, start)) { + if (app->m_chr_db.idxChr.find(*chr, start)) { const auto &chr = app->m_chr_db.idxChr[start]; // Update character description. - m_description->SetValue(wxString(chr.data, chr.desc_len)); + m_description->SetValue(wxString(chr.desc(), chr.desc_len())); { // See if this character has a key sequence registered. 
- char ks[sizeof(ZRCola::keyseq_db::keyseq)] = {}; - ((ZRCola::keyseq_db::keyseq*)ks)->chr = m_char; + std::unique_ptr ks((ZRCola::keyseq_db::keyseq*)new char[sizeof(ZRCola::keyseq_db::keyseq) + sizeof(wchar_t)*m_char.length()]); + ks->ZRCola::keyseq_db::keyseq::keyseq(NULL, 0, m_char.data(), m_char.length()); ZRCola::keyseq_db::indexKey::size_type start; - if (app->m_ks_db.idxChr.find(*(ZRCola::keyseq_db::keyseq*)ks, start)) { + if (app->m_ks_db.idxChr.find(*ks, start)) { ZRCola::keyseq_db::keyseq &seq = app->m_ks_db.idxChr[start]; wxString ks_str; - if (ZRCola::keyseq_db::GetSequenceAsText(seq.seq, seq.seq_len, ks_str)) + if (ZRCola::keyseq_db::GetSequenceAsText(seq.seq(), seq.seq_len(), ks_str)) m_shortcut->SetValue(ks_str); else m_shortcut->SetValue(wxEmptyString); @@ -115,12 +282,12 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event) // Update character category. if (app->m_cc_db.idxChrCat.find(*((ZRCola::chrcat_db::chrcat*)cc), start)) { const auto &cat = app->m_cc_db.idxChrCat[start]; - m_category->SetValue(wxGetTranslation(wxString(cat.name, cat.name_len), wxT("ZRCola-zrcdb"))); + m_category->SetValue(wxGetTranslation(wxString(cat.name(), cat.name_len()), wxT("ZRCola-zrcdb"))); } else m_category->SetValue(wxEmptyString); } // Update related characters. - m_gridRelated->SetCharacters(wxString(chr.data + chr.desc_len, chr.rel_len)); + m_gridRelated->SetCharacters(wxString(chr.rel(), chr.rel_end())); } else { m_description->SetValue(wxEmptyString); m_shortcut->SetValue(wxEmptyString); @@ -130,9 +297,10 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event) // Find character tags. 
std::list tag_names; - ZRCola::chrtag_db::chrtag ct = { m_char }; + std::unique_ptr ct((ZRCola::chrtag_db::chrtag*)new char[sizeof(ZRCola::chrtag_db::chrtag) + sizeof(wchar_t)*m_char.length()]); + ct->ZRCola::chrtag_db::chrtag::chrtag(m_char.data(), m_char.length()); size_t end; - if (app->m_ct_db.idxChr.find(ct, start, end)) { + if (app->m_ct_db.idxChr.find(*ct, start, end)) { for (size_t i = start; i < end; i++) { const ZRCola::chrtag_db::chrtag &ct = app->m_ct_db.idxChr[i]; @@ -149,9 +317,9 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event) for (auto name = tag_names.cbegin(), name_end = tag_names.cend();; ++name) { if (name == name_end) { // Add name to the list. - tag_names.push_back(std::wstring(tn.name, tn.name + tn.name_len)); + tag_names.push_back(std::wstring(tn.name(), tn.name_end())); break; - } else if (ZRCola::tagname_db::tagname::CompareName(m_locale, name->data(), (unsigned __int16)name->length(), tn.name, tn.name_len) == 0) + } else if (ZRCola::tagname_db::tagname::CompareName(m_locale, name->data(), (unsigned __int16)name->length(), tn.name(), tn.name_len()) == 0) // Name is already on the list. break; } @@ -284,10 +452,10 @@ void wxZRColaCharSelect::OnSearchComplete(wxThreadEvent& event) if (m_searchThread) { // Display results. 
- wxString chars; + wxArrayString chars; chars.reserve(m_searchThread->m_hits.size()); for (auto i = m_searchThread->m_hits.cbegin(), i_end = m_searchThread->m_hits.cend(); i != i_end; ++i) - chars += i->second; + chars.Add(i->second); m_gridResults->SetCharacters(chars); m_searchThread->Delete(); @@ -304,7 +472,7 @@ void wxZRColaCharSelect::OnResultSelectCell(wxGridEvent& event) wxString val(m_gridResults->GetCellValue(event.GetRow(), event.GetCol())); if (!val.IsEmpty()) - NavigateTo(val[0]); + NavigateTo(val); } @@ -314,7 +482,7 @@ void wxZRColaCharSelect::OnResultCellDClick(wxGridEvent& event) wxString val(m_gridResults->GetCellValue(event.GetRow(), event.GetCol())); if (!val.IsEmpty()) { - NavigateTo(val[0]); + NavigateTo(val); wxCommandEvent e(wxEVT_COMMAND_BUTTON_CLICKED, wxID_OK); m_sdbSizerButtonsOK->GetEventHandler()->ProcessEvent(e); } @@ -328,7 +496,7 @@ void wxZRColaCharSelect::OnResultsKeyDown(wxKeyEvent& event) case WXK_NUMPAD_ENTER: wxString val(m_gridResults->GetCellValue(m_gridResults->GetCursorRow(), m_gridResults->GetCursorColumn())); if (!val.IsEmpty()) { - NavigateTo(val[0]); + NavigateTo(val); wxCommandEvent e(wxEVT_COMMAND_BUTTON_CLICKED, wxID_OK); m_sdbSizerButtonsOK->GetEventHandler()->ProcessEvent(e); @@ -347,7 +515,7 @@ void wxZRColaCharSelect::OnRecentSelectCell(wxGridEvent& event) wxString val(m_gridRecent->GetCellValue(event.GetRow(), event.GetCol())); if (!val.IsEmpty()) - NavigateTo(val[0]); + NavigateTo(val); } @@ -357,7 +525,7 @@ void wxZRColaCharSelect::OnRecentCellDClick(wxGridEvent& event) wxString val(m_gridRecent->GetCellValue(event.GetRow(), event.GetCol())); if (!val.IsEmpty()) { - NavigateTo(val[0]); + NavigateTo(val); wxCommandEvent e(wxEVT_COMMAND_BUTTON_CLICKED, wxID_OK); m_sdbSizerButtonsOK->GetEventHandler()->ProcessEvent(e); } @@ -371,7 +539,7 @@ void wxZRColaCharSelect::OnRecentKeyDown(wxKeyEvent& event) case WXK_NUMPAD_ENTER: wxString val(m_gridRecent->GetCellValue(m_gridRecent->GetCursorRow(), 
m_gridRecent->GetCursorColumn())); if (!val.IsEmpty()) { - NavigateTo(val[0]); + NavigateTo(val); wxCommandEvent e(wxEVT_COMMAND_BUTTON_CLICKED, wxID_OK); m_sdbSizerButtonsOK->GetEventHandler()->ProcessEvent(e); @@ -431,7 +599,7 @@ void wxZRColaCharSelect::OnRelatedSelectCell(wxGridEvent& event) wxString val(m_gridRelated->GetCellValue(event.GetRow(), event.GetCol())); if (!val.IsEmpty()) - NavigateTo(val[0]); + NavigateTo(val); } @@ -439,15 +607,15 @@ void wxZRColaCharSelect::OnOKButtonClick(wxCommandEvent& event) { event.Skip(); - wxString - recent(m_gridRecent->GetCharacters()), - val(1, m_char); - for (size_t i = 0, n = recent.Length(); i < n; i++) { - const wxStringCharType c = recent[i]; + const wxArrayString &recent = m_gridRecent->GetCharacters(); + wxArrayString val; + val.reserve(recent.GetCount() + 1); + val.Add(m_char); + for (size_t i = 0, n = recent.GetCount(); i < n; i++) { + const wxString &c = recent[i]; if (c != m_char) - val += c; + val.Add(c); } - m_gridRecent->SetCharacters(val); } @@ -457,13 +625,13 @@ void wxZRColaCharSelect::ResetResults() // Fill the results. 
auto app = dynamic_cast(wxTheApp); size_t i, n = app->m_chr_db.idxChr.size(); - wxString val; + wxArrayString val; val.reserve(n); for (i = 0; i < n; i++) { const auto &chr = app->m_chr_db.idxChr[i]; auto idx = m_ccOrder.find(chr.cat); if (idx == m_ccOrder.end() || m_categories->IsChecked(idx->second)) - val += chr.chr; + val.Add(wxString(chr.chr(), chr.chr_len())); } m_gridResults->SetCharacters(val); } @@ -507,7 +675,7 @@ void wxZRColaCharSelect::NavigateBy(int offset) } -void wxZRColaCharSelect::NavigateTo(wchar_t c) +void wxZRColaCharSelect::NavigateTo(const wxString &c) { if (m_char != c) { // Update history state @@ -548,7 +716,7 @@ wxZRColaCharSelect::SearchThread::SearchThread(wxZRColaCharSelect *parent) : wxThread::ExitCode wxZRColaCharSelect::SearchThread::Entry() { auto app = dynamic_cast(wxTheApp); - std::map hits; + std::map hits; if (TestDestroy()) return (wxThread::ExitCode)1; @@ -561,7 +729,7 @@ wxThread::ExitCode wxZRColaCharSelect::SearchThread::Entry() { // Search by description and merge results. - std::map hits_sub; + std::map hits_sub; if (!app->m_chr_db.Search(m_search.c_str(), m_cats, hits, hits_sub, TestDestroyS, this)) return (wxThread::ExitCode)1; for (auto i = hits_sub.cbegin(), i_end = hits_sub.cend(); i != i_end; ++i) { if (TestDestroy()) return (wxThread::ExitCode)1; @@ -589,7 +757,7 @@ wxThread::ExitCode wxZRColaCharSelect::SearchThread::Entry() if (i->second > rank_threshold) m_hits.push_back(std::make_pair(i->second, i->first)); } - std::qsort(m_hits.data(), m_hits.size(), sizeof(std::pair), CompareHits); + std::qsort(m_hits.data(), m_hits.size(), sizeof(std::pair), CompareHits); // Signal the event handler that this thread is going to be destroyed. // NOTE: here we assume that using the m_parent pointer is safe, @@ -637,7 +805,17 @@ void wxPersistentZRColaCharSelect::Save() const auto wnd = static_cast(GetWindow()); // dynamic_cast is not reliable as we are typically called late in the wxTopLevelWindowMSW destructor. 
auto app = dynamic_cast(wxTheApp); - SaveValue(wxT("recentChars"), wnd->m_gridRecent->GetCharacters()); + wxString val; + auto &recent = wnd->m_gridRecent->GetCharacters(); + for (size_t i = 0, n = recent.GetCount(); i < n; i++) { + if (i) val += wxT('|'); + auto &chr = recent[i]; + for (size_t j = 0, m = chr.Length(); j < m; j++) { + if (j) val += wxT('+'); + val += wxString::Format(wxT("%04X"), chr[j]); + } + } + SaveValue(wxT("recentChars2"), val); for (size_t i = 0, n = app->m_cc_db.idxRnk.size(); i < n; i++) { const auto &cc = app->m_cc_db.idxRnk[i]; @@ -656,8 +834,38 @@ bool wxPersistentZRColaCharSelect::Restore() auto app = dynamic_cast(wxTheApp); wxString recent; - if (RestoreValue(wxT("recentChars"), &recent)) - wnd->m_gridRecent->SetCharacters(recent); + if (RestoreValue(wxT("recentChars2"), &recent)) { + // Native format found. + wxArrayString val; + wxString chr; + wchar_t c = 0; + for (size_t i = 0, n = recent.Length();; i++) { + if (i >= n) { + if (c) { chr += c; c = 0; } + if (!chr.IsEmpty()) { val.Add(chr); chr.Clear(); } + break; + } else { + wxStringCharType r = recent[i]; + if (wxT('0') <= r && r <= wxT('9')) c = (c << 4) | (r - wxT('0') ); + else if (wxT('A') <= r && r <= wxT('F')) c = (c << 4) | (r - wxT('A') + 10); + else if (wxT('a') <= r && r <= wxT('f')) c = (c << 4) | (r - wxT('a') + 10); + else if (r == wxT('+')) { + if (c) { chr += c; c = 0; } + } else if (r == wxT('|')) { + if (c) { chr += c; c = 0; } + if (!chr.IsEmpty()) { val.Add(chr); chr.Clear(); } + } else + break; + } + } + wnd->m_gridRecent->SetCharacters(val); + } else if (RestoreValue(wxT("recentChars"), &recent)) { + // Legacy value found. 
+ wxArrayString val; + for (size_t i = 0, n = recent.Length(); i < n; i++) + val.Add(wxString(1, recent[i])); + wnd->m_gridRecent->SetCharacters(val); + } for (size_t i = 0, n = app->m_cc_db.idxRnk.size(); i < n; i++) { const auto &cc = app->m_cc_db.idxRnk[i]; diff --git a/ZRCola/zrcolachrslct.h b/ZRCola/zrcolachrslct.h index 25019f8..6f8ed27 100644 --- a/ZRCola/zrcolachrslct.h +++ b/ZRCola/zrcolachrslct.h @@ -28,15 +28,106 @@ class wxPersistentZRColaCharSelect; #include "zrcolagui.h" #include -#include +#include #include #include #include #include #include +#include #include +/// +/// Validator for Unicode character +/// +class WXEXTEND_API wxZRColaUTF16CharValidator : public wxValidator +{ +public: + /// + /// Construct the validator with a value to store data + /// + wxZRColaUTF16CharValidator(wchar_t *val = NULL); + + /// + /// Copies this validator + /// + virtual wxObject* Clone() const; + + /// + /// Validates the value + /// + virtual bool Validate(wxWindow *parent); + + /// + /// Transfers the value to the window + /// + virtual bool TransferToWindow(); + + /// + /// Transfers the value from the window + /// + virtual bool TransferFromWindow(); + + /// + /// Parses hexadecimal Unicode character value + /// + static bool Parse(const wxString &val_in, size_t i_start, size_t i_end, wxTextCtrl *ctrl, wxWindow *parent, wchar_t *val_out = NULL); + +protected: + wchar_t *m_val; ///< Pointer to variable to receive control's parsed value + +private: + wxDECLARE_DYNAMIC_CLASS(wxZRColaUTF16CharValidator); + wxDECLARE_NO_ASSIGN_CLASS(wxZRColaUTF16CharValidator); +}; + + +/// +/// Validator for Unicode dump +/// +class wxZRColaUnicodeDumpValidator : public wxValidator +{ +public: + /// + /// Construct the validator with a value to store data + /// + wxZRColaUnicodeDumpValidator(wxString *val = NULL); + + /// + /// Copies this validator + /// + virtual wxObject* Clone() const; + + /// + /// Validates the value + /// + virtual bool Validate(wxWindow *parent); + + /// + /// Transfers the value to 
the window + /// + virtual bool TransferToWindow(); + + /// + /// Transfers the value from the window + /// + virtual bool TransferFromWindow(); + + /// + /// Parses Unicode dump value + /// + static bool Parse(const wxString &val_in, size_t i_start, size_t i_end, wxTextCtrl *ctrl, wxWindow *parent, wxString *val_out = NULL); + +protected: + wxString *m_val; ///< Pointer to variable to receive control's parsed value + +private: + wxDECLARE_DYNAMIC_CLASS(wxZRColaUnicodeDumpValidator); + wxDECLARE_NO_ASSIGN_CLASS(wxZRColaUnicodeDumpValidator); +}; + + wxDECLARE_EVENT(wxEVT_SEARCH_COMPLETE, wxThreadEvent); @@ -76,10 +167,10 @@ protected: void ResetResults(); void NavigateBy(int offset); - void NavigateTo(wchar_t c); + void NavigateTo(const wxString &c); public: - wchar_t m_char; ///< Currently selected character (0 when none) + wxString m_char; ///< Currently selected character (empty when none) protected: LCID m_locale; ///< Locale for tag lookup @@ -104,7 +195,7 @@ protected: public: std::wstring m_search; ///< Search phrase std::set m_cats; ///< Search categories - std::vector > m_hits; ///< Search results + std::vector > m_hits; ///< Search results protected: wxZRColaCharSelect *m_parent; ///< Thread owner @@ -116,7 +207,7 @@ protected: /// struct NavigationState { - wchar_t m_char; + std::wstring m_char; struct { wxGridCellCoords m_selected; } m_related; diff --git a/ZRCola/zrcolafrm.cpp b/ZRCola/zrcolafrm.cpp index 216d0e3..5008182 100644 --- a/ZRCola/zrcolafrm.cpp +++ b/ZRCola/zrcolafrm.cpp @@ -213,7 +213,7 @@ void wxZRColaFrame::OnForwardEvent(wxCommandEvent& event) void wxZRColaFrame::OnInsertCharacter(wxCommandEvent& event) { - if (m_chrSelect->ShowModal() == wxID_OK && m_chrSelect->m_char) { + if (m_chrSelect->ShowModal() == wxID_OK && !m_chrSelect->m_char.empty()) { m_panel->m_decomposed->WriteText(m_chrSelect->m_char); m_panel->m_decomposed->SetFocus(); } diff --git a/ZRCola/zrcolakeyhndlr.cpp b/ZRCola/zrcolakeyhndlr.cpp index de83069..929cf55 100644 --- 
a/ZRCola/zrcolakeyhndlr.cpp +++ b/ZRCola/zrcolakeyhndlr.cpp @@ -96,13 +96,9 @@ bool wxZRColaKeyHandler::ProcessEvent(wxEvent& event) (e.AltDown() ? ZRCola::keyseq_db::keyseq::ALT : 0); m_seq.push_back(key); - auto n = m_seq.size(); - ZRCola::keyseq_db::keyseq *ks = (ZRCola::keyseq_db::keyseq*)new char[sizeof(ZRCola::keyseq_db::keyseq) + sizeof(ZRCola::keyseq_db::keyseq::key_t)*n]; - ks->chr = 0; - ks->seq_len = n; - memcpy(ks->seq, m_seq.data(), sizeof(ZRCola::keyseq_db::keyseq::key_t)*n); + std::unique_ptr ks((ZRCola::keyseq_db::keyseq*)new char[sizeof(ZRCola::keyseq_db::keyseq) + sizeof(ZRCola::keyseq_db::keyseq::key_t)*m_seq.size()]); + ks->ZRCola::keyseq_db::keyseq::keyseq(m_seq.data(), m_seq.size()); found = app->m_ks_db.idxKey.find(*ks, start); - delete ks; } if (found) { @@ -116,14 +112,14 @@ bool wxZRColaKeyHandler::ProcessEvent(wxEvent& event) wxObject *obj = event.GetEventObject(); if (obj && obj->IsKindOf(wxCLASSINFO(wxTextCtrl))) { // Push text to source control. - ((wxTextCtrl*)obj)->WriteText(ks.chr); + ((wxTextCtrl*)obj)->WriteText(wxString(ks.chr(), ks.chr_len())); // Event is fully processed now. event.StopPropagation(); return true; } } else if (start < app->m_ks_db.idxKey.size() && - ZRCola::keyseq_db::keyseq::CompareSequence(m_seq.data(), m_seq.size(), app->m_ks_db.idxKey[start].seq, std::min(app->m_ks_db.idxKey[start].seq_len, m_seq.size())) == 0) + ZRCola::keyseq_db::keyseq::CompareSequence(m_seq.data(), m_seq.size(), app->m_ks_db.idxKey[start].seq(), std::min(app->m_ks_db.idxKey[start].seq_len(), m_seq.size())) == 0) { // The sequence is a partial match. Continue watching. 
if (pFrame && pFrame->GetStatusBar()) diff --git a/ZRCola/zrcolasettings.cpp b/ZRCola/zrcolasettings.cpp index bef2786..db148bc 100644 --- a/ZRCola/zrcolasettings.cpp +++ b/ZRCola/zrcolasettings.cpp @@ -34,7 +34,7 @@ wxZRColaSettings::wxZRColaSettings(wxWindow* parent) : for (size_t i = 0, n = app->m_lang_db.idxLng.size(); i < n; i++) { const auto &lang = app->m_lang_db.idxLng[i]; wxString - label(lang.name, lang.name_len), + label(lang.name(), lang.name_len()), label_tran(wxGetTranslation(label, wxT("ZRCola-zrcdb"))); m_languages->Insert(label_tran, i); } diff --git a/ZRColaCompile/dbsource.cpp b/ZRColaCompile/dbsource.cpp index d57929c..5e3892b 100644 --- a/ZRColaCompile/dbsource.cpp +++ b/ZRColaCompile/dbsource.cpp @@ -28,35 +28,19 @@ using namespace winstd; // ZRCola::DBSource::character_bank ////////////////////////////////////////////////////////////////////////// -ZRCola::DBSource::character_bank::character_bank() : vector >() -{ - resize(0x10000); -} - - void ZRCola::DBSource::character_bank::build_related() { - // Initialize ignore list. - m_ignore.insert(L"letter"); - m_ignore.insert(L"modifier"); - m_ignore.insert(L"symbol"); - m_ignore.insert(L"accent"); - m_ignore.insert(L"with"); - m_ignore.insert(L"and"); - m_ignore.insert(L"capital"); - m_ignore.insert(L"small"); - m_ignore.insert(L"combining"); - SYSTEM_INFO si; GetSystemInfo(&si); // Launch workers. 
build_related_worker **workers = new build_related_worker*[si.dwNumberOfProcessors]; - size_type from = 0, to; - for (DWORD i = 0; i < si.dwNumberOfProcessors; i++) { - to = MulDiv(i + 1, 0x10000, si.dwNumberOfProcessors); - workers[i] = new build_related_worker(this, from, to); - from = to; + size_type from = 0, total = size(); + iterator chr_from = begin(), chr_to; + for (DWORD i = 0; i < si.dwNumberOfProcessors; i++, chr_from = chr_to) { + size_type to = MulDiv(i + 1, total, si.dwNumberOfProcessors); + for (chr_to = chr_from; from < to; from++, ++chr_to); + workers[i] = new build_related_worker(this, chr_from, chr_to); } // Wait for workers. @@ -71,7 +55,7 @@ void ZRCola::DBSource::character_bank::build_related() } -ZRCola::DBSource::character_bank::build_related_worker::build_related_worker(_In_ const character_bank *cb, _In_ size_type from, _In_ size_type to) : +ZRCola::DBSource::character_bank::build_related_worker::build_related_worker(_In_ const character_bank *cb, _In_ iterator from, _In_ iterator to) : win_handle((HANDLE)_beginthreadex(NULL, 0, process, this, CREATE_SUSPENDED, NULL)), m_heap(HeapCreate(0, 0, 0)), m_cb(cb), @@ -86,40 +70,39 @@ ZRCola::DBSource::character_bank::build_related_worker::build_related_worker(_In unsigned int ZRCola::DBSource::character_bank::build_related_worker::process() { heap_allocator al(m_heap); - basic_string, heap_allocator > rel(al); + vector > rel(al); set, heap_allocator > matching(less(), al); - for (size_type i = m_from; i < m_to; i++) { - auto &chr = *(m_cb->at(i).get()); - if (&chr == NULL) continue; - + for (auto c = m_from; c != m_to; c++) { rel.clear(); - // Remove all unexisting, inactive, or self related characters. - for (auto j = chr.rel.length(); j--;) { - wchar_t c = chr.rel[j]; - if (m_cb->at(c) && (wchar_t)j != c) - rel += c; + // Skip all unexisting, or self related characters. 
+ auto m_cb_end = m_cb->cend(); + for (std::vector::const_pointer c_rel = c->second.rel.data(), c_rel_end = c_rel + c->second.rel.size(), c_rel_next = c_rel_end; c_rel < c_rel_end; c_rel = c_rel_next) { + c_rel_next = c_rel + wcsnlen(c_rel, c_rel_end - c_rel) + 1; + if (m_cb->find(c_rel) != m_cb_end && c->first.compare(c_rel) != 0) + rel.insert(rel.end(), c_rel, c_rel_next); } - // Add all characters that share enought keywords. - for (size_type j = 0, j_end = m_cb->size(); j < j_end; j++) { - if (i == j || rel.find((wchar_t)j) != wstring::npos) + // Add all characters that share enough keywords. + for (auto c2 = m_cb->cbegin(), c2_end = m_cb->cend(); c2 != c2_end; ++c2) { + if (c == c2) continue; - const auto &chr2 = *(m_cb->at(j).get()); - if (&chr2 == NULL) + bool already_present = false; + for (std::vector::const_pointer c_rel = rel.data(), c_rel_end = c_rel + rel.size(), c_rel_next = c_rel_end; c_rel < c_rel_end; c_rel = c_rel_next) { + c_rel_next = c_rel + wcsnlen(c_rel, c_rel_end - c_rel) + 1; + if (c2->first.compare(c_rel) == 0) { + already_present = true; + break; + } + } + if (already_present) continue; set::size_type comparisons = 0; matching.clear(); - for (auto term = chr.terms.cbegin(), term_end = chr.terms.cend(); term != term_end; ++term) { - // Test for ignored word(s). - if (m_cb->m_ignore.find(*term) != m_cb->m_ignore.cend()) - continue; - for (auto term2 = chr2.terms.cbegin(), term2_end = chr2.terms.cend(); term2 != term2_end; ++term2) { - // Test for ignored word(s). 
- if (m_cb->m_ignore.find(*term2) != m_cb->m_ignore.cend()) - continue; + for (auto term = c->second.terms_rel.cbegin(), term_end = c->second.terms_rel.cend(); term != term_end; ++term) { + for (auto term2 = c2->second.terms_rel.cbegin(), term2_end = c2->second.terms_rel.cend(); term2 != term2_end; ++term2) { comparisons++; if (*term == *term2) matching.insert(*term); @@ -130,11 +113,11 @@ unsigned int ZRCola::DBSource::character_bank::build_related_worker::process() // If 1/2 terms match, assume related. auto hits = matching.size(); if (hits*hits*2 >= comparisons) - rel += chr2.chr; + rel.insert(rel.end(), c2->first.data(), c2->first.data() + c2->first.length() + 1); } } - chr.rel.assign(rel.c_str(), rel.length()); + c->second.rel.assign(rel.cbegin(), rel.cend()); } return 0; @@ -197,7 +180,7 @@ void ZRCola::DBSource::character_desc_idx::parse_keywords(const wchar_t *str, se } -void ZRCola::DBSource::character_desc_idx::add_keywords(const set &terms, wchar_t chr, size_t sub) +void ZRCola::DBSource::character_desc_idx::add_keywords(const set &terms, const wstring &chr, size_t sub) { for (auto term = terms.cbegin(), term_end = terms.cend(); term != term_end; ++term) { if (sub) { @@ -250,6 +233,16 @@ void ZRCola::DBSource::character_desc_idx::save(ZRCola::textindex& f, wstring& str variant v; wxVERIFY(SUCCEEDED(f->get_Value(&v))); - wxCHECK(SUCCEEDED(v.change_type(VT_BSTR)), false); - - // Parse the field. Must be "xxxx+xxxx+xxxx..." sequence. str.clear(); - for (UINT i = 0, n = ::SysStringLen(V_BSTR(&v)); i < n && V_BSTR(&v)[i];) { - // Parse Unicode code. 
- UINT j = 0; - wchar_t c = 0; - for (; i < n && V_BSTR(&v)[i]; i++, j++) { - if (L'0' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'9') c = c*0x10 + (V_BSTR(&v)[i] - L'0'); - else if (L'A' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'F') c = c*0x10 + (V_BSTR(&v)[i] - L'A' + 10); - else if (L'a' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'f') c = c*0x10 + (V_BSTR(&v)[i] - L'a' + 10); - else break; - } - if (j <= 0 || 4 < j) { - bstr fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname))); - _ftprintf(stderr, wxT("%s: error ZCC0020: Syntax error in \"%.*ls\" field (\"%.*ls\"). Unicode code must be one to four hexadecimal characters long.\n"), m_filename.c_str(), fieldname.length(), (BSTR)fieldname, n, V_BSTR(&v)); - return false; - } - str += c; + if (V_VT(&v) != VT_NULL) { + wxCHECK(SUCCEEDED(v.change_type(VT_BSTR)), false); - // Skip delimiter(s) and whitespace. - for (; i < n && V_BSTR(&v)[i] && (V_BSTR(&v)[i] == L'+' || _iswspace_l(V_BSTR(&v)[i], m_locale)); i++); + // Parse the field. Must be "xxxx+xxxx+xxxx..." sequence. + for (UINT i = 0, n = ::SysStringLen(V_BSTR(&v)); i < n && V_BSTR(&v)[i];) { + // Parse Unicode code. + UINT j = 0; + wchar_t c = 0; + for (; i < n && V_BSTR(&v)[i]; i++, j++) { + if (L'0' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'9') c = c*0x10 + (V_BSTR(&v)[i] - L'0'); + else if (L'A' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'F') c = c*0x10 + (V_BSTR(&v)[i] - L'A' + 10); + else if (L'a' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'f') c = c*0x10 + (V_BSTR(&v)[i] - L'a' + 10); + else break; + } + if (j <= 0 || 4 < j) { + bstr fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname))); + _ftprintf(stderr, wxT("%s: error ZCC0020: Syntax error in \"%.*ls\" field (\"%.*ls\"). Unicode code must be one to four hexadecimal characters long.\n"), m_filename.c_str(), fieldname.length(), (BSTR)fieldname, n, V_BSTR(&v)); + return false; + } + str += c; + + // Skip delimiter(s) and whitespace. 
+ for (; i < n && V_BSTR(&v)[i] && (V_BSTR(&v)[i] == L'+' || _iswspace_l(V_BSTR(&v)[i], m_locale)); i++); + } } return true; @@ -615,19 +610,19 @@ bool ZRCola::DBSource::GetTranslation(const com_obj& rs, ZRCola::D { com_obj f; wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"komb"), &f))); - wxCHECK(GetUnicodeString(f, t.decomp.str), false); + wxCHECK(GetUnicodeString(f, t.dec.str), false); } { com_obj f; wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak"), &f))); - wxCHECK(GetUnicodeString(f, t.chr), false); + wxCHECK(GetUnicodeString(f, t.com), false); } { com_obj f; wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"rang_znak"), &f))); - wxCHECK(GetValue(f, t.decomp.rank), false); + wxCHECK(GetValue(f, t.dec.rank), false); } return true; @@ -665,7 +660,7 @@ bool ZRCola::DBSource::GetKeySequence(const com_obj& rs, ZRCola::D { com_obj f; wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"Znak"), &f))); - wxCHECK(GetUnicodeCharacter(f, ks.chr), false); + wxCHECK(GetUnicodeString(f, ks.chr), false); } int modifiers; @@ -884,11 +879,11 @@ bool ZRCola::DBSource::GetCharacterGroup(const com_obj& rs, chrgrp com_obj f_char, f_show; wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"Znak" ), &f_char))); wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"pogost"), &f_show))); - for (VARIANT_BOOL eof = VARIANT_TRUE; SUCCEEDED(rs_chars->get_EOF(&eof)) && !eof; rs_chars->MoveNext()) { - wchar_t c; - wxCHECK(GetUnicodeCharacter(f_char, c), false); - size_t n = cg.chars.length(); - cg.chars += c; + size_t n = 0; + for (VARIANT_BOOL eof = VARIANT_TRUE; SUCCEEDED(rs_chars->get_EOF(&eof)) && !eof; rs_chars->MoveNext(), n++) { + wstring c; + wxCHECK(GetUnicodeString(f_char, c), false); + cg.chars.insert(cg.chars.end(), c.data(), c.data() + c.length() + 1); bool show; wxCHECK(GetValue(f_show, show), false); if ((n % 16) == 0) @@ -932,42 +927,49 @@ bool ZRCola::DBSource::GetCharacter(const com_obj& rs, character& com_obj flds; wxVERIFY(SUCCEEDED(rs->get_Fields(&flds))); - wchar_t c; - chr.rel.clear(); + wstring c; + 
chr.second.terms.clear(); + chr.second.terms_rel.clear(); + chr.second.rel.clear(); { com_obj f; wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak"), &f))); - wxCHECK(GetUnicodeCharacter(f, chr.chr), false); + wxCHECK(GetUnicodeString(f, chr.first), false); } { com_obj f; wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak_v"), &f))); - wxCHECK(GetUnicodeCharacter(f, c), false); - if (c && c != chr.chr) - chr.rel += c; + wxCHECK(GetUnicodeString(f, c), false); + if (!c.empty() && c != chr.first) + chr.second.rel.insert(chr.second.rel.end(), c.data(), c.data() + c.length() + 1); } { com_obj f; wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak_m"), &f))); - wxCHECK(GetUnicodeCharacter(f, c), false); - if (c && c != chr.chr) - chr.rel += c; + wxCHECK(GetUnicodeString(f, c), false); + if (!c.empty() && c != chr.first) + chr.second.rel.insert(chr.second.rel.end(), c.data(), c.data() + c.length() + 1); } { com_obj f; wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"opis_en"), &f))); - wxCHECK(GetValue(f, chr.desc), false); + wxCHECK(GetValue(f, chr.second.desc), false); + ZRCola::DBSource::character_desc_idx::parse_keywords(chr.second.desc.c_str(), chr.second.terms); + for (auto term = chr.second.terms.cbegin(), term_end = chr.second.terms.cend(); term != term_end; ++term) { + if (m_terms_ignore.find(*term) != m_terms_ignore.cend()) + continue; + chr.second.terms_rel.insert(*term); + } } - ZRCola::DBSource::character_desc_idx::parse_keywords(chr.desc.c_str(), chr.terms); { com_obj f; wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"kat"), &f))); - wxCHECK(GetChrCat(f, chr.cat), false); + wxCHECK(GetChrCat(f, chr.second.cat), false); } return true; @@ -1058,7 +1060,7 @@ bool ZRCola::DBSource::GetCharacterTag(const winstd::com_obj& rs, { com_obj f; wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak"), &f))); - wxCHECK(GetUnicodeCharacter(f, ct.chr), false); + wxCHECK(GetUnicodeString(f, ct.chr), false); } { diff --git a/ZRColaCompile/dbsource.h b/ZRColaCompile/dbsource.h index 
ba39bd3..6d241fc 100644 --- a/ZRColaCompile/dbsource.h +++ b/ZRColaCompile/dbsource.h @@ -79,8 +79,8 @@ namespace ZRCola { /// class translation { public: - std::wstring chr; ///< Composed character - charseq decomp; ///< Decomposed sequence + charseq dec; ///< Decomposed sequence + std::wstring com; ///< Composed character }; @@ -114,7 +114,7 @@ namespace ZRCola { }; public: - wchar_t chr; ///< Character + std::wstring chr; ///< Character std::vector seq; ///< Key sequence }; @@ -147,69 +147,58 @@ namespace ZRCola { int id; ///< Character group ID int rank; ///< Character group rank std::wstring name; ///< Character group name - std::wstring chars; ///< Character group characters + std::vector chars; ///< Character group characters std::vector show; ///< Bit vector if particular character is displayed initially }; + /// + /// Character data + /// + class character_data { + public: + inline character_data() + { + cat.data[0] = 0; + cat.data[1] = 0; + } + + inline character_data(_In_ const character_data &othr) : + cat (othr.cat), + desc (othr.desc), + terms (othr.terms), + terms_rel(othr.terms_rel), + rel (othr.rel) + { + } + + ZRCola::chrcatid_t cat; ///< Category ID + std::wstring desc; ///< Character description + std::set terms; ///< Search terms + std::set terms_rel; ///< Relevant terms for relating characters + std::vector rel; ///< Related characters + }; + + /// /// Character /// - class character { - public: - inline character() - { - chr = 0; - cat.data[0] = 0; - cat.data[1] = 0; - } - - inline character(_In_ const character &othr) : - chr (othr.chr), - cat (othr.cat), - desc (othr.desc), - terms(othr.terms), - rel (othr.rel) - { - } - - inline bool operator==(_In_ const character &othr) const - { - return - chr == othr.chr && - cat == othr.cat && - desc == othr.desc && - terms == othr.terms && - rel == othr.rel; - } - - inline bool operator!=(_In_ const character &othr) const - { - return !operator==(othr); - } - - wchar_t chr; ///< Character - 
ZRCola::chrcatid_t cat; ///< Category ID - std::wstring desc; ///< Character description - std::set terms; ///< Search terms - std::wstring rel; ///< Related characters - }; + typedef std::pair character; /// /// Character bank /// - class character_bank : public std::vector > + class character_bank : public std::map { public: - character_bank(); void build_related(); protected: class build_related_worker : public winstd::win_handle { public: - build_related_worker(_In_ const character_bank *cb, _In_ size_type from, _In_ size_type to); + build_related_worker(_In_ const character_bank *cb, _In_ iterator from, _In_ iterator to); inline void join() { @@ -230,12 +219,9 @@ namespace ZRCola { protected: const character_bank *m_cb; - size_type m_from, m_to; + iterator m_from, m_to; winstd::heap m_heap; }; - - protected: - std::set m_ignore; }; @@ -266,8 +252,8 @@ namespace ZRCola { { public: static void parse_keywords(const wchar_t *str, std::set &terms); - void add_keywords(const std::set &terms, wchar_t chr, size_t sub = 0); - inline void add_keywords(const wchar_t *str, wchar_t chr, size_t sub = 0) + void add_keywords(const std::set &terms, const std::wstring &chr, size_t sub = 0); + inline void add_keywords(const wchar_t *str, const std::wstring &chr, size_t sub = 0) { std::set terms; parse_keywords(str, terms); @@ -277,21 +263,21 @@ namespace ZRCola { void save(ZRCola::textindex &idx) const; protected: - inline void add_keyword(const std::wstring &term, wchar_t chr) + inline void add_keyword(const std::wstring &term, const std::wstring &chr) { iterator idx = find(term); if (idx == end()) { // New keyword. - insert(std::make_pair(term, std::vector(1, chr))); + insert(std::make_pair(term, mapped_type(chr.data(), chr.data() + chr.length() + 1))); } else { // Append to existing keyword. 
- std::vector &val = idx->second; - for (auto i = val.cbegin(), i_end = val.cend(); ; ++i) { - if (i == i_end) { + auto &val = idx->second; + for (mapped_type::size_type i = 0, n = val.size(); ; i += wcsnlen(val.data() + i, n - i) + 1) { + if (i >= n) { // End-of-values reached. Append character. - val.push_back(chr); + val.insert(val.end(), chr.data(), chr.data() + chr.length() + 1); break; - } else if (*i == chr) { + } else if (chr.compare(val.data() + i) == 0) { // Character already among the values. break; } @@ -317,7 +303,7 @@ namespace ZRCola { /// class chrtag { public: - wchar_t chr; ///> Character + std::wstring chr; ///> Character int tag; ///< Tag ID }; @@ -697,10 +683,12 @@ namespace ZRCola { protected: std::basic_string m_filename; ///< Database filename - winstd::com_obj m_db; ///< Database + winstd::com_obj m_db; ///< Database _locale_t m_locale; ///< Database locale winstd::com_obj m_comCharacterGroup; ///< ADO Command for GetCharacterGroup subquery winstd::com_obj m_pCharacterGroup1; ///< \c m_comCharacterGroup parameter + + std::set m_terms_ignore; ///< Terms to ignore when comparing characters }; }; diff --git a/ZRColaCompile/main.cpp b/ZRColaCompile/main.cpp index 5bc5974..c79fab5 100644 --- a/ZRColaCompile/main.cpp +++ b/ZRColaCompile/main.cpp @@ -165,13 +165,13 @@ int _tmain(int argc, _TCHAR *argv[]) ZRCola::DBSource::translation trans; if (src.GetTranslation(rs, trans)) { // Add translation to temporary database. 
- auto const t = db_temp1.find(trans.chr); + auto const t = db_temp1.find(trans.com); if (t != db_temp1.end()) - t->second.insert(std::move(trans.decomp)); + t->second.insert(std::move(trans.dec)); else { translation_db::mapped_type d; - d.insert(std::move(trans.decomp)); - db_temp1.insert(std::move(pair(trans.chr, std::move(d)))); + d.insert(std::move(trans.dec)); + db_temp1.insert(std::move(pair(trans.com, std::move(d)))); } } else has_errors = true; @@ -214,16 +214,14 @@ int _tmain(int argc, _TCHAR *argv[]) unsigned __int32 idx = db.data.size(); wxASSERT_MSG((int)0xffff8000 <= d->rank && d->rank <= (int)0x00007fff, wxT("transformation rank out of bounds")); db.data.push_back((unsigned __int16)d->rank); - wstring::size_type n_com = t->first.length(); - wxASSERT_MSG(n_com <= 0xffff, wxT("composition string too long")); - db.data.push_back((unsigned __int16)n_com); - wstring::size_type n_dec = d->str.length(); - wxASSERT_MSG(n_com + n_dec <= 0xffff, wxT("decomposition string too long")); - db.data.push_back((unsigned __int16)(n_com + n_dec)); - for (wstring::size_type i = 0; i < n_com; i++) - db.data.push_back(t->first[i]); - for (wstring::size_type i = 0; i < n_dec; i++) - db.data.push_back(d->str[i]); + wstring::size_type n = t->first.length(); + wxASSERT_MSG(n <= 0xffff, wxT("composition overflow")); + db.data.push_back((unsigned __int16)n); + n += d->str.length(); + wxASSERT_MSG(n <= 0xffff, wxT("decomposition overflow")); + db.data.push_back((unsigned __int16)n); + db.data.insert(db.data.end(), t->first.cbegin(), t->first.cend()); + db.data.insert(db.data.end(), d->str .cbegin(), d->str .cend()); db.idxComp .push_back(idx); db.idxDecomp.push_back(idx); } @@ -265,17 +263,19 @@ int _tmain(int argc, _TCHAR *argv[]) if (src.GetKeySequence(rs, ks)) { // Add key sequence to index and data. 
unsigned __int32 idx = db.data.size(); - db.data.push_back(ks.chr); - vector::size_type n = ks.seq.size(); - wxASSERT_MSG(n <= 0xffff, wxT("key sequence too long")); + wstring::size_type n = ks.chr.length(); + wxASSERT_MSG(n <= 0xffff, wxT("character overflow")); db.data.push_back((unsigned __int16)n); - for (vector::size_type i = 0; i < n; i++) { - const ZRCola::DBSource::keyseq::keycode &kc = ks.seq[i]; - db.data.push_back(kc.key); + n += ks.seq.size() * sizeof(ZRCola::keyseq_db::keyseq::key_t) / sizeof(wchar_t); + wxASSERT_MSG(n <= 0xffff, wxT("key sequence overflow")); + db.data.push_back((unsigned __int16)n); + db.data.insert(db.data.end(), ks.chr.cbegin(), ks.chr.cend()); + for (auto kc = ks.seq.cbegin(), kc_end = ks.seq.cend(); kc != kc_end; ++kc) { + db.data.push_back(kc->key); db.data.push_back( - (kc.shift ? ZRCola::keyseq_db::keyseq::SHIFT : 0) | - (kc.ctrl ? ZRCola::keyseq_db::keyseq::CTRL : 0) | - (kc.alt ? ZRCola::keyseq_db::keyseq::ALT : 0)); + (kc->shift ? ZRCola::keyseq_db::keyseq::SHIFT : 0) | + (kc->ctrl ? ZRCola::keyseq_db::keyseq::CTRL : 0) | + (kc->alt ? ZRCola::keyseq_db::keyseq::ALT : 0)); } db.idxChr.push_back(idx); db.idxKey.push_back(idx); @@ -293,10 +293,14 @@ int _tmain(int argc, _TCHAR *argv[]) &ks1 = db.idxKey[i - 1], &ks2 = db.idxKey[i ]; - if (ZRCola::keyseq_db::keyseq::CompareSequence(ks1.seq, ks1.seq_len, ks2.seq, ks2.seq_len) == 0) { + if (ZRCola::keyseq_db::keyseq::CompareSequence(ks1.seq(), ks1.seq_len(), ks2.seq(), ks2.seq_len()) == 0) { wxString seq_str; - ZRCola::keyseq_db::GetSequenceAsText(ks1.seq, ks1.seq_len, seq_str); - _ftprintf(stderr, wxT("%s: warning ZCC0007: Duplicate key sequence (%ls => %04X or %04X). The keyboard behaviour will be unpredictable.\n"), (LPCTSTR)filenameIn.c_str(), seq_str.c_str(), ks1.chr, ks2.chr); + ZRCola::keyseq_db::GetSequenceAsText(ks1.seq(), ks1.seq_len(), seq_str); + _ftprintf(stderr, wxT("%s: warning ZCC0007: Duplicate key sequence (%ls => %s or %s). 
The keyboard behaviour will be unpredictable.\n"), + (LPCTSTR)filenameIn.c_str(), + seq_str.c_str(), + ZRCola::GetUnicodeDump(ks1.chr(), ks1.chr_len()).c_str(), + ZRCola::GetUnicodeDump(ks2.chr(), ks2.chr_len()).c_str()); } } @@ -334,13 +338,11 @@ int _tmain(int argc, _TCHAR *argv[]) // Add language to index and data. unsigned __int32 idx = db.data.size(); - for (wstring::size_type i = 0; i < sizeof(ZRCola::langid_t)/sizeof(unsigned __int16); i++) - db.data.push_back(((const unsigned __int16*)lang.id.data)[i]); + db.data.insert(db.data.end(), reinterpret_cast(&lang.id), reinterpret_cast(&lang.id + 1)); wstring::size_type n = lang.name.length(); - wxASSERT_MSG(n <= 0xffff, wxT("language name too long")); + wxASSERT_MSG(n <= 0xffff, wxT("language name overflow")); db.data.push_back((unsigned __int16)n); - for (wstring::size_type i = 0; i < n; i++) - db.data.push_back(lang.name[i]); + db.data.insert(db.data.end(), lang.name.cbegin(), lang.name.cend()); db.idxLng.push_back(idx); } else has_errors = true; @@ -383,13 +385,11 @@ int _tmain(int argc, _TCHAR *argv[]) if (src.GetLanguageCharacter(rs, lc)) { // Add language characters to index and data. 
unsigned __int32 idx = db.data.size(); - for (wstring::size_type i = 0; i < sizeof(ZRCola::langid_t)/sizeof(unsigned __int16); i++) - db.data.push_back(((const unsigned __int16*)lc.lang.data)[i]); + db.data.insert(db.data.end(), reinterpret_cast(&lc.lang), reinterpret_cast(&lc.lang + 1)); wstring::size_type n = lc.chr.length(); - wxASSERT_MSG(n <= 0xffff, wxT("character string too long")); + wxASSERT_MSG(n <= 0xffff, wxT("character overflow")); db.data.push_back((unsigned __int16)n); - for (wstring::size_type i = 0; i < n; i++) - db.data.push_back(lc.chr[i]); + db.data.insert(db.data.end(), lc.chr.cbegin(), lc.chr.cend()); db.idxChr.push_back(idx); #ifdef ZRCOLA_LANGCHAR_LANG_IDX db.idxLng.push_back(idx); @@ -447,18 +447,15 @@ int _tmain(int argc, _TCHAR *argv[]) db.data.push_back((unsigned __int16)cg.id); wxASSERT_MSG((int)0xffff8000 <= cg.rank && cg.rank <= (int)0x00007fff, wxT("character group rank out of bounds")); db.data.push_back((unsigned __int16)cg.rank); - wstring::size_type n_name = cg.name.length(); - wxASSERT_MSG(n_name <= 0xffff, wxT("character group name too long")); - db.data.push_back((unsigned __int16)n_name); - wstring::size_type n_char = cg.chars.length(); - wxASSERT_MSG(n_char <= 0xffff, wxT("too many character group characters")); - db.data.push_back((unsigned __int16)n_char); - for (wstring::size_type i = 0; i < n_name; i++) - db.data.push_back(cg.name[i]); - for (wstring::size_type i = 0; i < n_char; i++) - db.data.push_back(cg.chars[i]); - for (std::vector::size_type i = 0, n = cg.show.size(); i < n; i++) - db.data.push_back(cg.show[i]); + wstring::size_type n = cg.name.length(); + wxASSERT_MSG(n <= 0xffff, wxT("character group name overflow")); + db.data.push_back((unsigned __int16)n); + n += cg.chars.size(); + wxASSERT_MSG(n <= 0xffff, wxT("character group characters overflow")); + db.data.push_back((unsigned __int16)n); + db.data.insert(db.data.end(), cg.name .cbegin(), cg.name .cend()); + db.data.insert(db.data.end(), cg.chars.cbegin(), 
cg.chars.cend()); + db.data.insert(db.data.end(), cg.show .cbegin(), cg.show .cend()); db.idxRnk.push_back(idx); } else has_errors = true; @@ -490,15 +487,14 @@ int _tmain(int argc, _TCHAR *argv[]) ZRCola::DBSource::character_desc_idx idxChrDsc, idxChrDscSub; ZRCola::DBSource::character_bank chrs; + ZRCola::DBSource::character chr; // Phase 1: Parse characters and build indexes. for (; !ZRCola::DBSource::IsEOF(rs); rs->MoveNext()) { // Read character from the database. - unique_ptr c(new ZRCola::DBSource::character); - if (src.GetCharacter(rs, *c)) { - const auto &chr = *c.get(); - chrs[chr.chr].swap(c); - } else + if (src.GetCharacter(rs, chr)) + chrs[chr.first] = std::move(chr.second); + else has_errors = true; } @@ -512,33 +508,30 @@ int _tmain(int argc, _TCHAR *argv[]) db.data .reserve(count*4); // Phase 3: Parse characters and build index and data. - for (size_t i = 0, i_end = chrs.size(); i < i_end; i++) { - const auto &chr = *(chrs[i].get()); - if (&chr == NULL) continue; - + for (auto chr = chrs.cbegin(), chr_end = chrs.cend(); chr != chr_end; ++chr) { // Add character to index and data. 
unsigned __int32 idx = db.data.size(); - db.data.push_back((unsigned __int16)chr.chr); - for (wstring::size_type i = 0; i < sizeof(ZRCola::chrcatid_t)/sizeof(unsigned __int16); i++) - db.data.push_back(((const unsigned __int16*)chr.cat.data)[i]); - wstring::size_type n_desc = chr.desc.length(); - wxASSERT_MSG(n_desc <= 0xffff, wxT("character description too long")); - db.data.push_back((unsigned __int16)n_desc); - wstring::size_type n_rel = chr.rel.length(); - wxASSERT_MSG(n_rel <= 0xffff, wxT("too many related characters")); - db.data.push_back((unsigned __int16)n_rel); - for (wstring::size_type i = 0; i < n_desc; i++) - db.data.push_back(chr.desc[i]); - for (wstring::size_type i = 0; i < n_rel; i++) - db.data.push_back(chr.rel[i]); + db.data.insert(db.data.end(), reinterpret_cast(&chr->second.cat), reinterpret_cast(&chr->second.cat + 1)); + wstring::size_type n = chr->first.length(); + wxASSERT_MSG(n <= 0xffff, wxT("character overflow")); + db.data.push_back((unsigned __int16)n); + n += chr->second.desc.length(); + wxASSERT_MSG(n <= 0xffff, wxT("character description overflow")); + db.data.push_back((unsigned __int16)n); + n += chr->second.rel.size(); + wxASSERT_MSG(n <= 0xffff, wxT("related characters overflow")); + db.data.push_back((unsigned __int16)n); + db.data.insert(db.data.end(), chr->first .cbegin(), chr->first .cend()); + db.data.insert(db.data.end(), chr->second.desc.cbegin(), chr->second.desc.cend()); + db.data.insert(db.data.end(), chr->second.rel .cbegin(), chr->second.rel .cend()); db.idxChr.push_back(idx); // Add description (and keywords) to index. - idxChrDsc .add_keywords(chr.terms, chr.chr, 0); - idxChrDscSub.add_keywords(chr.terms, chr.chr, 3); + idxChrDsc .add_keywords(chr->second.terms, chr->first, 0); + idxChrDscSub.add_keywords(chr->second.terms, chr->first, 3); // Mark category used. - categories_used.insert(chr.cat); + categories_used.insert(chr->second.cat); } // Sort indices. 
@@ -588,15 +581,13 @@ int _tmain(int argc, _TCHAR *argv[]) // Add character category to index and data. unsigned __int32 idx = db.data.size(); - for (wstring::size_type i = 0; i < sizeof(ZRCola::chrcatid_t)/sizeof(unsigned __int16); i++) - db.data.push_back(((const unsigned __int16*)cc.id.data)[i]); + db.data.insert(db.data.end(), reinterpret_cast(&cc.id), reinterpret_cast(&cc.id + 1)); wxASSERT_MSG((int)0xffff8000 <= cc.rank && cc.rank <= (int)0x00007fff, wxT("character category rank out of bounds")); db.data.push_back((unsigned __int16)cc.rank); - wstring::size_type n_name = cc.name.length(); - wxASSERT_MSG(n_name <= 0xffff, wxT("character category name too long")); - db.data.push_back((unsigned __int16)n_name); - for (wstring::size_type i = 0; i < n_name; i++) - db.data.push_back(cc.name[i]); + wstring::size_type n = cc.name.length(); + wxASSERT_MSG(n <= 0xffff, wxT("character category name overflow")); + db.data.push_back((unsigned __int16)n); + db.data.insert(db.data.end(), cc.name.cbegin(), cc.name.cend()); db.idxChrCat.push_back(idx); db.idxRnk .push_back(idx); } else @@ -639,9 +630,12 @@ int _tmain(int argc, _TCHAR *argv[]) if (src.GetCharacterTag(rs, ct)) { // Add characters tags to index and data. 
unsigned __int32 idx = db.data.size(); - db.data.push_back(ct.chr); wxASSERT_MSG((int)0xffff8000 <= ct.tag && ct.tag <= (int)0x00007fff, wxT("tag out of bounds")); db.data.push_back((unsigned __int16)ct.tag); + wstring::size_type n = ct.chr.length(); + wxASSERT_MSG(n <= 0xffff, wxT("character overflow")); + db.data.push_back((unsigned __int16)n); + db.data.insert(db.data.end(), ct.chr.cbegin(), ct.chr.cend()); db.idxChr.push_back(idx); db.idxTag.push_back(idx); } else @@ -691,10 +685,9 @@ int _tmain(int argc, _TCHAR *argv[]) db.data.push_back(LOWORD(ln->first)); db.data.push_back(HIWORD(ln->first)); wstring::size_type n = nm->length(); - wxASSERT_MSG(n <= 0xffff, wxT("tag name too long")); + wxASSERT_MSG(n <= 0xffff, wxT("tag name overflow")); db.data.push_back((unsigned __int16)n); - for (wstring::size_type i = 0; i < n; i++) - db.data.push_back(nm->at(i)); + db.data.insert(db.data.end(), nm->cbegin(), nm->cend()); db.idxName.push_back(idx); db.idxTag .push_back(idx); } diff --git a/lib/libZRCola/include/zrcola/character.h b/lib/libZRCola/include/zrcola/character.h index 1c7fe75..df5632a 100644 --- a/lib/libZRCola/include/zrcola/character.h +++ b/lib/libZRCola/include/zrcola/character.h @@ -41,6 +41,8 @@ namespace ZRCola { /// typedef double charrank_t; +#pragma pack(push) +#pragma pack(2) /// /// Character category ID type /// Two letter abbreviation, non-terminated @@ -66,6 +68,7 @@ namespace ZRCola { /// static const chrcatid_t blank; }; +#pragma pack(pop) /// @@ -180,11 +183,66 @@ namespace ZRCola { /// Character data /// struct character { - wchar_t chr; ///> Character + public: chrcatid_t cat; ///> Category ID - unsigned __int16 desc_len; ///< Character description length in \c data - unsigned __int16 rel_len; ///< Related character count in \c data - wchar_t data[]; ///< Character description and list of related characters + + protected: + unsigned __int16 chr_to; ///< Character end in \c data + unsigned __int16 desc_to; ///< Character description end in \c 
data + unsigned __int16 rel_to; ///< Related characters end in \c data + wchar_t data[]; ///< Character, character description + + private: + inline character(_In_ const character &other); + inline character& operator=(_In_ const character &other); + + public: + /// + /// Constructs the character + /// + /// \param[in] chr Character + /// \param[in] chr_len Number of UTF-16 characters in \p chr + /// \param[in] cat Category + /// \param[in] desc Description + /// \param[in] desc_len Number of UTF-16 characters in \p desc + /// \param[in] rel Related characters list (zero delimited) + /// \param[in] rel_len Number of UTF-16 characters in \p rel (including zero delimiters) + /// + inline character( + _In_opt_z_count_(chr_len) const wchar_t *chr = NULL, + _In_opt_ size_t chr_len = 0, + _In_opt_ chrcatid_t cat = chrcatid_t::blank, + _In_opt_z_count_(desc_len) const wchar_t *desc = NULL, + _In_opt_ size_t desc_len = 0, + _In_opt_z_count_(rel_len) const wchar_t *rel = NULL, + _In_opt_ size_t rel_len = 0) + { + this->cat = cat; + this->chr_to = static_cast(chr_len); + if (chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); + this->desc_to = static_cast(this->chr_to + desc_len); + if (desc_len) memcpy(this->data + this->chr_to, desc, sizeof(wchar_t)*desc_len); + this->rel_to = static_cast(this->desc_to + rel_len); + if (rel_len) memcpy(this->data + this->desc_to, rel, sizeof(wchar_t)*rel_len); + } + + inline const wchar_t* chr () const { return data; }; + inline wchar_t* chr () { return data; }; + inline const wchar_t* chr_end() const { return data + chr_to; }; + inline wchar_t* chr_end() { return data + chr_to; }; + inline unsigned __int16 chr_len() const { return chr_to; }; + + inline const wchar_t* desc () const { return data + chr_to; }; + inline wchar_t* desc () { return data + chr_to; }; + inline const wchar_t* desc_end() const { return data + desc_to; }; + inline wchar_t* desc_end() { return data + desc_to; }; + inline unsigned __int16 desc_len() const { 
return desc_to - chr_to; }; + + inline const wchar_t* rel () const { return data + desc_to; }; + inline wchar_t* rel () { return data + desc_to; }; + inline const wchar_t* rel_end() const { return data + rel_to; }; + inline wchar_t* rel_end() { return data + rel_to; }; + inline unsigned __int16 rel_len() const { return rel_to - desc_to; }; }; #pragma pack(pop) @@ -214,8 +272,8 @@ namespace ZRCola { /// virtual int compare(_In_ const character &a, _In_ const character &b) const { - if (a.chr < b.chr) return -1; - else if (a.chr > b.chr) return 1; + int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len()); + if (r != 0) return r; return 0; } @@ -252,23 +310,25 @@ namespace ZRCola { /// \param[in ] fn_abort Pointer to function to periodically test for search cancellation /// \param[in ] cookie Cookie for \p fn_abort call /// - bool Search(_In_z_ const wchar_t *str, _In_ const std::set &cats, _Inout_ std::map &hits, _Inout_ std::map &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; + bool Search(_In_z_ const wchar_t *str, _In_ const std::set &cats, _Inout_ std::map &hits, _Inout_ std::map &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; /// /// Get character category /// - /// \param[in] c Character + /// \param[in] chr Character + /// \param[in] len Number of UTF-16 characters in \p chr /// /// \returns /// - Character category if character found /// - `ZRCola::chrcatid_t::blank` otherwise /// - inline chrcatid_t GetCharCat(wchar_t c) const + inline chrcatid_t GetCharCat(_In_z_count_(len) const wchar_t *chr, _In_ const size_t len) const { - char _chr[sizeof(character)]; - ((character *)_chr)->chr = c; + assert(len <= 0xffff); + std::unique_ptr c((character*)new char[sizeof(character) + sizeof(wchar_t)*len]); + c->character::character(chr, len); indexChar::size_type start; - return idxChr.find(*((character *)_chr), start) ? 
idxChr[start].cat : chrcatid_t::blank; + return idxChr.find(*c, start) ? idxChr[start].cat : chrcatid_t::blank; } }; @@ -287,10 +347,44 @@ namespace ZRCola { /// Character category data /// struct chrcat { - chrcatid_t id; ///< Character category ID - unsigned __int16 rank; ///< Character category rank - unsigned __int16 name_len; ///< \c name length (in characters) - wchar_t name[]; ///< Character category name + public: + chrcatid_t id; ///< Character category ID + unsigned __int16 rank; ///< Character category rank + + protected: + unsigned __int16 name_to; ///< Character category name end in \c data + wchar_t data[]; ///< Character category name + + private: + inline chrcat(_In_ const chrcat &other); + inline chrcat& operator=(_In_ const chrcat &other); + + public: + /// + /// Constructs the character category + /// + /// \param[in] id Character category ID + /// \param[in] rank Character category rank + /// \param[in] name Character category name + /// \param[in] name_len Number of UTF-16 characters in \p name + /// + inline chrcat( + _In_opt_ chrcatid_t id = chrcatid_t::blank, + _In_opt_ unsigned __int16 rank = 0, + _In_opt_z_count_(name_len) const wchar_t *name = NULL, + _In_opt_ size_t name_len = 0) + { + this->id = id; + this->rank = rank; + this->name_to = static_cast(name_len); + if (name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len); + } + + inline const wchar_t* name () const { return data; }; + inline wchar_t* name () { return data; }; + inline const wchar_t* name_end() const { return data + name_to; }; + inline wchar_t* name_end() { return data + name_to; }; + inline unsigned __int16 name_len() const { return name_to; }; }; #pragma pack(pop) @@ -374,10 +468,13 @@ namespace ZRCola { if (a.rank < b.rank) return -1; else if (a.rank > b.rank) return +1; - int r = _wcsncoll(a.name, b.name, std::min(a.name_len, b.name_len)); + unsigned __int16 + a_name_len = a.name_len(), + b_name_len = b.name_len(); + int r = _wcsncoll(a.name(), b.name(), 
std::min(a_name_len, b_name_len)); if (r != 0) return r; - if (a.name_len < b.name_len) return -1; - else if (a.name_len > b.name_len) return +1; + if (a_name_len < b_name_len) return -1; + else if (a_name_len > b_name_len) return +1; return 0; } diff --git a/lib/libZRCola/include/zrcola/common.h b/lib/libZRCola/include/zrcola/common.h index 3b2da4a..20b7e45 100644 --- a/lib/libZRCola/include/zrcola/common.h +++ b/lib/libZRCola/include/zrcola/common.h @@ -19,6 +19,7 @@ #pragma once +#include #include #include #include @@ -58,11 +59,11 @@ namespace ZRCola { typedef unsigned __int32 recordsize_t; +#pragma pack(push) +#pragma pack(2) /// /// Key-value index pair for mappings /// -#pragma pack(push) -#pragma pack(2) template struct mappair_t { @@ -72,6 +73,8 @@ namespace ZRCola { #pragma pack(pop) +#pragma pack(push) +#pragma pack(2) /// /// Language ID type /// Three letter abbreviation, zero terminated @@ -105,6 +108,7 @@ namespace ZRCola { /// static const langid_t blank; }; +#pragma pack(pop) /// @@ -228,11 +232,11 @@ namespace ZRCola { /// /// Memory index /// - template + template class index : public std::vector { protected: - std::vector &host; ///< Reference to host data + std::vector &host; ///< Reference to host data public: /// @@ -240,7 +244,7 @@ namespace ZRCola { /// /// \param[in] h Reference to vector holding the data /// - index(_In_ std::vector &h) : host(h) {} + index(_In_ std::vector &h) : host(h) {} /// @@ -250,9 +254,9 @@ namespace ZRCola { /// /// \returns Data reference /// - inline const T_data& at(size_type pos) const + inline const T_el& at(size_type pos) const { - return (const T_data&)host.at(std::vector::at(pos)); + return *reinterpret_cast(&host[std::vector::at(pos)]); } @@ -263,9 +267,9 @@ namespace ZRCola { /// /// \returns Data reference /// - inline T_data& at(size_type pos) + inline T_el& at(size_type pos) { - return (T_data&)host.at(std::vector::at(pos)); + return *reinterpret_cast(&host[std::vector::at(pos)]); } @@ -276,9 +280,9 
@@ namespace ZRCola { /// /// \returns Data reference /// - inline const T_data& operator[](size_type pos) const + inline const T_el& operator[](size_type pos) const { - return (const T_data&)host[std::vector::at(pos)]; + return *reinterpret_cast(&host[std::vector::operator[](pos)]); } @@ -289,9 +293,9 @@ namespace ZRCola { /// /// \returns Data reference /// - inline T_data& operator[](size_type pos) + inline T_el& operator[](size_type pos) { - return (T_data&)host[std::vector::at(pos)]; + return *reinterpret_cast(&host[std::vector::operator[](pos)]); } @@ -315,7 +319,7 @@ namespace ZRCola { /// - =0 when a == b /// - >0 when a > b /// - virtual int compare(_In_ const T_data &a, _In_ const T_data &b) const = 0; + virtual int compare(_In_ const T_el &a, _In_ const T_el &b) const = 0; /// @@ -329,7 +333,7 @@ namespace ZRCola { /// - =0 when a == b /// - >0 when a > b /// - virtual int compare_sort(_In_ const T_data &a, _In_ const T_data &b) const + virtual int compare_sort(_In_ const T_el &a, _In_ const T_el &b) const { // Revert to `compare()` by default. return compare(a, b); @@ -348,7 +352,7 @@ namespace ZRCola { /// - \c true if found /// - \c false otherwise /// - bool find(_In_ const T_data &el, _Out_ size_type &start, _Out_ size_type &end) const + bool find(_In_ const T_el &el, _Out_ size_type &start, _Out_ size_type &end) const { // Start with the full search area. for (start = 0, end = size(); start < end; ) { @@ -388,7 +392,7 @@ namespace ZRCola { /// - \c true if found /// - \c false otherwise /// - bool find(_In_ const T_data &el, _Out_ size_type &start) const + bool find(_In_ const T_el &el, _Out_ size_type &start) const { // Start with the full search area. 
size_t end; @@ -415,8 +419,11 @@ namespace ZRCola { private: static int __cdecl compare_s(void *p, const void *a, const void *b) { - const index *t = (const index*)p; - return t->compare_sort((const T_data&)t->host[*(const T_idx*)a], (const T_data&)t->host[*(const T_idx*)b]); + const index *_this = reinterpret_cast*>(p); + const T_data *data = _this->host.data(); + return _this->compare_sort( + *reinterpret_cast(data + *reinterpret_cast(a)), + *reinterpret_cast(data + *reinterpret_cast(b))); } }; @@ -556,10 +563,10 @@ namespace ZRCola { /// /// Binary compares two strings /// - /// \param[in] str_a First string - /// \param[in] str_a_end First string end - /// \param[in] str_b Second string - /// \param[in] str_b_end Second string end + /// \param[in] str_a First string + /// \param[in] count_a Number of characters in string \p str_a + /// \param[in] str_b Second string + /// \param[in] count_b Number of characters in string \p str_b /// /// \returns /// - <0 when str_a < str_b @@ -568,18 +575,70 @@ namespace ZRCola { /// /// \note /// The function does not treat \\0 characters as terminators for performance reasons. - /// Therefore \p str_a_end and \p str_b_end must represent exact string ends. + /// Therefore \p count_a and \p count_b must represent exact string lengths. 
/// - inline int CompareString(const wchar_t *str_a, const wchar_t *str_a_end, const wchar_t *str_b, const wchar_t *str_b_end) + inline int CompareString(_In_ const wchar_t *str_a, _In_ size_t count_a, _In_ const wchar_t *str_b, _In_ size_t count_b) { - for (; ; str_a++, str_b++) { - if (str_a >= str_a_end && str_b >= str_b_end) return 0; - else if (str_a >= str_a_end && str_b < str_b_end) return -1; - else if (str_a < str_a_end && str_b >= str_b_end) return +1; - else if (*str_a < *str_b) return -1; - else if (*str_a > *str_b) return +1; + for (size_t i = 0; ; i++) { + if (i >= count_a && i >= count_b) return 0; + else if (i >= count_a && i < count_b) return -1; + else if (i < count_a && i >= count_b) return +1; + else if (str_a[i] < str_b[i]) return -1; + else if (str_a[i] > str_b[i]) return +1; } } + + /// + /// Generates and returns Unicode representation of the string using hexadecimal codes. + /// + /// \param[in] str Unicode string + /// \param[in] count Number of characters in string \p str + /// \param[in] sep Separator + /// + inline std::string GetUnicodeDumpA(_In_ const wchar_t *str, _In_ size_t count, _In_opt_z_ const char *sep = "+") + { + std::string out; + size_t dump_len_max = strlen(sep) + 4 + 1; + char *dump; + std::unique_ptr dump_obj(dump = new char[dump_len_max]); + if (count && str[0]) { + size_t i = 0; + out.insert(out.end(), dump, dump + _snprintf(dump, dump_len_max, "%04X", str[i++])); + while (i < count && str[i]) + out.insert(out.end(), dump, dump + _snprintf(dump, dump_len_max, "%s%04X", sep, str[i++])); + } + + return out; + } + + /// + /// Generates and returns Unicode representation of the string using hexadecimal codes. 
+ /// + /// \param[in] str Unicode string + /// \param[in] count Number of characters in string \p str + /// \param[in] sep Separator + /// + inline std::wstring GetUnicodeDumpW(_In_ const wchar_t *str, _In_ size_t count, _In_opt_z_ const wchar_t *sep = L"+") + { + std::wstring out; + size_t dump_len_max = wcslen(sep) + 4 + 1; + wchar_t *dump; + std::unique_ptr dump_obj(dump = new wchar_t[dump_len_max]); + if (count && str[0]) { + size_t i = 0; + out.insert(out.end(), dump, dump + _snwprintf(dump, dump_len_max, L"%04X", str[i++])); + while (i < count && str[i]) + out.insert(out.end(), dump, dump + _snwprintf(dump, dump_len_max, L"%s%04X", sep, str[i++])); + } + + return out; + } + +#ifdef _UNICODE +#define GetUnicodeDump GetUnicodeDumpW +#else +#define GetUnicodeDump GetUnicodeDumpA +#endif }; @@ -591,8 +650,8 @@ namespace ZRCola { /// /// \returns The stream \p stream /// -template -inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::index &idx) +template +inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::index &idx) { // Write index count. 
auto idx_count = idx.size(); @@ -623,8 +682,8 @@ inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::i /// /// \returns The stream \p stream /// -template -inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::index &idx) +template +inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::index &idx) { unsigned __int32 count; diff --git a/lib/libZRCola/include/zrcola/language.h b/lib/libZRCola/include/zrcola/language.h index 1bfe4e1..5d71d6a 100644 --- a/lib/libZRCola/include/zrcola/language.h +++ b/lib/libZRCola/include/zrcola/language.h @@ -45,9 +45,40 @@ namespace ZRCola { /// Character data /// struct langchar { + public: langid_t lang; ///< Language ID - unsigned __int16 chr_len; ///< \c chr length (in UTF-16 characters) - wchar_t chr[]; ///< Character + + protected: + unsigned __int16 chr_to; ///< Character end in \c data + wchar_t data[]; ///< Character + + private: + inline langchar(_In_ const langchar &other); + inline langchar& operator=(_In_ const langchar &other); + + public: + /// + /// Constructs the language character + /// + /// \param[in] lang Character language + /// \param[in] chr Character + /// \param[in] chr_len Number of UTF-16 characters in \p chr + /// + inline langchar( + _In_opt_ langid_t lang = langid_t::blank, + _In_opt_z_count_(chr_len) const wchar_t *chr = NULL, + _In_opt_ size_t chr_len = 0) + { + this->lang = lang; + this->chr_to = static_cast(chr_len); + if (chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); + } + + inline const wchar_t* chr () const { return data; }; + inline wchar_t* chr () { return data; }; + inline const wchar_t* chr_end() const { return data + chr_to; }; + inline wchar_t* chr_end() { return data + chr_to; }; + inline unsigned __int16 chr_len() const { return chr_to; }; }; #pragma pack(pop) @@ -77,7 +108,7 @@ namespace ZRCola { /// virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const { - int r = 
ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len); + int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len()); if (r != 0) return r; if (a.lang < b.lang) return -1; @@ -118,7 +149,7 @@ namespace ZRCola { if (a.lang < b.lang) return -1; else if (a.lang > b.lang) return 1; - int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len); + int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len()); if (r != 0) return r; return 0; @@ -178,9 +209,40 @@ namespace ZRCola { /// Language data /// struct language { + public: langid_t id; ///< Language ID - unsigned __int16 name_len; ///< \c name length (in UTF-16 characters) - wchar_t name[]; ///< Language name + + protected: + unsigned __int16 name_to; ///< Language name end in \c data + wchar_t data[]; ///< Language name + + private: + inline language(_In_ const language &other); + inline language& operator=(_In_ const language &other); + + public: + /// + /// Constructs the language + /// + /// \param[in] id Language ID + /// \param[in] name Language name + /// \param[in] name_len Number of UTF-16 characters in \p name + /// + inline language( + _In_opt_ langid_t id = langid_t::blank, + _In_opt_z_count_(name_len) const wchar_t *name = NULL, + _In_opt_ size_t name_len = 0) + { + this->id = id; + this->name_to = static_cast(name_len); + if (name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len); + } + + inline const wchar_t* name () const { return data; }; + inline wchar_t* name () { return data; }; + inline const wchar_t* name_end() const { return data + name_to; }; + inline wchar_t* name_end() { return data + name_to; }; + inline unsigned __int16 name_len() const { return name_to; }; }; #pragma pack(pop) diff --git a/lib/libZRCola/include/zrcola/tag.h b/lib/libZRCola/include/zrcola/tag.h index 4218a3b..3efb700 100644 --- a/lib/libZRCola/include/zrcola/tag.h +++ b/lib/libZRCola/include/zrcola/tag.h @@ -47,8 +47,40 @@ namespace ZRCola { /// 
Character tag data /// struct chrtag { - wchar_t chr; ///> Character + public: tagid_t tag; ///< Tag ID + + protected: + unsigned __int16 chr_to; ///< Character end in \c data + wchar_t data[]; ///< Character + + private: + inline chrtag(_In_ const chrtag &other); + inline chrtag& operator=(_In_ const chrtag &other); + + public: + /// + /// Constructs the character tag + /// + /// \param[in] chr Character + /// \param[in] chr_len Number of UTF-16 characters in \p chr + /// \param[in] tag Tag + /// + inline chrtag( + _In_opt_z_count_(chr_len) const wchar_t *chr = NULL, + _In_opt_ size_t chr_len = 0, + _In_opt_ tagid_t tag = 0) + { + this->tag = tag; + this->chr_to = static_cast(chr_len); + if (chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); + } + + inline const wchar_t* chr () const { return data; }; + inline wchar_t* chr () { return data; }; + inline const wchar_t* chr_end() const { return data + chr_to; }; + inline wchar_t* chr_end() { return data + chr_to; }; + inline unsigned __int16 chr_len() const { return chr_to; }; }; #pragma pack(pop) @@ -78,8 +110,8 @@ namespace ZRCola { /// virtual int compare(_In_ const chrtag &a, _In_ const chrtag &b) const { - if (a.chr < b.chr) return -1; - else if (a.chr > b.chr) return 1; + int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len()); + if (r != 0) return r; return 0; } @@ -97,8 +129,8 @@ namespace ZRCola { /// virtual int compare_sort(_In_ const chrtag &a, _In_ const chrtag &b) const { - if (a.chr < b.chr) return -1; - else if (a.chr > b.chr) return 1; + int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len()); + if (r != 0) return r; if (a.tag < b.tag) return -1; else if (a.tag > b.tag) return 1; @@ -156,8 +188,8 @@ namespace ZRCola { if (a.tag < b.tag) return -1; else if (a.tag > b.tag) return 1; - if (a.chr < b.chr) return -1; - else if (a.chr > b.chr) return 1; + int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len()); + if (r != 0) return r; return 0; 
} @@ -191,7 +223,7 @@ namespace ZRCola { /// \param[in ] fn_abort Pointer to function to periodically test for search cancellation /// \param[in ] cookie Cookie for \p fn_abort call /// - bool Search(_In_ const std::map &tags, _In_ const character_db &ch_db, _In_ const std::set &cats, _Inout_ std::map &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; + bool Search(_In_ const std::map &tags, _In_ const character_db &ch_db, _In_ const std::set &cats, _Inout_ std::map &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; }; @@ -209,19 +241,53 @@ namespace ZRCola { /// Tag name data /// struct tagname { + public: tagid_t tag; ///< Tag ID LCID locale; ///< Locale ID - unsigned __int16 name_len; ///< \c name length (in characters) - wchar_t name[]; ///< Tag localized name + + protected: + unsigned __int16 name_to; ///< Tag name end in \c data + wchar_t data[]; ///< Tag name + + private: + inline tagname(_In_ const tagname &other); + inline tagname& operator=(_In_ const tagname &other); + + public: + /// + /// Constructs the localized tag name + /// + /// \param[in] tag Tag + /// \param[in] locale Locale + /// \param[in] name Tag name + /// \param[in] name_len Number of UTF-16 characters in \p name + /// + inline tagname( + _In_opt_ tagid_t tag = 0, + _In_opt_ LCID locale = MAKELCID(MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL), SORT_DEFAULT), + _In_opt_z_count_(name_len) const wchar_t *name = NULL, + _In_opt_ size_t name_len = 0) + { + this->tag = tag; + this->locale = locale; + this->name_to = static_cast(name_len); + if (name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len); + } + + inline const wchar_t* name () const { return data; }; + inline wchar_t* name () { return data; }; + inline const wchar_t* name_end() const { return data + name_to; }; + inline wchar_t* name_end() { return data + name_to; }; + inline unsigned __int16 name_len() const { return name_to; }; /// 
/// Compares two names /// /// \param[in] locale Locale ID to use for compare /// \param[in] str_a First name - /// \param[in] count_a Number of characters in string \p str_a + /// \param[in] count_a Number of UTF-16 characters in \p str_a /// \param[in] str_b Second name - /// \param[in] count_b Number of characters in string \p str_b + /// \param[in] count_b Number of UTF-16 characters in \p str_b /// /// \returns /// - <0 when str_a < str_b @@ -259,7 +325,7 @@ namespace ZRCola { indexName(_In_ std::vector &h) : index(h) {} /// - /// Compares two tag names by name (for searching) + /// Compares two tag names by locale and name (for searching) /// /// \param[in] a Pointer to first element /// \param[in] b Pointer to second element @@ -274,14 +340,14 @@ namespace ZRCola { if (a.locale < b.locale) return -1; else if (a.locale > b.locale) return 1; - int r = tagname::CompareName(a.locale, a.name, a.name_len, b.name, b.name_len); + int r = tagname::CompareName(a.locale, a.name(), a.name_len(), b.name(), b.name_len()); if (r != 0) return r; return 0; } /// - /// Compares two tag names by name (for sorting) + /// Compares two tag names by locale and name (for sorting) /// /// \param[in] a Pointer to first element /// \param[in] b Pointer to second element @@ -296,7 +362,7 @@ namespace ZRCola { if (a.locale < b.locale) return -1; else if (a.locale > b.locale) return 1; - int r = tagname::CompareName(a.locale, a.name, a.name_len, b.name, b.name_len); + int r = tagname::CompareName(a.locale, a.name(), a.name_len(), b.name(), b.name_len()); if (r != 0) return r; if (a.tag < b.tag) return -1; diff --git a/lib/libZRCola/include/zrcola/translate.h b/lib/libZRCola/include/zrcola/translate.h index a4fb4c3..6482e76 100644 --- a/lib/libZRCola/include/zrcola/translate.h +++ b/lib/libZRCola/include/zrcola/translate.h @@ -54,22 +54,49 @@ namespace ZRCola { unsigned __int16 dec_to; ///< Decomposed string end in \c data wchar_t data[]; ///< Decomposed string and composed character + 
private: + inline translation(_In_ const translation &other); + inline translation& operator=(_In_ const translation &other); + public: - inline const wchar_t* com () const { return data; }; - inline wchar_t* com () { return data; }; + /// + /// Constructs the translation + /// + /// \param[in] rank Translation rank + /// \param[in] com Composed character + /// \param[in] com_len Number of UTF-16 characters in \p com + /// \param[in] dec Decomposed character + /// \param[in] dec_len Number of UTF-16 characters in \p dec + /// + inline translation( + _In_opt_ unsigned __int16 rank = 0, + _In_opt_z_count_(com_len) const wchar_t *com = NULL, + _In_opt_ size_t com_len = 0, + _In_opt_z_count_(dec_len) const wchar_t *dec = NULL, + _In_opt_ size_t dec_len = 0) + { + this->rank = rank; + this->com_to = static_cast(com_len); + if (com_len) memcpy(this->data, com, sizeof(wchar_t)*com_len); + this->dec_to = static_cast(this->com_to + dec_len); + if (dec_len) memcpy(this->data + this->com_to, dec, sizeof(wchar_t)*dec_len); + } + + inline const wchar_t* com () const { return data; }; + inline wchar_t* com () { return data; }; inline const wchar_t* com_end() const { return data + com_to; }; inline wchar_t* com_end() { return data + com_to; }; - inline unsigned __int16 com_len() const { return com_to; }; + inline unsigned __int16 com_len() const { return com_to; }; inline wchar_t com_at(_In_ size_t i) const { return i < com_to ? 
data[i] : 0; } - inline const wchar_t* dec () const { return data + com_to; }; - inline wchar_t* dec () { return data + com_to; }; - inline const wchar_t* dec_end() const { return data + dec_to; }; - inline wchar_t* dec_end() { return data + dec_to; }; + inline const wchar_t* dec () const { return data + com_to; }; + inline wchar_t* dec () { return data + com_to; }; + inline const wchar_t* dec_end() const { return data + dec_to; }; + inline wchar_t* dec_end() { return data + dec_to; }; inline unsigned __int16 dec_len() const { return dec_to - com_to; }; inline wchar_t dec_at(_In_ size_t i) const @@ -106,7 +133,7 @@ namespace ZRCola { /// virtual int compare(_In_ const translation &a, _In_ const translation &b) const { - int r = ZRCola::CompareString(a.dec(), a.dec_end(), b.dec(), b.dec_end()); + int r = ZRCola::CompareString(a.dec(), a.dec_len(), b.dec(), b.dec_len()); if (r != 0) return r; return 0; @@ -125,10 +152,10 @@ namespace ZRCola { /// virtual int compare_sort(_In_ const translation &a, _In_ const translation &b) const { - int r = ZRCola::CompareString(a.dec(), a.dec_end(), b.dec(), b.dec_end()); + int r = ZRCola::CompareString(a.dec(), a.dec_len(), b.dec(), b.dec_len()); if (r != 0) return r; - r = ZRCola::CompareString(a.com(), a.com_end(), b.com(), b.com_end()); + r = ZRCola::CompareString(a.com(), a.com_len(), b.com(), b.com_len()); if (r != 0) return r; return 0; @@ -162,7 +189,7 @@ namespace ZRCola { /// virtual int compare(_In_ const translation &a, _In_ const translation &b) const { - int r = ZRCola::CompareString(a.com(), a.com_end(), b.com(), b.com_end()); + int r = ZRCola::CompareString(a.com(), a.com_len(), b.com(), b.com_len()); if (r != 0) return r; return 0; @@ -181,13 +208,13 @@ namespace ZRCola { /// virtual int compare_sort(_In_ const translation &a, _In_ const translation &b) const { - int r = ZRCola::CompareString(a.com(), a.com_end(), b.com(), b.com_end()); + int r = ZRCola::CompareString(a.com(), a.com_len(), b.com(), b.com_len()); if 
(r != 0) return r; if (a.rank < b.rank) return -1; else if (a.rank > b.rank) return +1; - r = ZRCola::CompareString(a.dec(), a.dec_end(), b.dec(), b.dec_end()); + r = ZRCola::CompareString(a.dec(), a.dec_len(), b.dec(), b.dec_len()); if (r != 0) return r; return 0; diff --git a/lib/libZRCola/src/character.cpp b/lib/libZRCola/src/character.cpp index 3d76d95..558b48a 100644 --- a/lib/libZRCola/src/character.cpp +++ b/lib/libZRCola/src/character.cpp @@ -23,7 +23,7 @@ const ZRCola::chrcatid_t ZRCola::chrcatid_t::blank = {}; -bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set &cats, _Inout_ std::map &hits, _Inout_ std::map &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const +bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set &cats, _Inout_ std::map &hits, _Inout_ std::map &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const { assert(str); @@ -76,14 +76,15 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set if (idxDsc.find(term.c_str(), term.size(), &data, &len)) { // The term was found. - for (size_t i = 0; i < len; i++) { + for (size_t i = 0, j = 0; i < len; i += j + 1) { if (fn_abort && fn_abort(cookie)) return false; - wchar_t c = data[i]; - if (cats.find(GetCharCat(c)) != cats.end()) { + j = wcsnlen(data + i, len - i); + if (cats.find(GetCharCat(data + i, j)) != cats.end()) { + std::wstring c(data + i, j); auto idx = hits.find(c); if (idx == hits.end()) { // New character. - hits.insert(std::make_pair(data[i], 1.0/len)); + hits.insert(std::make_pair(std::move(c), 1.0/len)); } else { // Increase rating of existing character. idx->second += 1.0/len; @@ -94,14 +95,15 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set if (idxDscSub.find(term.c_str(), term.size(), &data, &len)) { // The term was found in the sub-term index. 
- for (size_t i = 0; i < len; i++) { + for (size_t i = 0, j = 0; i < len; i += j + 1) { if (fn_abort && fn_abort(cookie)) return false; - wchar_t c = data[i]; - if (cats.find(GetCharCat(c)) != cats.end()) { + j = wcsnlen(data + i, len - i); + if (cats.find(GetCharCat(data + i, j)) != cats.end()) { + std::wstring c(data + i, j); auto idx = hits_sub.find(c); if (idx == hits_sub.end()) { // New character. - hits_sub.insert(std::make_pair(data[i], 1.0/len)); + hits_sub.insert(std::make_pair(std::move(c), 1.0/len)); } else { // Increase rating of existing character. idx->second += 1.0/len; diff --git a/lib/libZRCola/src/language.cpp b/lib/libZRCola/src/language.cpp index 841c627..34249af 100644 --- a/lib/libZRCola/src/language.cpp +++ b/lib/libZRCola/src/language.cpp @@ -75,10 +75,8 @@ bool ZRCola::langchar_db::IsLocalCharacter(_In_ const wchar_t *chr, _In_ const w { size_t n = chr_end - chr; assert(n <= 0xffff); - std::unique_ptr lc((ZRCola::langchar_db::langchar*)new char[sizeof(ZRCola::langchar_db::langchar) + sizeof(wchar_t)*n]); - lc->lang = lang; - lc->chr_len = (unsigned __int16)n; - memcpy(lc->chr, chr, sizeof(wchar_t)*n); - ZRCola::langchar_db::indexChar::size_type start; + std::unique_ptr lc((langchar*)new char[sizeof(langchar) + sizeof(wchar_t)*n]); + lc->langchar::langchar(lang, chr, n); + indexChar::size_type start; return idxChr.find(*lc, start); } diff --git a/lib/libZRCola/src/tag.cpp b/lib/libZRCola/src/tag.cpp index f393def..a561c98 100644 --- a/lib/libZRCola/src/tag.cpp +++ b/lib/libZRCola/src/tag.cpp @@ -20,23 +20,24 @@ #include "stdafx.h" -bool ZRCola::chrtag_db::Search(_In_ const std::map &tags, _In_ const character_db &ch_db, _In_ const std::set &cats, _Inout_ std::map &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const +bool ZRCola::chrtag_db::Search(_In_ const std::map &tags, _In_ const character_db &ch_db, _In_ const std::set &cats, _Inout_ std::map &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void 
*cookie) const { for (auto tag = tags.cbegin(), tag_end = tags.cend(); tag != tag_end; ++tag) { if (fn_abort && fn_abort(cookie)) return false; // Search for tagged characters. - chrtag el = { 0, tag->first }; size_t start, end; - if (idxTag.find(el, start, end)) { + if (idxTag.find(chrtag(NULL, 0, tag->first), start, end)) { for (size_t i = start; i < end; i++) { if (fn_abort && fn_abort(cookie)) return false; const chrtag &ct = idxTag[i]; - if (cats.find(ch_db.GetCharCat(ct.chr)) != cats.end()) { - auto idx = hits.find(ct.chr); + unsigned __int16 len = ct.chr_len(); + if (cats.find(ch_db.GetCharCat(ct.chr(), len)) != cats.end()) { + std::wstring chr(ct.chr(), len); + auto idx = hits.find(chr); if (idx == hits.end()) { // New character. - hits.insert(std::make_pair(ct.chr, tag->second)); + hits.insert(std::make_pair(std::move(chr), tag->second)); } else { // Increase count for existing character. idx->second += tag->second; @@ -95,8 +96,7 @@ bool ZRCola::tagname_db::Search(_In_z_ const wchar_t *str, _In_ LCID locale, _In // Find the name. std::unique_ptr tn(reinterpret_cast(new char[sizeof(tagname) + sizeof(wchar_t)*name.length()])); - tn->locale = locale; - memcpy(tn->name, name.data(), sizeof(wchar_t)*(tn->name_len = (unsigned __int16)name.length())); + tn->tagname::tagname(0, locale, name.data(), name.length()); size_t start, end; if (idxName.find(*tn, start, end)) { // The name was found. 
diff --git a/lib/libZRColaUI/include/zrcolaui/chargroup.h b/lib/libZRColaUI/include/zrcolaui/chargroup.h index b2cc82a..c8c0b92 100644 --- a/lib/libZRColaUI/include/zrcolaui/chargroup.h +++ b/lib/libZRColaUI/include/zrcolaui/chargroup.h @@ -46,33 +46,64 @@ namespace ZRCola { /// Character group data /// struct chrgrp { - unsigned __int16 id; ///< Character group id + public: + unsigned __int16 id; ///< Character group ID unsigned __int16 rank; ///< Character group rank - unsigned __int16 name_len; ///< Character group name length in \c data - unsigned __int16 char_len; ///< Character list length in \c data - wchar_t data[]; ///< Character group name and character list - inline const wchar_t* get_chars() const + protected: + unsigned __int16 name_to; ///< Character group name end in \c data + unsigned __int16 chrlst_to; ///< Character list end in \c data + wchar_t data[]; ///< Character group name, character list, bit vector if particular character is displayed initially + + public: + /// + /// Constructs the character group + /// + /// \param[in] id Character group ID + /// \param[in] rank Character group rank + /// \param[in] name Character group name + /// \param[in] name_len Number of UTF-16 characters in \p name + /// \param[in] chrlst Character list (zero delimited) + /// \param[in] chrlst_len Number of UTF-16 characters in \p chrlst (including zero delimiters) + /// \param[in] chrshow Binary vector which particular character is displayed initially + /// + inline chrgrp( + _In_opt_ unsigned __int16 id = 0, + _In_opt_ unsigned __int16 rank = 0, + _In_opt_z_count_(name_len) const wchar_t *name = NULL, + _In_opt_ size_t name_len = 0, + _In_opt_z_count_(chrlst_len) const wchar_t *chrlst = NULL, + _In_opt_ size_t chrlst_len = 0, + _In_opt_count_x_((chrlst_len + 15)/16) const unsigned __int16 *chrshow = NULL) { - return data + name_len; + this->id = id; + this->rank = rank; + this->name_to = static_cast(name_len); + if (name_len) memcpy(this->data, name, 
sizeof(wchar_t)*name_len); + this->chrlst_to = static_cast(this->name_to + chrlst_len); + if (chrlst_len) { + memcpy(this->data + this->name_to, chrlst, sizeof(wchar_t)*chrlst_len); + memcpy(this->data + this->chrlst_to, chrshow, (chrlst_len + sizeof(*data)*8 - 1)/(sizeof(*data)*8)*sizeof(*data)); + } } - inline wchar_t get_char(size_t index) const - { - assert(index < char_len); - return data[name_len + index]; - } + inline const wchar_t* name () const { return data; }; + inline wchar_t* name () { return data; }; + inline const wchar_t* name_end() const { return data + name_to; }; + inline wchar_t* name_end() { return data + name_to; }; + inline unsigned __int16 name_len() const { return name_to; }; - inline const unsigned __int16* get_char_shown() const - { - return (const unsigned __int16*)(data + name_len + char_len); - } + inline const wchar_t* chrlst () const { return data + name_to; }; + inline wchar_t* chrlst () { return data + name_to; }; + inline const wchar_t* chrlst_end() const { return data + chrlst_to; }; + inline wchar_t* chrlst_end() { return data + chrlst_to; }; + inline unsigned __int16 chrlst_len() const { return chrlst_to - name_to; }; - inline bool is_char_shown(size_t index) const - { - assert(index < char_len); - return (data[name_len + char_len + index / 16] & (1 << (index % 16))) ? 
true : false; - } + inline const unsigned __int16* chrshow () const { return reinterpret_cast(data + chrlst_to ); }; + inline unsigned __int16* chrshow () { return reinterpret_cast< unsigned __int16*>(data + chrlst_to ); }; + inline const unsigned __int16* chrshow_end() const { return reinterpret_cast(data + chrlst_to + chrshow_len()); }; + inline unsigned __int16* chrshow_end() { return reinterpret_cast< unsigned __int16*>(data + chrlst_to + chrshow_len()); }; + inline unsigned __int16 chrshow_len() const { return (chrlst_len() + sizeof(*data)*8 - 1)/(sizeof(*data)*8); }; }; #pragma pack(pop) @@ -124,10 +155,13 @@ namespace ZRCola { if (a.rank < b.rank) return -1; else if (a.rank > b.rank) return +1; - int r = _wcsncoll(a.data, b.data, std::min(a.name_len, b.name_len)); + unsigned __int16 + a_name_len = a.name_len(), + b_name_len = b.name_len(); + int r = _wcsncoll(a.name(), b.name(), std::min(a_name_len, b_name_len)); if (r != 0) return r; - if (a.name_len < b.name_len) return -1; - else if (a.name_len > b.name_len) return +1; + if (a_name_len < b_name_len) return -1; + else if (a_name_len > b_name_len) return +1; return 0; } diff --git a/lib/libZRColaUI/include/zrcolaui/keyboard.h b/lib/libZRColaUI/include/zrcolaui/keyboard.h index 428bb71..26fbd5a 100644 --- a/lib/libZRColaUI/include/zrcolaui/keyboard.h +++ b/lib/libZRColaUI/include/zrcolaui/keyboard.h @@ -48,39 +48,75 @@ namespace ZRCola { /// Key sequence data /// struct keyseq { + public: enum modifiers_t { SHIFT = 1<<0, ///< SHIFT key was pressed CTRL = 1<<1, ///< CTRL key was pressed ALT = 1<<2, ///< ALT key was pressed }; - wchar_t chr; ///< Character - unsigned __int16 seq_len; ///< \c seq length struct key_t { wchar_t key; ///< Key unsigned __int16 modifiers; ///< Modifiers (bitwise combination of SHIFT, CTRL and ALT) - } seq[]; ///< Key sequence + }; + protected: + unsigned __int16 chr_to; ///< Character end in \c data + unsigned __int16 seq_to; ///< Key sequence end in \c data + wchar_t data[]; ///< 
Character and key sequence + + public: + /// + /// Constructs the key sequence + /// + /// \param[in] seq Key sequence + /// \param[in] seq_count Number of UTF-16 characters in \p seq + /// \param[in] chr Character + /// \param[in] chr_len Number of UTF-16 characters in \p chr + /// + inline keyseq( + _In_opt_count_(seq_count) const key_t *seq = NULL, + _In_opt_ size_t seq_count = 0, + _In_opt_z_count_(chr_len) const wchar_t *chr = NULL, + _In_opt_ size_t chr_len = 0) + { + this->chr_to = static_cast(chr_len); + if (chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); + this->seq_to = static_cast(this->chr_to + seq_count * sizeof(key_t) / sizeof(*data)); + if (seq_count) memcpy(this->data + this->chr_to, seq, sizeof(key_t)*seq_count); + } + + inline const wchar_t* chr () const { return data; }; + inline wchar_t* chr () { return data; }; + inline const wchar_t* chr_end() const { return data + chr_to; }; + inline wchar_t* chr_end() { return data + chr_to; }; + inline unsigned __int16 chr_len() const { return chr_to; }; + + inline const key_t* seq () const { return reinterpret_cast(data + chr_to); }; + inline key_t* seq () { return reinterpret_cast< key_t*>(data + chr_to); }; + inline const key_t* seq_end() const { return reinterpret_cast(data + seq_to); }; + inline key_t* seq_end() { return reinterpret_cast< key_t*>(data + seq_to); }; + inline unsigned __int16 seq_len() const { return (seq_to - chr_to) * sizeof(*data) / sizeof(key_t); }; /// /// Compares two key sequences /// - /// \param[in] seq_a First key sequence - /// \param[in] count_a Number of keys in sequence \p seq_a - /// \param[in] seq_b Second key sequence - /// \param[in] count_b Number of keys in sequence \p seq_b + /// \param[in] seq_a First key sequence + /// \param[in] len_a Number of keys in sequence \p seq_a + /// \param[in] seq_b Second key sequence + /// \param[in] len_b Number of keys in sequence \p seq_b /// /// \returns /// - <0 when seq_a < seq_b /// - =0 when seq_a == seq_b /// - >0 
when seq_a > seq_b /// - static inline int CompareSequence(const key_t *seq_a, unsigned __int16 count_a, const key_t *seq_b, unsigned __int16 count_b) + static inline int CompareSequence(_In_ const key_t *seq_a, _In_ size_t len_a, _In_ const key_t *seq_b, _In_ size_t len_b) { - for (unsigned __int16 i = 0; ; i++) { - if (i >= count_a && i >= count_b) return 0; - else if (i >= count_a && i < count_b) return -1; - else if (i < count_a && i >= count_b) return +1; + for (size_t i = 0; ; i++) { + if (i >= len_a && i >= len_b) return 0; + else if (i >= len_a && i < len_b) return -1; + else if (i < len_a && i >= len_b) return +1; else if (seq_a[i].key < seq_b[i].key ) return -1; else if (seq_a[i].key > seq_b[i].key ) return +1; else if (seq_a[i].modifiers < seq_b[i].modifiers) return -1; @@ -116,8 +152,8 @@ namespace ZRCola { /// virtual int compare(_In_ const keyseq &a, _In_ const keyseq &b) const { - if (a.chr < b.chr) return -1; - else if (a.chr > b.chr) return +1; + int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len()); + if (r != 0) return r; return 0; } @@ -135,10 +171,10 @@ namespace ZRCola { /// virtual int compare_sort(_In_ const keyseq &a, _In_ const keyseq &b) const { - if (a.chr < b.chr) return -1; - else if (a.chr > b.chr) return +1; + int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len()); + if (r != 0) return r; - int r = keyseq::CompareSequence(a.seq, a.seq_len, b.seq, b.seq_len); + r = keyseq::CompareSequence(a.seq(), a.seq_len(), b.seq(), b.seq_len()); if (r != 0) return r; return 0; @@ -172,7 +208,7 @@ namespace ZRCola { /// virtual int compare(_In_ const keyseq &a, _In_ const keyseq &b) const { - int r = keyseq::CompareSequence(a.seq, a.seq_len, b.seq, b.seq_len); + int r = keyseq::CompareSequence(a.seq(), a.seq_len(), b.seq(), b.seq_len()); if (r != 0) return r; return 0; @@ -191,11 +227,11 @@ namespace ZRCola { /// virtual int compare_sort(_In_ const keyseq &a, _In_ const keyseq &b) const { - int r = 
keyseq::CompareSequence(a.seq, a.seq_len, b.seq, b.seq_len); + int r = keyseq::CompareSequence(a.seq(), a.seq_len(), b.seq(), b.seq_len()); if (r != 0) return r; - if (a.chr < b.chr) return -1; - else if (a.chr > b.chr) return +1; + r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len()); + if (r != 0) return r; return 0; } diff --git a/output/data/ZRCola.zrcdb b/output/data/ZRCola.zrcdb index 0ebd89b..e74aede 100644 Binary files a/output/data/ZRCola.zrcdb and b/output/data/ZRCola.zrcdb differ