From 4ec7dc3ca50785e817238bfb00b143b0c55db7bc Mon Sep 17 00:00:00 2001 From: Simon Rozman Date: Fri, 13 May 2016 09:32:36 +0200 Subject: [PATCH] Search optimizations --- ZRCola/zrcolachrgrid.cpp | 4 +-- ZRCola/zrcolachrslct.cpp | 37 ++++++++---------------- ZRCola/zrcolafrm.cpp | 4 +-- ZRCola/zrcolakeyhndlr.cpp | 4 +-- lib/libZRCola/include/zrcola/character.h | 34 +++++++++++++++++++++- lib/libZRCola/include/zrcola/common.h | 34 ++++++++++++++++++++++ lib/libZRCola/src/character.cpp | 36 +++++++++++++---------- 7 files changed, 106 insertions(+), 47 deletions(-) diff --git a/ZRCola/zrcolachrgrid.cpp b/ZRCola/zrcolachrgrid.cpp index f47439f..d33d9f6 100644 --- a/ZRCola/zrcolachrgrid.cpp +++ b/ZRCola/zrcolachrgrid.cpp @@ -88,12 +88,12 @@ wxString wxZRColaCharGrid::GetToolTipText(int idx) ZRColaApp *app = (ZRColaApp*)wxTheApp; // See if this character has a key sequence registered. - ZRCola::keyseq_db::indexKey::size_type start, end; + ZRCola::keyseq_db::indexKey::size_type start; bool found; ZRCola::keyseq_db::keyseq *ks = (ZRCola::keyseq_db::keyseq*)new char[sizeof(ZRCola::keyseq_db::keyseq)]; ks->chr = m_chars[idx]; ks->seq_len = 0; - found = app->m_ks_db.idxChr.find(*ks, start, end); + found = app->m_ks_db.idxChr.find(*ks, start); delete ks; if (found) { diff --git a/ZRCola/zrcolachrslct.cpp b/ZRCola/zrcolachrslct.cpp index 7ac160c..cdc09c5 100644 --- a/ZRCola/zrcolachrslct.cpp +++ b/ZRCola/zrcolachrslct.cpp @@ -50,8 +50,7 @@ wxZRColaCharSelect::wxZRColaCharSelect(wxWindow* parent) : // Fill categories. ZRColaApp *app = (ZRColaApp*)wxTheApp; - size_t i, n; - for (i = 0, n = app->m_cc_db.idxRnk.size(); i < n; i++) { + for (size_t i = 0, n = app->m_cc_db.idxRnk.size(); i < n; i++) { const ZRCola::chrcat_db::chrcat &cc = app->m_cc_db.idxRnk[i]; int idx = m_categories->Insert(wxGetTranslation(wxString(cc.name, cc.name_len), wxT("ZRCola-zrcdb")), i); m_categories->Check(idx); @@ -75,8 +74,8 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event) { ZRCola::character_db::character *chr = (ZRCola::character_db::character*)new char[sizeof(ZRCola::character_db::character)]; chr->chr = m_char; - size_t start, end; - if (app->m_chr_db.idxChr.find(*chr, start, end)) { + size_t start; + if (app->m_chr_db.idxChr.find(*chr, start)) { const ZRCola::character_db::character &chr = app->m_chr_db.idxChr[start]; m_description->SetValue(wxString(chr.data, chr.desc_len)); m_gridRelated->SetCharacters(wxString(chr.data + chr.desc_len, chr.rel_len)); @@ -125,11 +124,19 @@ void wxZRColaCharSelect::OnSearchTimer(wxTimerEvent& event) if (!val.IsEmpty()) { ZRColaApp *app = (ZRColaApp*)wxTheApp; std::map hits; + std::set cats; + + // Select categories. + for (size_t i = 0, n = app->m_cc_db.idxRnk.size(); i < n; i++) { + const ZRCola::chrcat_db::chrcat &cc = app->m_cc_db.idxRnk[i]; + if (m_categories->IsChecked(i)) + cats.insert(cc.id); + } { // Search by indexes and merge results. std::map hits_sub; - app->m_chr_db.search_by_desc(val.c_str(), hits, hits_sub); + app->m_chr_db.Search(val.c_str(), cats, hits, hits_sub); for (std::map::const_iterator i = hits_sub.cbegin(), i_end = hits_sub.cend(); i != i_end; ++i) { std::map::iterator idx = hits.find(i->first); if (idx == hits.end()) @@ -139,26 +146,6 @@ void wxZRColaCharSelect::OnSearchTimer(wxTimerEvent& event) } } - // Filter by categories. - ZRCola::character_db::character *chr = (ZRCola::character_db::character*)new char[sizeof(ZRCola::character_db::character)]; - for (std::map::const_iterator i = hits.cbegin(), i_end = hits.cend(); i != i_end;) { - chr->chr = i->first; - size_t start, end; - std::map::const_iterator idx; - if (app->m_chr_db.idxChr.find(*chr, start, end) && - ((idx = m_ccOrder.find(app->m_chr_db.idxChr[start].cat)) == m_ccOrder.end() || m_categories->IsChecked(idx->second))) - { - // Character category approved. - ++i; - } else { - // Character category not approved. - std::map::const_iterator i_remove = i; - ++i; - hits.erase(i_remove); - } - } - delete chr; - // Now sort the characters by rank. std::vector< std::pair > hits2; hits2.reserve(hits.size()); diff --git a/ZRCola/zrcolafrm.cpp b/ZRCola/zrcolafrm.cpp index b3a4447..ab0f45e 100644 --- a/ZRCola/zrcolafrm.cpp +++ b/ZRCola/zrcolafrm.cpp @@ -565,8 +565,8 @@ void wxZRColaFrame::UpdateDecomposedLanguage() ZRCola::language_db::language *l = new ZRCola::language_db::language; memcpy(l->id, m_lang, sizeof(l->id)); l->name_len = 0; - ZRCola::language_db::indexLang::size_type start, end; - m_toolDecompLanguage->SetSelection(app->m_lang_db.idxLng.find(*l, start, end) ? start : -1); + ZRCola::language_db::indexLang::size_type start; + m_toolDecompLanguage->SetSelection(app->m_lang_db.idxLng.find(*l, start) ? start : -1); delete l; } diff --git a/ZRCola/zrcolakeyhndlr.cpp b/ZRCola/zrcolakeyhndlr.cpp index 34bdc04..97f9560 100644 --- a/ZRCola/zrcolakeyhndlr.cpp +++ b/ZRCola/zrcolakeyhndlr.cpp @@ -69,7 +69,7 @@ bool wxZRColaKeyHandler::ProcessEvent(wxEvent& event) ) { ZRColaApp *app = (ZRColaApp*)wxTheApp; - ZRCola::keyseq_db::indexKey::size_type start, end; + ZRCola::keyseq_db::indexKey::size_type start; bool found; wxFrame *pFrame = wxDynamicCast(app->m_mainWnd, wxFrame); @@ -96,7 +96,7 @@ bool wxZRColaKeyHandler::ProcessEvent(wxEvent& event) ks->chr = 0; ks->seq_len = n; memcpy(ks->seq, m_seq.data(), sizeof(ZRCola::keyseq_db::keyseq::key_t)*n); - found = app->m_ks_db.idxKey.find(*ks, start, end); + found = app->m_ks_db.idxKey.find(*ks, start); delete ks; } diff --git a/lib/libZRCola/include/zrcola/character.h b/lib/libZRCola/include/zrcola/character.h index 95a3b4d..10cef5d 100644 --- a/lib/libZRCola/include/zrcola/character.h +++ b/lib/libZRCola/include/zrcola/character.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #pragma warning(push) @@ -51,6 +52,12 @@ namespace ZRCola { }; + /// + /// Blank character category + /// + const chrcatid_t chrcatid_t_blank = {}; + + /// /// Compares two character category IDs /// @@ -151,7 +158,32 @@ namespace ZRCola { /// inline character_db() : idxChr(data) {} - void search_by_desc(_In_z_ const wchar_t *str, _Inout_ std::map &hits, _Inout_ std::map &hits_sub) const; + /// + /// Search for characters by description in given categories + /// + /// \param[in ] str Search string + /// \param[in ] cats Set of categories, character must be a part of + /// \param[inout] hits (character, count) map to append full-word hits to + /// \param[inout] hits_sub (character, count) map to append partial-word hits to + /// + void Search(_In_z_ const wchar_t *str, _In_ const std::set &cats, _Inout_ std::map &hits, _Inout_ std::map &hits_sub) const; + + /// + /// Get character category + /// + /// \param[in] c Character + /// + /// \returns + /// - Character category if character found + /// - `ZRCola::chrcatid_t_blank` otherwise + /// + inline chrcatid_t GetCharCat(wchar_t c) const + { + char _chr[sizeof(character)]; + ((character *)_chr)->chr = c; + indexChar::size_type start; + return idxChr.find(*((character *)_chr), start) ? idxChr[start].cat : chrcatid_t_blank; + } }; diff --git a/lib/libZRCola/include/zrcola/common.h b/lib/libZRCola/include/zrcola/common.h index 30f487b..97657a8 100644 --- a/lib/libZRCola/include/zrcola/common.h +++ b/lib/libZRCola/include/zrcola/common.h @@ -248,6 +248,40 @@ namespace ZRCola { return false; } + /// + /// Search for the first element in the index + /// + /// \param[in] el Element we are looking for (needle) + /// \param[out] start Index of the first matching element found + /// + /// \returns + /// - \c true if found + /// - \c false otherwise + /// + bool find(_In_ const T_data &el, _Out_ size_type &start) const + { + // Start with the full search area. + size_t end; + for (start = 0, end = size(); start < end; ) { + size_type m = (start + end) / 2; + int r = compare(el, at(m)); + if (r < 0) end = m; + else if (r > 0) start = m + 1; + else { + // Narrow the search area on the left to start at the first element in the run. + for (size_type end2 = m; start < end2;) { + size_type m = (start + end2) / 2; + int r = compare(el, at(m)); + if (r <= 0) end2 = m; else start = m + 1; + } + + return true; + } + } + + return false; + } + private: static int __cdecl compare_s(void *p, const void *a, const void *b) { diff --git a/lib/libZRCola/src/character.cpp b/lib/libZRCola/src/character.cpp index f3b7604..f1ea0db 100644 --- a/lib/libZRCola/src/character.cpp +++ b/lib/libZRCola/src/character.cpp @@ -20,7 +20,7 @@ #include "stdafx.h" -void ZRCola::character_db::search_by_desc(_In_z_ const wchar_t *str, _Inout_ std::map &hits, _Inout_ std::map &hits_sub) const +void ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set &cats, _Inout_ std::map &hits, _Inout_ std::map &hits_sub) const { assert(str); @@ -68,13 +68,16 @@ void ZRCola::character_db::search_by_desc(_In_z_ const wchar_t *str, _Inout_ std if (idxDsc.find(term.c_str(), term.size(), &data, &len)) { // The term was found. for (size_t i = 0; i < len; i++) { - std::map::iterator idx = hits.find(data[i]); - if (idx == hits.end()) { - // New character. - hits.insert(std::make_pair(data[i], 1)); - } else { - // Increment existing character. - idx->second++; + wchar_t c = data[i]; + if (cats.find(GetCharCat(c)) != cats.end()) { + std::map::iterator idx = hits.find(c); + if (idx == hits.end()) { + // New character. + hits.insert(std::make_pair(data[i], 1)); + } else { + // Increment existing character. + idx->second++; + } } } } @@ -82,13 +85,16 @@ void ZRCola::character_db::search_by_desc(_In_z_ const wchar_t *str, _Inout_ std if (idxDscSub.find(term.c_str(), term.size(), &data, &len)) { // The term was found in the sub-term index. for (size_t i = 0; i < len; i++) { - std::map::iterator idx = hits_sub.find(data[i]); - if (idx == hits_sub.end()) { - // New character. - hits_sub.insert(std::make_pair(data[i], 1)); - } else { - // Increment existing character. - idx->second++; + wchar_t c = data[i]; + if (cats.find(GetCharCat(c)) != cats.end()) { + std::map::iterator idx = hits_sub.find(c); + if (idx == hits_sub.end()) { + // New character. + hits_sub.insert(std::make_pair(data[i], 1)); + } else { + // Increment existing character. + idx->second++; + } } } }