Search optimizations

This commit is contained in:
Simon Rozman 2016-05-13 09:32:36 +02:00
parent 9c3c1585d5
commit 4ec7dc3ca5
7 changed files with 106 additions and 47 deletions

View File

@ -88,12 +88,12 @@ wxString wxZRColaCharGrid::GetToolTipText(int idx)
ZRColaApp *app = (ZRColaApp*)wxTheApp; ZRColaApp *app = (ZRColaApp*)wxTheApp;
// See if this character has a key sequence registered. // See if this character has a key sequence registered.
ZRCola::keyseq_db::indexKey::size_type start, end; ZRCola::keyseq_db::indexKey::size_type start;
bool found; bool found;
ZRCola::keyseq_db::keyseq *ks = (ZRCola::keyseq_db::keyseq*)new char[sizeof(ZRCola::keyseq_db::keyseq)]; ZRCola::keyseq_db::keyseq *ks = (ZRCola::keyseq_db::keyseq*)new char[sizeof(ZRCola::keyseq_db::keyseq)];
ks->chr = m_chars[idx]; ks->chr = m_chars[idx];
ks->seq_len = 0; ks->seq_len = 0;
found = app->m_ks_db.idxChr.find(*ks, start, end); found = app->m_ks_db.idxChr.find(*ks, start);
delete ks; delete ks;
if (found) { if (found) {

View File

@ -50,8 +50,7 @@ wxZRColaCharSelect::wxZRColaCharSelect(wxWindow* parent) :
// Fill categories. // Fill categories.
ZRColaApp *app = (ZRColaApp*)wxTheApp; ZRColaApp *app = (ZRColaApp*)wxTheApp;
size_t i, n; for (size_t i = 0, n = app->m_cc_db.idxRnk.size(); i < n; i++) {
for (i = 0, n = app->m_cc_db.idxRnk.size(); i < n; i++) {
const ZRCola::chrcat_db::chrcat &cc = app->m_cc_db.idxRnk[i]; const ZRCola::chrcat_db::chrcat &cc = app->m_cc_db.idxRnk[i];
int idx = m_categories->Insert(wxGetTranslation(wxString(cc.name, cc.name_len), wxT("ZRCola-zrcdb")), i); int idx = m_categories->Insert(wxGetTranslation(wxString(cc.name, cc.name_len), wxT("ZRCola-zrcdb")), i);
m_categories->Check(idx); m_categories->Check(idx);
@ -75,8 +74,8 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event)
{ {
ZRCola::character_db::character *chr = (ZRCola::character_db::character*)new char[sizeof(ZRCola::character_db::character)]; ZRCola::character_db::character *chr = (ZRCola::character_db::character*)new char[sizeof(ZRCola::character_db::character)];
chr->chr = m_char; chr->chr = m_char;
size_t start, end; size_t start;
if (app->m_chr_db.idxChr.find(*chr, start, end)) { if (app->m_chr_db.idxChr.find(*chr, start)) {
const ZRCola::character_db::character &chr = app->m_chr_db.idxChr[start]; const ZRCola::character_db::character &chr = app->m_chr_db.idxChr[start];
m_description->SetValue(wxString(chr.data, chr.desc_len)); m_description->SetValue(wxString(chr.data, chr.desc_len));
m_gridRelated->SetCharacters(wxString(chr.data + chr.desc_len, chr.rel_len)); m_gridRelated->SetCharacters(wxString(chr.data + chr.desc_len, chr.rel_len));
@ -125,11 +124,19 @@ void wxZRColaCharSelect::OnSearchTimer(wxTimerEvent& event)
if (!val.IsEmpty()) { if (!val.IsEmpty()) {
ZRColaApp *app = (ZRColaApp*)wxTheApp; ZRColaApp *app = (ZRColaApp*)wxTheApp;
std::map<wchar_t, unsigned long> hits; std::map<wchar_t, unsigned long> hits;
std::set<ZRCola::chrcatid_t> cats;
// Select categories.
for (size_t i = 0, n = app->m_cc_db.idxRnk.size(); i < n; i++) {
const ZRCola::chrcat_db::chrcat &cc = app->m_cc_db.idxRnk[i];
if (m_categories->IsChecked(i))
cats.insert(cc.id);
}
{ {
// Search by indexes and merge results. // Search by indexes and merge results.
std::map<wchar_t, unsigned long> hits_sub; std::map<wchar_t, unsigned long> hits_sub;
app->m_chr_db.search_by_desc(val.c_str(), hits, hits_sub); app->m_chr_db.Search(val.c_str(), cats, hits, hits_sub);
for (std::map<wchar_t, unsigned long>::const_iterator i = hits_sub.cbegin(), i_end = hits_sub.cend(); i != i_end; ++i) { for (std::map<wchar_t, unsigned long>::const_iterator i = hits_sub.cbegin(), i_end = hits_sub.cend(); i != i_end; ++i) {
std::map<wchar_t, unsigned long>::iterator idx = hits.find(i->first); std::map<wchar_t, unsigned long>::iterator idx = hits.find(i->first);
if (idx == hits.end()) if (idx == hits.end())
@ -139,26 +146,6 @@ void wxZRColaCharSelect::OnSearchTimer(wxTimerEvent& event)
} }
} }
// Filter by categories.
ZRCola::character_db::character *chr = (ZRCola::character_db::character*)new char[sizeof(ZRCola::character_db::character)];
for (std::map<wchar_t, unsigned long>::const_iterator i = hits.cbegin(), i_end = hits.cend(); i != i_end;) {
chr->chr = i->first;
size_t start, end;
std::map<ZRCola::chrcatid_t, int>::const_iterator idx;
if (app->m_chr_db.idxChr.find(*chr, start, end) &&
((idx = m_ccOrder.find(app->m_chr_db.idxChr[start].cat)) == m_ccOrder.end() || m_categories->IsChecked(idx->second)))
{
// Character category approved.
++i;
} else {
// Character category not approved.
std::map<wchar_t, unsigned long>::const_iterator i_remove = i;
++i;
hits.erase(i_remove);
}
}
delete chr;
// Now sort the characters by rank. // Now sort the characters by rank.
std::vector< std::pair<unsigned long, wchar_t> > hits2; std::vector< std::pair<unsigned long, wchar_t> > hits2;
hits2.reserve(hits.size()); hits2.reserve(hits.size());

View File

@ -565,8 +565,8 @@ void wxZRColaFrame::UpdateDecomposedLanguage()
ZRCola::language_db::language *l = new ZRCola::language_db::language; ZRCola::language_db::language *l = new ZRCola::language_db::language;
memcpy(l->id, m_lang, sizeof(l->id)); memcpy(l->id, m_lang, sizeof(l->id));
l->name_len = 0; l->name_len = 0;
ZRCola::language_db::indexLang::size_type start, end; ZRCola::language_db::indexLang::size_type start;
m_toolDecompLanguage->SetSelection(app->m_lang_db.idxLng.find(*l, start, end) ? start : -1); m_toolDecompLanguage->SetSelection(app->m_lang_db.idxLng.find(*l, start) ? start : -1);
delete l; delete l;
} }

View File

@ -69,7 +69,7 @@ bool wxZRColaKeyHandler::ProcessEvent(wxEvent& event)
) )
{ {
ZRColaApp *app = (ZRColaApp*)wxTheApp; ZRColaApp *app = (ZRColaApp*)wxTheApp;
ZRCola::keyseq_db::indexKey::size_type start, end; ZRCola::keyseq_db::indexKey::size_type start;
bool found; bool found;
wxFrame *pFrame = wxDynamicCast(app->m_mainWnd, wxFrame); wxFrame *pFrame = wxDynamicCast(app->m_mainWnd, wxFrame);
@ -96,7 +96,7 @@ bool wxZRColaKeyHandler::ProcessEvent(wxEvent& event)
ks->chr = 0; ks->chr = 0;
ks->seq_len = n; ks->seq_len = n;
memcpy(ks->seq, m_seq.data(), sizeof(ZRCola::keyseq_db::keyseq::key_t)*n); memcpy(ks->seq, m_seq.data(), sizeof(ZRCola::keyseq_db::keyseq::key_t)*n);
found = app->m_ks_db.idxKey.find(*ks, start, end); found = app->m_ks_db.idxKey.find(*ks, start);
delete ks; delete ks;
} }

View File

@ -26,6 +26,7 @@
#include <map> #include <map>
#include <ostream> #include <ostream>
#include <vector> #include <vector>
#include <set>
#include <string> #include <string>
#pragma warning(push) #pragma warning(push)
@ -51,6 +52,12 @@ namespace ZRCola {
}; };
///
/// Blank character category
///
/// Initialized from an empty initializer list (`= {}`).
/// character_db::GetCharCat() returns this value for a character it cannot find.
///
const chrcatid_t chrcatid_t_blank = {};
/// ///
/// Compares two character category IDs /// Compares two character category IDs
/// ///
@ -151,7 +158,32 @@ namespace ZRCola {
/// ///
inline character_db() : idxChr(data) {} inline character_db() : idxChr(data) {}
void search_by_desc(_In_z_ const wchar_t *str, _Inout_ std::map<wchar_t, unsigned long> &hits, _Inout_ std::map<wchar_t, unsigned long> &hits_sub) const; ///
/// Search for characters by description in given categories
///
/// \param[in ] str Search string
/// \param[in ] cats Set of categories; only characters belonging to one of them are reported
/// \param[inout] hits (character, count) map to append full-word hits to
/// \param[inout] hits_sub (character, count) map to append partial-word hits to
///
void Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<wchar_t, unsigned long> &hits, _Inout_ std::map<wchar_t, unsigned long> &hits_sub) const;
///
/// Look up the category of a character
///
/// \param[in] c  Character to look up
///
/// \returns
/// - Category of the character if `idxChr.find()` locates it
/// - `ZRCola::chrcatid_t_blank` otherwise
///
inline chrcatid_t GetCharCat(wchar_t c) const
{
    // Build the search key in raw storage; only its chr member is set before the lookup.
    char key_buf[sizeof(character)];
    character *key = (character *)key_buf;
    key->chr = c;

    indexChar::size_type pos;
    if (idxChr.find(*key, pos))
        return idxChr[pos].cat;

    // Character is not present in the index.
    return chrcatid_t_blank;
}
}; };

View File

@ -248,6 +248,40 @@ namespace ZRCola {
return false; return false;
} }
///
/// Search for the first element in the index
///
/// Performs a binary search using compare(), so the index is expected to be sorted
/// consistently with it. When a run of consecutive elements compares equal to \p el,
/// the lowest index of that run is reported.
///
/// \param[in]  el     Element we are looking for (needle)
/// \param[out] start  Index of the first matching element found. When nothing matches,
///                    it is left at the point where the search range became empty.
///
/// \returns
/// - \c true if found
/// - \c false otherwise
///
bool find(_In_ const T_data &el, _Out_ size_type &start) const
{
    // Start with the full search area [start, end).
    size_type end = size();
    start = 0;
    while (start < end) {
        // Take the midpoint without adding both bounds, so the sum cannot overflow.
        const size_type m = start + (end - start) / 2;
        const int r = compare(el, at(m));
        if (r < 0)
            end = m;
        else if (r > 0)
            start = m + 1;
        else {
            // Element m matches. Narrow [start, m) from the left to land on the first element in the run.
            size_type end2 = m;
            while (start < end2) {
                const size_type m2 = start + (end2 - start) / 2;
                if (compare(el, at(m2)) <= 0)
                    end2 = m2;
                else
                    start = m2 + 1;
            }
            return true;
        }
    }

    return false;
}
private: private:
static int __cdecl compare_s(void *p, const void *a, const void *b) static int __cdecl compare_s(void *p, const void *a, const void *b)
{ {

View File

@ -20,7 +20,7 @@
#include "stdafx.h" #include "stdafx.h"
void ZRCola::character_db::search_by_desc(_In_z_ const wchar_t *str, _Inout_ std::map<wchar_t, unsigned long> &hits, _Inout_ std::map<wchar_t, unsigned long> &hits_sub) const void ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<wchar_t, unsigned long> &hits, _Inout_ std::map<wchar_t, unsigned long> &hits_sub) const
{ {
assert(str); assert(str);
@ -68,7 +68,9 @@ void ZRCola::character_db::search_by_desc(_In_z_ const wchar_t *str, _Inout_ std
if (idxDsc.find(term.c_str(), term.size(), &data, &len)) { if (idxDsc.find(term.c_str(), term.size(), &data, &len)) {
// The term was found. // The term was found.
for (size_t i = 0; i < len; i++) { for (size_t i = 0; i < len; i++) {
std::map<wchar_t, unsigned long>::iterator idx = hits.find(data[i]); wchar_t c = data[i];
if (cats.find(GetCharCat(c)) != cats.end()) {
std::map<wchar_t, unsigned long>::iterator idx = hits.find(c);
if (idx == hits.end()) { if (idx == hits.end()) {
// New character. // New character.
hits.insert(std::make_pair(data[i], 1)); hits.insert(std::make_pair(data[i], 1));
@ -78,11 +80,14 @@ void ZRCola::character_db::search_by_desc(_In_z_ const wchar_t *str, _Inout_ std
} }
} }
} }
}
if (idxDscSub.find(term.c_str(), term.size(), &data, &len)) { if (idxDscSub.find(term.c_str(), term.size(), &data, &len)) {
// The term was found in the sub-term index. // The term was found in the sub-term index.
for (size_t i = 0; i < len; i++) { for (size_t i = 0; i < len; i++) {
std::map<wchar_t, unsigned long>::iterator idx = hits_sub.find(data[i]); wchar_t c = data[i];
if (cats.find(GetCharCat(c)) != cats.end()) {
std::map<wchar_t, unsigned long>::iterator idx = hits_sub.find(c);
if (idx == hits_sub.end()) { if (idx == hits_sub.end()) {
// New character. // New character.
hits_sub.insert(std::make_pair(data[i], 1)); hits_sub.insert(std::make_pair(data[i], 1));
@ -95,3 +100,4 @@ void ZRCola::character_db::search_by_desc(_In_z_ const wchar_t *str, _Inout_ std
} }
} }
} }
}