Character search ranking and display are now more sophisticated

This commit is contained in:
Simon Rozman 2016-05-31 11:50:12 +02:00
parent 3f64aef58d
commit e5b246a4f0
4 changed files with 39 additions and 23 deletions

View File

@ -221,7 +221,7 @@ void wxZRColaCharSelect::OnSearchComplete(wxThreadEvent& event)
// Display results. // Display results.
wxString chars; wxString chars;
chars.reserve(m_searchThread->m_hits.size()); chars.reserve(m_searchThread->m_hits.size());
for (std::vector< std::pair<unsigned long, wchar_t> >::const_iterator i = m_searchThread->m_hits.cbegin(), i_end = m_searchThread->m_hits.cend(); i != i_end; ++i) for (std::vector<std::pair<ZRCola::charrank_t, wchar_t> >::const_iterator i = m_searchThread->m_hits.cbegin(), i_end = m_searchThread->m_hits.cend(); i != i_end; ++i)
chars += i->second; chars += i->second;
m_gridResults->SetCharacters(chars); m_gridResults->SetCharacters(chars);
@ -483,17 +483,17 @@ wxZRColaCharSelect::SearchThread::SearchThread(wxZRColaCharSelect *parent) :
wxThread::ExitCode wxZRColaCharSelect::SearchThread::Entry() wxThread::ExitCode wxZRColaCharSelect::SearchThread::Entry()
{ {
ZRColaApp *app = (ZRColaApp*)wxTheApp; ZRColaApp *app = (ZRColaApp*)wxTheApp;
std::map<wchar_t, unsigned long> hits; std::map<wchar_t, ZRCola::charrank_t> hits;
if (TestDestroy()) return (wxThread::ExitCode)1; if (TestDestroy()) return (wxThread::ExitCode)1;
{ {
// Search by indexes and merge results. // Search by indexes and merge results.
std::map<wchar_t, unsigned long> hits_sub; std::map<wchar_t, ZRCola::charrank_t> hits_sub;
if (!app->m_chr_db.Search(m_search.c_str(), m_cats, hits, hits_sub, TestDestroyS, this)) return (wxThread::ExitCode)1; if (!app->m_chr_db.Search(m_search.c_str(), m_cats, hits, hits_sub, TestDestroyS, this)) return (wxThread::ExitCode)1;
for (std::map<wchar_t, unsigned long>::const_iterator i = hits_sub.cbegin(), i_end = hits_sub.cend(); i != i_end; ++i) { for (std::map<wchar_t, ZRCola::charrank_t>::const_iterator i = hits_sub.cbegin(), i_end = hits_sub.cend(); i != i_end; ++i) {
if (TestDestroy()) return (wxThread::ExitCode)1; if (TestDestroy()) return (wxThread::ExitCode)1;
std::map<wchar_t, unsigned long>::iterator idx = hits.find(i->first); std::map<wchar_t, ZRCola::charrank_t>::iterator idx = hits.find(i->first);
if (idx == hits.end()) if (idx == hits.end())
hits.insert(std::make_pair(i->first, i->second / 4)); hits.insert(std::make_pair(i->first, i->second / 4));
else else
@ -501,13 +501,23 @@ wxThread::ExitCode wxZRColaCharSelect::SearchThread::Entry()
} }
} }
// Now sort the characters by rank. // Get best rank.
m_hits.reserve(hits.size()); ZRCola::charrank_t rank_ref = 0;
for (std::map<wchar_t, unsigned long>::const_iterator i = hits.cbegin(), i_end = hits.cend(); i != i_end; ++i) { for (std::map<wchar_t, ZRCola::charrank_t>::const_iterator i = hits.cbegin(), i_end = hits.cend(); i != i_end; ++i) {
if (TestDestroy()) return (wxThread::ExitCode)1; if (TestDestroy()) return (wxThread::ExitCode)1;
if (i->second > rank_ref)
rank_ref = i->second;
}
// Now sort the characters by rank (taking only top 3/4 by rank).
ZRCola::charrank_t rank_threshold = rank_ref*3/4;
m_hits.reserve(hits.size());
for (std::map<wchar_t, ZRCola::charrank_t>::const_iterator i = hits.cbegin(), i_end = hits.cend(); i != i_end; ++i) {
if (TestDestroy()) return (wxThread::ExitCode)1;
if (i->second > rank_threshold)
m_hits.push_back(std::make_pair(i->second, i->first)); m_hits.push_back(std::make_pair(i->second, i->first));
} }
std::qsort(m_hits.data(), m_hits.size(), sizeof(std::pair<unsigned long, wchar_t>), CompareHits); std::qsort(m_hits.data(), m_hits.size(), sizeof(std::pair<ZRCola::charrank_t, wchar_t>), CompareHits);
// Signal the event handler that this thread is going to be destroyed. // Signal the event handler that this thread is going to be destroyed.
// NOTE: here we assume that using the m_parent pointer is safe, // NOTE: here we assume that using the m_parent pointer is safe,
@ -520,8 +530,8 @@ wxThread::ExitCode wxZRColaCharSelect::SearchThread::Entry()
int __cdecl wxZRColaCharSelect::SearchThread::CompareHits(const void *a, const void *b) int __cdecl wxZRColaCharSelect::SearchThread::CompareHits(const void *a, const void *b)
{ {
const std::pair<unsigned long, wchar_t> *_a = (const std::pair<unsigned long, wchar_t>*)a; const std::pair<ZRCola::charrank_t, wchar_t> *_a = (const std::pair<ZRCola::charrank_t, wchar_t>*)a;
const std::pair<unsigned long, wchar_t> *_b = (const std::pair<unsigned long, wchar_t>*)b; const std::pair<ZRCola::charrank_t, wchar_t> *_b = (const std::pair<ZRCola::charrank_t, wchar_t>*)b;
if (_a->first > _b->first) return -1; if (_a->first > _b->first) return -1;
else if (_a->first < _b->first) return 1; else if (_a->first < _b->first) return 1;

View File

@ -34,6 +34,7 @@ class wxPersistentZRColaCharSelect;
#include <wx/thread.h> #include <wx/thread.h>
#include <list> #include <list>
#include <map> #include <map>
#include <vector>
wxDECLARE_EVENT(wxEVT_SEARCH_COMPLETE, wxThreadEvent); wxDECLARE_EVENT(wxEVT_SEARCH_COMPLETE, wxThreadEvent);
@ -101,7 +102,7 @@ protected:
public: public:
std::wstring m_search; ///< Search phrase std::wstring m_search; ///< Search phrase
std::set<ZRCola::chrcatid_t> m_cats; ///< Search categories std::set<ZRCola::chrcatid_t> m_cats; ///< Search categories
std::vector< std::pair<unsigned long, wchar_t> > m_hits; ///< Search results std::vector<std::pair<ZRCola::charrank_t, wchar_t> > m_hits; ///< Search results
protected: protected:
wxZRColaCharSelect *m_parent; ///< Thread owner wxZRColaCharSelect *m_parent; ///< Thread owner

View File

@ -36,6 +36,11 @@
namespace ZRCola { namespace ZRCola {
///
/// Character rank type
///
/// Floating-point so that a single search hit can contribute a fractional
/// amount (character_db::Search adds 1.0/len per hit) and so that ranks can
/// be scaled by callers (e.g. divided by 4, or multiplied by 3/4 to form a
/// threshold) without integer truncation.
///
typedef double charrank_t;
/// ///
/// Character category ID type /// Character category ID type
/// Two letter abbreviation, non-terminated /// Two letter abbreviation, non-terminated
@ -237,7 +242,7 @@ namespace ZRCola {
/// \param[in] fn_abort Pointer to function to periodically test for search cancellation /// \param[in] fn_abort Pointer to function to periodically test for search cancellation
/// \param[in] cookie Cookie for \p fn_abort call /// \param[in] cookie Cookie for \p fn_abort call
/// ///
bool Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<wchar_t, unsigned long> &hits, _Inout_ std::map<wchar_t, unsigned long> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; bool Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<wchar_t, charrank_t> &hits, _Inout_ std::map<wchar_t, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
/// ///
/// Get character category /// Get character category

View File

@ -20,7 +20,7 @@
#include "stdafx.h" #include "stdafx.h"
bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<wchar_t, unsigned long> &hits, _Inout_ std::map<wchar_t, unsigned long> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<wchar_t, charrank_t> &hits, _Inout_ std::map<wchar_t, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const
{ {
assert(str); assert(str);
@ -77,13 +77,13 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set
if (fn_abort && fn_abort(cookie)) return false; if (fn_abort && fn_abort(cookie)) return false;
wchar_t c = data[i]; wchar_t c = data[i];
if (cats.find(GetCharCat(c)) != cats.end()) { if (cats.find(GetCharCat(c)) != cats.end()) {
std::map<wchar_t, unsigned long>::iterator idx = hits.find(c); std::map<wchar_t, charrank_t>::iterator idx = hits.find(c);
if (idx == hits.end()) { if (idx == hits.end()) {
// New character. // New character.
hits.insert(std::make_pair(data[i], 1)); hits.insert(std::make_pair(data[i], 1.0/len));
} else { } else {
// Increment existing character. // Increase rating of existing character.
idx->second++; idx->second += 1.0/len;
} }
} }
} }
@ -95,13 +95,13 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set
if (fn_abort && fn_abort(cookie)) return false; if (fn_abort && fn_abort(cookie)) return false;
wchar_t c = data[i]; wchar_t c = data[i];
if (cats.find(GetCharCat(c)) != cats.end()) { if (cats.find(GetCharCat(c)) != cats.end()) {
std::map<wchar_t, unsigned long>::iterator idx = hits_sub.find(c); std::map<wchar_t, charrank_t>::iterator idx = hits_sub.find(c);
if (idx == hits_sub.end()) { if (idx == hits_sub.end()) {
// New character. // New character.
hits_sub.insert(std::make_pair(data[i], 1)); hits_sub.insert(std::make_pair(data[i], 1.0/len));
} else { } else {
// Increment existing character. // Increase rating of existing character.
idx->second++; idx->second += 1.0/len;
} }
} }
} }