Order blocks by Unicode location

Signed-off-by: Simon Rozman <simon.rozman@amebis.si>
This commit is contained in:
2025-12-03 12:49:19 +01:00
parent 83e2022ef6
commit e6ea4a2742
8 changed files with 137 additions and 79 deletions

View File

@@ -204,8 +204,8 @@ wxZRColaCharSelect::wxZRColaCharSelect(wxWindow* parent) :
// Fill blocks.
auto app = dynamic_cast<ZRColaApp*>(wxTheApp);
for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++) {
const auto &cb = app->m_cb_db.idxRank[i];
for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++) {
const auto &cb = app->m_cb_db.idxFirst[i];
int idx = m_blocks->Insert(wxGetTranslation(wxString(cb.name(), cb.name_len()), wxT("ZRCola-zrcdb")), (unsigned int)i);
m_blocks->Check(idx);
m_cbOrder.insert(std::make_pair(cb.id, idx));
@@ -267,7 +267,7 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event)
{
// Update character block.
ZRCola::chrblk_db::indexChrId::size_type cb_start;
if (app->m_cb_db.idxChrId.find(ZRCola::chrblk_db::chrcls(chr.blk), cb_start)) {
if (app->m_cb_db.idxChrId.find(ZRCola::chrblk_db::chrblk(chr.blk), cb_start)) {
const auto &blk = app->m_cb_db.idxChrId[cb_start];
m_block->SetValue(wxGetTranslation(wxString(blk.name(), blk.name_len()), wxT("ZRCola-zrcdb")));
} else
@@ -341,8 +341,8 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event)
m_searchThread->m_search.assign(val.c_str(), val.Length());
// Select blocks.
for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++) {
const auto &cb = app->m_cb_db.idxRank[i];
for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++) {
const auto &cb = app->m_cb_db.idxFirst[i];
if (m_blocks->IsChecked((unsigned int)i))
m_searchThread->m_blks.insert(cb.id);
}
@@ -389,7 +389,7 @@ void wxZRColaCharSelect::OnBlocksAll(wxHyperlinkEvent& event)
event.StopPropagation();
auto app = dynamic_cast<ZRColaApp*>(wxTheApp);
for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++)
for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++)
m_blocks->Check((unsigned int)i, true);
m_searchChanged = true;
@@ -401,7 +401,7 @@ void wxZRColaCharSelect::OnBlocksNone(wxHyperlinkEvent& event)
event.StopPropagation();
auto app = dynamic_cast<ZRColaApp*>(wxTheApp);
for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++)
for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++)
m_blocks->Check((unsigned int)i, false);
m_searchChanged = true;
@@ -413,7 +413,7 @@ void wxZRColaCharSelect::OnBlocksInvert(wxHyperlinkEvent& event)
event.StopPropagation();
auto app = dynamic_cast<ZRColaApp*>(wxTheApp);
for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++)
for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++)
m_blocks->Check((unsigned int)i, !m_blocks->IsChecked((unsigned int)i));
m_searchChanged = true;
@@ -820,8 +820,8 @@ void wxPersistentZRColaCharSelect::Save() const
SaveValue(wxT("recentChars" ), str ); // Save in legacy format for backward compatibility.
SaveValue(wxT("recentChars2"), str2); // Save in native format
for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++) {
const auto &cb = app->m_cb_db.idxRank[i];
for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++) {
const auto &cb = app->m_cb_db.idxFirst[i];
SaveValue(wxString::Format(wxT("block%u"), cb.id), wnd->m_blocks->IsChecked((unsigned int)i));
}
@@ -853,8 +853,8 @@ bool wxPersistentZRColaCharSelect::Restore()
wnd->m_gridRecent->SetCharacters(val);
}
for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++) {
const auto &cb = app->m_cb_db.idxRank[i];
for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++) {
const auto &cb = app->m_cb_db.idxFirst[i];
bool val;
if (RestoreValue(wxString::Format(wxT("block%u"), cb.id), &val))
wnd->m_blocks->Check((unsigned int)i, val);

View File

@@ -16,6 +16,7 @@
#include <WinStd/COM.h>
#include <WinStd/Win.h>
#include <stdex/string.hpp>
#include <wxex/common.h>
#pragma warning(push)
@@ -1019,11 +1020,21 @@ inline ZRCola::chrblk_db& operator<<(_Inout_ ZRCola::chrblk_db &db, _In_ const Z
uint32_t idx = db.data.size();
db.data.push_back((uint16_t)rec.second.id);
db.data.push_back((uint16_t)rec.second.rank);
std::wstring::size_type n = rec.second.name.length();
std::wstring::size_type n = rec.first >= 0x10000 ? 2 : 1;
db.data.push_back((uint16_t)n);
n += rec.second.name.length();
wxASSERT_MSG(n <= 0xffff, wxT("character block name overflow"));
db.data.push_back((uint16_t)n);
if (rec.first < 0x10000)
db.data.push_back((uint16_t)rec.first);
else {
stdex::utf16_t buf[2];
stdex::ucs4_to_surrogate_pair(buf, rec.first);
db.data.insert(db.data.end(), &buf[0], &buf[2]);
}
db.data.insert(db.data.end(), rec.second.name.cbegin(), rec.second.name.cend());
db.idxChrId.push_back(idx);
db.idxFirst.push_back(idx);
db.idxRank .push_back(idx);
return db;

View File

@@ -821,6 +821,7 @@ int _tmain(int argc, _TCHAR *argv[])
// Preallocate memory.
db.idxChrId.reserve(chrs.idxChrBlk.size());
db.idxFirst.reserve(chrs.idxChrBlk.size());
db.idxRank.reserve(chrs.idxChrBlk.size());
db.data.reserve(chrs.idxChrBlk.size() * 16);
@@ -841,6 +842,7 @@ int _tmain(int argc, _TCHAR *argv[])
// Write character blocks to file.
db.idxChrId.sort();
db.idxFirst.sort();
db.idxRank.sort();
dst << ZRCola::chrblk_rec(db);
}

View File

@@ -289,62 +289,74 @@ namespace ZRCola {
///
/// Character classification database template
/// Character block database
///
template <typename T_id>
class chrclass_db {
class chrblk_db {
public:
#pragma pack(push)
#pragma pack(2)
///
/// Character classification data
/// Character block data
///
struct chrcls {
struct chrblk {
public:
T_id id; ///< Character classification ID
uint16_t rank; ///< Character classification rank
chrblkid_t id; ///< Character block ID
uint16_t rank; ///< Character block rank
protected:
uint16_t name_to; ///< Character classification name end in \c data
char_t data[]; ///< Character classification name
uint16_t first_to; ///< Character block first character end in \c data
uint16_t name_to; ///< Character block name end in \c data
char_t data[]; ///< Character block name
private:
chrcls(_In_ const chrcls &other);
chrcls& operator=(_In_ const chrcls &other);
chrblk(_In_ const chrblk &other);
chrblk& operator=(_In_ const chrblk &other);
public:
///
/// Constructs the character classification
/// Constructs the character block
///
/// \param[in] id Character classification ID
/// \param[in] rank Character classification rank
/// \param[in] name Character classification name
/// \param[in] name_len Number of UTF-16 characters in \p name
/// \param[in] id Character block ID
/// \param[in] rank Character block rank
/// \param[in] first Character block first character
/// \param[in] first_len Number of UTF-16 characters in \p first
/// \param[in] name Character block name
/// \param[in] name_len Number of UTF-16 characters in \p name
///
chrcls(
_In_opt_ T_id id = default,
_In_opt_ uint16_t rank = 0,
_In_opt_z_count_(name_len) const char_t *name = NULL,
_In_opt_ size_t name_len = 0)
chrblk(
_In_opt_ chrblkid_t id = 0,
_In_opt_ uint16_t rank = 0,
_In_opt_z_count_(first_len) const char_t *first = NULL,
_In_opt_ size_t first_len = 0,
_In_opt_z_count_(name_len) const char_t *name = NULL,
_In_opt_ size_t name_len = 0)
{
this->id = id;
this->rank = rank;
this->name_to = static_cast<uint16_t>(name_len);
if (name && name_len) memcpy(this->data, name, sizeof(char_t)*name_len);
this->first_to = static_cast<uint16_t>(first_len);
if (first && first_len) memcpy(this->data, first, sizeof(char_t)*first_len);
this->name_to = static_cast<uint16_t>(this->first_to + name_len);
if (name && name_len) memcpy(this->data + this->first_to, name, sizeof(char_t)*name_len);
}
const char_t* name () const { return data; };
char_t* name () { return data; };
const char_t* name_end() const { return data + name_to; };
char_t* name_end() { return data + name_to; };
uint16_t name_len() const { return name_to; };
const char_t* first () const { return data; };
char_t* first () { return data; };
const char_t* first_end() const { return data + first_to; };
char_t* first_end() { return data + first_to; };
uint16_t first_len() const { return first_to; };
const char_t* name () const { return data + first_to; };
char_t* name () { return data + first_to; };
const char_t* name_end() const { return data + name_to; };
char_t* name_end() { return data + name_to; };
uint16_t name_len() const { return name_to - first_to; };
};
#pragma pack(pop)
///
/// Character classification index
/// Character block index
///
class indexChrId : public index<uint16_t, uint32_t, chrcls>
class indexChrId : public index<uint16_t, uint32_t, chrblk>
{
public:
///
@@ -352,10 +364,10 @@ namespace ZRCola {
///
/// \param[in] h Reference to vector holding the data
///
indexChrId(_In_ std::vector<uint16_t> &h) : index<uint16_t, uint32_t, chrcls>(h) {}
indexChrId(_In_ std::vector<uint16_t> &h) : index<uint16_t, uint32_t, chrblk>(h) {}
///
/// Compares two character categories by ID (for searching)
/// Compares two character blocks by ID (for searching)
///
/// \param[in] a Pointer to first element
/// \param[in] b Pointer to second element
@@ -365,19 +377,19 @@ namespace ZRCola {
/// - =0 when a == b
/// - >0 when a > b
///
virtual int compare(_In_ const chrcls &a, _In_ const chrcls &b) const
virtual int compare(_In_ const chrblk &a, _In_ const chrblk &b) const
{
if (a.id < b.id) return -1;
if (a.id > b.id) return 1;
return 0;
}
} idxChrId; ///< Character classification index
} idxChrId; ///< Character block index
///
/// Rank index
/// First character index
///
class indexRank : public index<uint16_t, uint32_t, chrcls>
class indexFirst : public index<uint16_t, uint32_t, chrblk>
{
public:
///
@@ -385,10 +397,10 @@ namespace ZRCola {
///
/// \param[in] h Reference to vector holding the data
///
indexRank(_In_ std::vector<uint16_t> &h) : index<uint16_t, uint32_t, chrcls>(h) {}
indexFirst(_In_ std::vector<uint16_t> &h) : index<uint16_t, uint32_t, chrblk>(h) {}
///
/// Compares two character categories by ID (for searching)
/// Compares two character blocks by first character (for searching)
///
/// \param[in] a Pointer to first element
/// \param[in] b Pointer to second element
@@ -398,7 +410,40 @@ namespace ZRCola {
/// - =0 when a == b
/// - >0 when a > b
///
virtual int compare(_In_ const chrcls &a, _In_ const chrcls &b) const
virtual int compare(_In_ const chrblk &a, _In_ const chrblk &b) const
{
int r = ZRCola::CompareString(a.first(), a.first_len(), b.first(), b.first_len());
if (r != 0) return r;
return 0;
}
} idxFirst; ///< First character index
///
/// Rank index
///
class indexRank : public index<uint16_t, uint32_t, chrblk>
{
public:
///
/// Constructs the index
///
/// \param[in] h Reference to vector holding the data
///
indexRank(_In_ std::vector<uint16_t> &h) : index<uint16_t, uint32_t, chrblk>(h) {}
///
/// Compares two character blocks by rank (for searching)
///
/// \param[in] a Pointer to first element
/// \param[in] b Pointer to second element
///
/// \returns
/// - <0 when a < b
/// - =0 when a == b
/// - >0 when a > b
///
virtual int compare(_In_ const chrblk &a, _In_ const chrblk &b) const
{
if (a.rank < b.rank) return -1;
if (a.rank > b.rank) return +1;
@@ -407,17 +452,17 @@ namespace ZRCola {
}
///
/// Compares two character categories by rank (for sorting)
/// Compares two character blocks by rank (for sorting)
///
/// \param[in] a Pointer to character classification
/// \param[in] b Pointer to second character classification
/// \param[in] a Pointer to first character block
/// \param[in] b Pointer to second character block
///
/// \returns
/// - <0 when a < b
/// - =0 when a == b
/// - >0 when a > b
///
virtual int compare_sort(_In_ const chrcls &a, _In_ const chrcls &b) const
virtual int compare_sort(_In_ const chrblk &a, _In_ const chrblk &b) const
{
if (a.rank < b.rank) return -1;
else if (a.rank > b.rank) return +1;
@@ -427,13 +472,13 @@ namespace ZRCola {
}
} idxRank; ///< Rank index
std::vector<uint16_t> data; ///< Character classification data
std::vector<uint16_t> data; ///< Character block data
public:
///
/// Constructs the database
///
chrclass_db() : idxChrId(data), idxRank(data) {}
chrblk_db() : idxChrId(data), idxFirst(data), idxRank(data) {}
///
/// Clears the database
@@ -441,25 +486,30 @@ namespace ZRCola {
void clear()
{
idxChrId.clear();
idxFirst.clear();
idxRank .clear();
data .clear();
}
///
/// Writes character classification database to a stream
/// Writes character block database to a stream
///
/// \param[in] stream Output stream
/// \param[in] db Character classification database
/// \param[in] db Character block database
///
/// \returns The stream \p stream
///
friend std::ostream& operator <<(_In_ std::ostream& stream, _In_ const chrclass_db<T_id>& db)
friend std::ostream& operator <<(_In_ std::ostream& stream, _In_ const chrblk_db& db)
{
// Write character classification index.
// Write character block index.
if (stream.fail()) return stream;
stream << db.idxChrId;
// Write first character index.
if (stream.fail()) return stream;
stream << db.idxFirst;
// Write rank index.
if (stream.fail()) return stream;
stream << db.idxRank;
@@ -486,19 +536,23 @@ namespace ZRCola {
///
/// Reads character classification database from a stream
/// Reads character block database from a stream
///
/// \param[in ] stream Input stream
/// \param[out] db Character classification database
/// \param[out] db Character block database
///
/// \returns The stream \p stream
///
friend std::istream& operator >>(_In_ std::istream& stream, _Out_ chrclass_db<T_id>& db)
friend std::istream& operator >>(_In_ std::istream& stream, _Out_ chrblk_db& db)
{
// Read character classification index.
// Read character block index.
stream >> db.idxChrId;
if (!stream.good()) return stream;
// Read first character index.
stream >> db.idxFirst;
if (!stream.good()) return stream;
// Read rank index.
stream >> db.idxRank;
if (!stream.good()) return stream;
@@ -519,11 +573,6 @@ namespace ZRCola {
return stream;
}
};
///
/// Character block database
///
using chrblk_db = chrclass_db<chrblkid_t>;
};
#pragma warning(pop)

View File

@@ -9,13 +9,7 @@
_Use_decl_annotations_
int ZRCola::CompareString(const char_t* str_a, size_t count_a, const char_t* str_b, size_t count_b)
{
for (size_t i = 0; ; i++) {
if (i >= count_a && i >= count_b) return 0;
else if (i >= count_a && i < count_b) return -1;
else if (i < count_a && i >= count_b) return +1;
else if (str_a[i] < str_b[i]) return -1;
else if (str_a[i] > str_b[i]) return +1;
}
return stdex::strncmp(str_a, count_a, str_b, count_b);
}

View File

@@ -14,6 +14,8 @@
#include "../include/zrcola/translate.h"
#include "../include/zrcola/tag.h"
#include <stdex/string.hpp>
#include <assert.h>
#include <algorithm>

Binary file not shown.