From e6ea4a2742a9b6d1b7af85d5567e415ea29b1adb Mon Sep 17 00:00:00 2001 From: Simon Rozman Date: Wed, 3 Dec 2025 12:49:19 +0100 Subject: [PATCH] Order blocks by Unicode location Signed-off-by: Simon Rozman --- ZRCola/zrcolachrslct.cpp | 24 ++-- ZRColaCompile/dbsource.h | 13 +- ZRColaCompile/main.cpp | 2 + lib/libZRCola/include/zrcola/character.h | 165 +++++++++++++++-------- lib/libZRCola/src/common.cpp | 8 +- lib/libZRCola/src/pch.h | 2 + lib/stdex | 2 +- output/data/ZRCola.zrcdb | Bin 3849102 -> 3849514 bytes 8 files changed, 137 insertions(+), 79 deletions(-) diff --git a/ZRCola/zrcolachrslct.cpp b/ZRCola/zrcolachrslct.cpp index 1711adc..a45c280 100644 --- a/ZRCola/zrcolachrslct.cpp +++ b/ZRCola/zrcolachrslct.cpp @@ -204,8 +204,8 @@ wxZRColaCharSelect::wxZRColaCharSelect(wxWindow* parent) : // Fill blocks. auto app = dynamic_cast(wxTheApp); - for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++) { - const auto &cb = app->m_cb_db.idxRank[i]; + for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++) { + const auto &cb = app->m_cb_db.idxFirst[i]; int idx = m_blocks->Insert(wxGetTranslation(wxString(cb.name(), cb.name_len()), wxT("ZRCola-zrcdb")), (unsigned int)i); m_blocks->Check(idx); m_cbOrder.insert(std::make_pair(cb.id, idx)); @@ -267,7 +267,7 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event) { // Update character block. ZRCola::chrblk_db::indexChrId::size_type cb_start; - if (app->m_cb_db.idxChrId.find(ZRCola::chrblk_db::chrcls(chr.blk), cb_start)) { + if (app->m_cb_db.idxChrId.find(ZRCola::chrblk_db::chrblk(chr.blk), cb_start)) { const auto &blk = app->m_cb_db.idxChrId[cb_start]; m_block->SetValue(wxGetTranslation(wxString(blk.name(), blk.name_len()), wxT("ZRCola-zrcdb"))); } else @@ -341,8 +341,8 @@ void wxZRColaCharSelect::OnIdle(wxIdleEvent& event) m_searchThread->m_search.assign(val.c_str(), val.Length()); // Select blocks. - for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++) { - const auto &cb = app->m_cb_db.idxRank[i]; + for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++) { + const auto &cb = app->m_cb_db.idxFirst[i]; if (m_blocks->IsChecked((unsigned int)i)) m_searchThread->m_blks.insert(cb.id); } @@ -389,7 +389,7 @@ void wxZRColaCharSelect::OnBlocksAll(wxHyperlinkEvent& event) event.StopPropagation(); auto app = dynamic_cast(wxTheApp); - for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++) + for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++) m_blocks->Check((unsigned int)i, true); m_searchChanged = true; @@ -401,7 +401,7 @@ void wxZRColaCharSelect::OnBlocksNone(wxHyperlinkEvent& event) event.StopPropagation(); auto app = dynamic_cast(wxTheApp); - for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++) + for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++) m_blocks->Check((unsigned int)i, false); m_searchChanged = true; @@ -413,7 +413,7 @@ void wxZRColaCharSelect::OnBlocksInvert(wxHyperlinkEvent& event) event.StopPropagation(); auto app = dynamic_cast(wxTheApp); - for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++) + for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++) m_blocks->Check((unsigned int)i, !m_blocks->IsChecked((unsigned int)i)); m_searchChanged = true; @@ -820,8 +820,8 @@ void wxPersistentZRColaCharSelect::Save() const SaveValue(wxT("recentChars" ), str ); // Save in legacy format for backward compatibility. SaveValue(wxT("recentChars2"), str2); // Save in native format - for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++) { - const auto &cb = app->m_cb_db.idxRank[i]; + for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++) { + const auto &cb = app->m_cb_db.idxFirst[i]; SaveValue(wxString::Format(wxT("block%u"), cb.id), wnd->m_blocks->IsChecked((unsigned int)i)); } @@ -853,8 +853,8 @@ bool wxPersistentZRColaCharSelect::Restore() wnd->m_gridRecent->SetCharacters(val); } - for (size_t i = 0, n = app->m_cb_db.idxRank.size(); i < n; i++) { - const auto &cb = app->m_cb_db.idxRank[i]; + for (size_t i = 0, n = app->m_cb_db.idxFirst.size(); i < n; i++) { + const auto &cb = app->m_cb_db.idxFirst[i]; bool val; if (RestoreValue(wxString::Format(wxT("block%u"), cb.id), &val)) wnd->m_blocks->Check((unsigned int)i, val); diff --git a/ZRColaCompile/dbsource.h b/ZRColaCompile/dbsource.h index 377da85..1537463 100644 --- a/ZRColaCompile/dbsource.h +++ b/ZRColaCompile/dbsource.h @@ -16,6 +16,7 @@ #include #include +#include #include #pragma warning(push) @@ -1019,11 +1020,21 @@ inline ZRCola::chrblk_db& operator<<(_Inout_ ZRCola::chrblk_db &db, _In_ const Z uint32_t idx = db.data.size(); db.data.push_back((uint16_t)rec.second.id); db.data.push_back((uint16_t)rec.second.rank); - std::wstring::size_type n = rec.second.name.length(); + std::wstring::size_type n = rec.first >= 0x10000 ? 2 : 1; + db.data.push_back((uint16_t)n); + n += rec.second.name.length(); wxASSERT_MSG(n <= 0xffff, wxT("character block name overflow")); db.data.push_back((uint16_t)n); + if (rec.first < 0x10000) + db.data.push_back((uint16_t)rec.first); + else { + stdex::utf16_t buf[2]; + stdex::ucs4_to_surrogate_pair(buf, rec.first); + db.data.insert(db.data.end(), &buf[0], &buf[2]); + } db.data.insert(db.data.end(), rec.second.name.cbegin(), rec.second.name.cend()); db.idxChrId.push_back(idx); + db.idxFirst.push_back(idx); db.idxRank .push_back(idx); return db; diff --git a/ZRColaCompile/main.cpp b/ZRColaCompile/main.cpp index 16256ec..f7ab49f 100644 --- a/ZRColaCompile/main.cpp +++ b/ZRColaCompile/main.cpp @@ -821,6 +821,7 @@ int _tmain(int argc, _TCHAR *argv[]) // Preallocate memory. db.idxChrId.reserve(chrs.idxChrBlk.size()); + db.idxFirst.reserve(chrs.idxChrBlk.size()); db.idxRank.reserve(chrs.idxChrBlk.size()); db.data.reserve(chrs.idxChrBlk.size() * 16); @@ -841,6 +842,7 @@ int _tmain(int argc, _TCHAR *argv[]) // Write character blocks to file. db.idxChrId.sort(); + db.idxFirst.sort(); db.idxRank.sort(); dst << ZRCola::chrblk_rec(db); } diff --git a/lib/libZRCola/include/zrcola/character.h b/lib/libZRCola/include/zrcola/character.h index 65d36de..cffe431 100644 --- a/lib/libZRCola/include/zrcola/character.h +++ b/lib/libZRCola/include/zrcola/character.h @@ -289,62 +289,74 @@ namespace ZRCola { /// - /// Character classification database template + /// Character block database /// - template - class chrclass_db { + class chrblk_db { public: #pragma pack(push) #pragma pack(2) /// - /// Character classification data + /// Character block data /// - struct chrcls { + struct chrblk { public: - T_id id; ///< Character classification ID - uint16_t rank; ///< Character classification rank + chrblkid_t id; ///< Character block ID + uint16_t rank; ///< Character block rank protected: - uint16_t name_to; ///< Character classification name end in \c data - char_t data[]; ///< Character classification name + uint16_t first_to; ///< Character block first character end in \c data + uint16_t name_to; ///< Character block name end in \c data + char_t data[]; ///< Character block name private: - chrcls(_In_ const chrcls &other); - chrcls& operator=(_In_ const chrcls &other); + chrblk(_In_ const chrblk &other); + chrblk& operator=(_In_ const chrblk &other); public: /// - /// Constructs the character classification + /// Constructs the character block /// - /// \param[in] id Character classification ID - /// \param[in] rank Character classification rank - /// \param[in] name Character classification name - /// \param[in] name_len Number of UTF-16 characters in \p name + /// \param[in] id Character block ID + /// \param[in] rank Character block rank + /// \param[in] first Character block first character + /// \param[in] first_len Number of UTF-16 characters in \p first + /// \param[in] name Character block name + /// \param[in] name_len Number of UTF-16 characters in \p name /// - chrcls( - _In_opt_ T_id id = default, - _In_opt_ uint16_t rank = 0, - _In_opt_z_count_(name_len) const char_t *name = NULL, - _In_opt_ size_t name_len = 0) + chrblk( + _In_opt_ chrblkid_t id = 0, + _In_opt_ uint16_t rank = 0, + _In_opt_z_count_(first_len) const char_t *first = NULL, + _In_opt_ size_t first_len = 0, + _In_opt_z_count_(name_len) const char_t *name = NULL, + _In_opt_ size_t name_len = 0) { this->id = id; this->rank = rank; - this->name_to = static_cast(name_len); - if (name && name_len) memcpy(this->data, name, sizeof(char_t)*name_len); + this->first_to = static_cast(first_len); + if (first && first_len) memcpy(this->data, first, sizeof(char_t)*first_len); + this->name_to = static_cast(this->first_to + name_len); + if (name && name_len) memcpy(this->data + this->first_to, name, sizeof(char_t)*name_len); } - const char_t* name () const { return data; }; - char_t* name () { return data; }; - const char_t* name_end() const { return data + name_to; }; - char_t* name_end() { return data + name_to; }; - uint16_t name_len() const { return name_to; }; + const char_t* first () const { return data; }; + char_t* first () { return data; }; + const char_t* first_end() const { return data + first_to; }; + char_t* first_end() { return data + first_to; }; + uint16_t first_len() const { return first_to; }; + + const char_t* name () const { return data + first_to; }; + char_t* name () { return data + first_to; }; + const char_t* name_end() const { return data + name_to; }; + char_t* name_end() { return data + name_to; }; + uint16_t name_len() const { return name_to - first_to; }; }; #pragma pack(pop) /// - /// Character classification index + /// Character block index /// - class indexChrId : public index + class indexChrId : public index { public: /// @@ -352,10 +364,10 @@ namespace ZRCola { /// /// \param[in] h Reference to vector holding the data /// - indexChrId(_In_ std::vector &h) : index(h) {} + indexChrId(_In_ std::vector &h) : index(h) {} /// - /// Compares two character categories by ID (for searching) + /// Compares two character blocks by ID (for searching) /// /// \param[in] a Pointer to first element /// \param[in] b Pointer to second element @@ -365,19 +377,19 @@ namespace ZRCola { /// - =0 when a == b /// - >0 when a > b /// - virtual int compare(_In_ const chrcls &a, _In_ const chrcls &b) const + virtual int compare(_In_ const chrblk &a, _In_ const chrblk &b) const { if (a.id < b.id) return -1; if (a.id > b.id) return 1; return 0; } - } idxChrId; ///< Character classification index + } idxChrId; ///< Character block index /// - /// Rank index + /// First character index /// - class indexRank : public index + class indexFirst : public index { public: /// @@ -385,10 +397,10 @@ namespace ZRCola { /// /// \param[in] h Reference to vector holding the data /// - indexRank(_In_ std::vector &h) : index(h) {} + indexFirst(_In_ std::vector &h) : index(h) {} /// - /// Compares two character categories by ID (for searching) + /// Compares two character blocks by first character (for searching) /// /// \param[in] a Pointer to first element /// \param[in] b Pointer to second element @@ -398,7 +410,40 @@ namespace ZRCola { /// - =0 when a == b /// - >0 when a > b /// - virtual int compare(_In_ const chrcls &a, _In_ const chrcls &b) const + virtual int compare(_In_ const chrblk &a, _In_ const chrblk &b) const + { + int r = ZRCola::CompareString(a.first(), a.first_len(), b.first(), b.first_len()); + if (r != 0) return r; + + return 0; + } + } idxFirst; ///< First character index + + /// + /// Rank index + /// + class indexRank : public index + { + public: + /// + /// Constructs the index + /// + /// \param[in] h Reference to vector holding the data + /// + indexRank(_In_ std::vector &h) : index(h) {} + + /// + /// Compares two character blocks by ID (for searching) + /// + /// \param[in] a Pointer to first element + /// \param[in] b Pointer to second element + /// + /// \returns + /// - <0 when a < b + /// - =0 when a == b + /// - >0 when a > b + /// + virtual int compare(_In_ const chrblk &a, _In_ const chrblk &b) const { if (a.rank < b.rank) return -1; if (a.rank > b.rank) return +1; @@ -407,17 +452,17 @@ namespace ZRCola { } /// - /// Compares two character categories by rank (for sorting) + /// Compares two character blocks by rank (for sorting) /// - /// \param[in] a Pointer to character classification - /// \param[in] b Pointer to second character classification + /// \param[in] a Pointer to character block + /// \param[in] b Pointer to second character block /// /// \returns /// - <0 when a < b /// - =0 when a == b /// - >0 when a > b /// - virtual int compare_sort(_In_ const chrcls &a, _In_ const chrcls &b) const + virtual int compare_sort(_In_ const chrblk &a, _In_ const chrblk &b) const { if (a.rank < b.rank) return -1; else if (a.rank > b.rank) return +1; @@ -427,13 +472,13 @@ namespace ZRCola { } } idxRank; ///< Rank index - std::vector data; ///< Character classification data + std::vector data; ///< Character block data public: /// /// Constructs the database /// - chrclass_db() : idxChrId(data), idxRank(data) {} + chrblk_db() : idxChrId(data), idxFirst(data), idxRank(data) {} /// /// Clears the database @@ -441,25 +486,30 @@ namespace ZRCola { void clear() { idxChrId.clear(); + idxFirst.clear(); idxRank .clear(); data .clear(); } /// - /// Writes character classification database to a stream + /// Writes character block database to a stream /// /// \param[in] stream Output stream - /// \param[in] db Character classification database + /// \param[in] db Character block database /// /// \returns The stream \p stream /// - friend std::ostream& operator <<(_In_ std::ostream& stream, _In_ const chrclass_db& db) + friend std::ostream& operator <<(_In_ std::ostream& stream, _In_ const chrblk_db& db) { - // Write character classification index. + // Write character block index. if (stream.fail()) return stream; stream << db.idxChrId; + // Write first character index. + if (stream.fail()) return stream; + stream << db.idxFirst; + // Write rank index. if (stream.fail()) return stream; stream << db.idxRank; @@ -486,19 +536,23 @@ namespace ZRCola { /// - /// Reads character classification database from a stream + /// Reads character block database from a stream /// /// \param[in ] stream Input stream - /// \param[out] db Character classification database + /// \param[out] db Character block database /// /// \returns The stream \p stream /// - friend std::istream& operator >>(_In_ std::istream& stream, _Out_ chrclass_db& db) + friend std::istream& operator >>(_In_ std::istream& stream, _Out_ chrblk_db& db) { - // Read character classification index. + // Read character block index. stream >> db.idxChrId; if (!stream.good()) return stream; + // Read first character index. + stream >> db.idxFirst; + if (!stream.good()) return stream; + // Read rank index. stream >> db.idxRank; if (!stream.good()) return stream; @@ -519,11 +573,6 @@ namespace ZRCola { return stream; } }; - - /// - /// Character block database - /// - using chrblk_db = chrclass_db; }; #pragma warning(pop) diff --git a/lib/libZRCola/src/common.cpp b/lib/libZRCola/src/common.cpp index cb9029d..2a52df3 100644 --- a/lib/libZRCola/src/common.cpp +++ b/lib/libZRCola/src/common.cpp @@ -9,13 +9,7 @@ _Use_decl_annotations_ int ZRCola::CompareString(const char_t* str_a, size_t count_a, const char_t* str_b, size_t count_b) { - for (size_t i = 0; ; i++) { - if (i >= count_a && i >= count_b) return 0; - else if (i >= count_a && i < count_b) return -1; - else if (i < count_a && i >= count_b) return +1; - else if (str_a[i] < str_b[i]) return -1; - else if (str_a[i] > str_b[i]) return +1; - } + return stdex::strncmp(str_a, count_a, str_b, count_b); } diff --git a/lib/libZRCola/src/pch.h b/lib/libZRCola/src/pch.h index 8e394e5..f664f61 100644 --- a/lib/libZRCola/src/pch.h +++ b/lib/libZRCola/src/pch.h @@ -14,6 +14,8 @@ #include "../include/zrcola/translate.h" #include "../include/zrcola/tag.h" +#include + #include #include diff --git a/lib/stdex b/lib/stdex index f9b7dba..655bee3 160000 --- a/lib/stdex +++ b/lib/stdex @@ -1 +1 @@ -Subproject commit f9b7dba5421ed51317d4fa9c321482959a00129c +Subproject commit 655bee369213eb33fe118b99437815719fd6c3d7 diff --git a/output/data/ZRCola.zrcdb b/output/data/ZRCola.zrcdb index 48907c98ac53fe4104f9eec34e2bcd023acc774d..a8a2ede34af67aa58869c3549def093acd5e4da1 100644 GIT binary patch delta 1666 zcmb`HPfXlZ5XZj@>`=C_E3oCy%AbX@uu!N#D?T+*X$fkqx@luf8xPQiG;IW%wuYD( zS#3sdq>09-sRs_mCS1zJm|!%-R4;ny!HYLd(897^xEcRc{muhC+r-^`_V+Sx-n^Ol z&Tr>`EkqW7FGS?j@uTSc;!RPBsFbOct5m2&RcNVH*`rdW5>wf$Qms;>5?4v6+^15j zQm0a{l2qBJ(xB3)(xlR?l2W-}rA4JxrA?(>C9Tq-(y6jvrAsBF(yh{?l2z$d=~FqN z(yub0a!};~l|hvum0^`bDu-1bR2flu=u>gsKhMoq1BbBvImpL(GDqGc*U3$?N`4~} z0oup`GD1elF>;bzA=k*4WSvx&0Vy&}j+3+GU9w0%C*P2t%6QdXE}F`LEP0gV$aykL z7RWOBp4=s!6~GV~CuhhNa*ZsJugPtK=)DxgQvc_Q?yaCXX8d0(vI?#3maw(ZI%b`- zk4a}8xyh$VVC}YUSnEwX>y*i7owbgeG}i4oYpH3qN+;9Ov^DKb!`)f11g*`cpXp}O zm?S2Z$wD22uDdf}a@g#c6sG^Kj3%4SjLl08_acZ0fBFFb!!Pc|Xao*Hj%j9I`4nz2;olWXoU1GNC%+XP7nnv`iEtvE94VU6(J<#Dl+Hl#&Bdjvl z@f>rWW_?r4I*oC>!sj$D;rX3fuXb1mF48_rYc5zD`3gVl6Qx+Aj~87Nqc#EieYWU^ zxo*Tnmw&0~2D#?HQpV30!_5igGk&oc@@I+}|6MVp*Z_3f*%qHGxp>#fa0LbJ8Dk4) zaF)qU;xaEk6?QUM>GAH4JI3%0*y{1iC71FGcU&?U(7SE8wQa~C-)r9E)kiQ9&Uf(T z`1iUj&+G;I{mT2%WbAZssKrEom{`A5a(n2|%=glQ?o8pJ-zvFBXiwsaPkjGo5TO?7 z>$_t`eMtZ1S2tZh*Hzg1`<=Qi_gUT6!>@n)4`cB( A(EtDd delta 1273 zcmZ9KTWC~g5Xa|yx|8iDZj)?wT@$mecFkp*){U2B#M0IaT9iiF76qYTlBPu=sm2?E z;04A)5nmKPT3YbUQcx(gG|+-bp)d8p2g!qI`=YqnWRqPHN=qsI&xvVE&+=p`yhwSmazq)CI_0SH3(7I&dgTV?CCYK-7nK{8 zo0OZC6Ur^hN#&ICQsuPrGUZm~Hs$5Y?aCSD70OxVmCCD>UsBE~uU76*?o{ql?pDq# zzpUJ&{EG6c%4?Jh%Du{KmDee+SKgr9r`-Q7OUL6gPh&*uX#VQK;{$LCTm)Z$2`~w! zz%vjFh>{=!Iza(!0>j`G_ze6Aeg##)jwlEE!5}yR6kGz=9i5@u_;VkW!0(`85emQ> zuosMh3*ZL03m$?hs9#Lf4c-7nFai{O4$P3fn@AIE4Q+qxW0x7V`U&(86`MLMHl^m& zHa52=!~~iX6K4XOh?QcAheUX0JO1=K;C(`coB`LZ_@{Lu=_-iHMg_r0Sil8D+ezGK4 zmnQ#bN!GhI@~3B{kE2*a*@-F1xhVN-Q_>P7E;j`Ue`iY4??-7fjngQN&=?)0LEIZq z!1NX(Ls%K1le8p^Sl%FF2Wc41Eax4Zpz*M3m|78u(hfRKhmd8N$FPz|bT#<{Wf^b@ z^52vt?=s{cmE}#>5egJPG%abjjQrVY>2q-L!?a}U6WGcmjnKG%yDIIx6Fb3D;d~_J zyJp}8o{IMD47Vw|X7cxDq^q}q1|VV-LQzXu+D;$gIZ8)qcPQFyj3agukv)i{{E<0n z^Y2z9<(85^T#>adMgGf*6kI*|UnQ+*`R-