diff --git a/ZRColaCompile/dbsource.cpp b/ZRColaCompile/dbsource.cpp
index c31edc0..d57929c 100644
--- a/ZRColaCompile/dbsource.cpp
+++ b/ZRColaCompile/dbsource.cpp
@@ -797,7 +797,7 @@ bool ZRCola::DBSource::GetLanguageCharacter(const com_obj& rs, ZRC
     {
         com_obj f;
         wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak"), &f)));
-        wxCHECK(GetUnicodeCharacter(f, lc.chr), false);
+        wxCHECK(GetUnicodeString(f, lc.chr), false);
     }
 
     {
diff --git a/ZRColaCompile/dbsource.h b/ZRColaCompile/dbsource.h
index 6de0a9b..ba39bd3 100644
--- a/ZRColaCompile/dbsource.h
+++ b/ZRColaCompile/dbsource.h
@@ -134,7 +134,7 @@ namespace ZRCola {
         ///
         class langchar {
         public:
-            wchar_t chr;                ///> Character
+            std::wstring chr;           ///< Character
             ZRCola::langid_t lang;      ///< Language ID
         };
 
diff --git a/ZRColaCompile/main.cpp b/ZRColaCompile/main.cpp
index 0034e8a..5bc5974 100644
--- a/ZRColaCompile/main.cpp
+++ b/ZRColaCompile/main.cpp
@@ -383,9 +383,13 @@ int _tmain(int argc, _TCHAR *argv[])
                         if (src.GetLanguageCharacter(rs, lc)) {
                             // Add language characters to index and data.
                             unsigned __int32 idx = db.data.size();
-                            db.data.push_back(lc.chr);
                             for (wstring::size_type i = 0; i < sizeof(ZRCola::langid_t)/sizeof(unsigned __int16); i++)
                                 db.data.push_back(((const unsigned __int16*)lc.lang.data)[i]);
+                            wstring::size_type n = lc.chr.length();
+                            wxASSERT_MSG(n <= 0xffff, wxT("character string too long"));
+                            db.data.push_back((unsigned __int16)n);
+                            for (wstring::size_type i = 0; i < n; i++)
+                                db.data.push_back(lc.chr[i]);
                             db.idxChr.push_back(idx);
 #ifdef ZRCOLA_LANGCHAR_LANG_IDX
                             db.idxLng.push_back(idx);
diff --git a/lib/libZRCola/include/zrcola/language.h b/lib/libZRCola/include/zrcola/language.h
index b3036b9..1bfe4e1 100644
--- a/lib/libZRCola/include/zrcola/language.h
+++ b/lib/libZRCola/include/zrcola/language.h
@@ -45,8 +45,9 @@ namespace ZRCola {
         /// Character data
         ///
         struct langchar {
-            wchar_t chr;                ///> Character
             langid_t lang;              ///< Language ID
+            unsigned __int16 chr_len;   ///< \c chr length (in UTF-16 characters)
+            wchar_t chr[];              ///< Character
         };
 #pragma pack(pop)
 
@@ -76,27 +77,8 @@ namespace ZRCola {
             ///
             virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const
             {
-                if (a.chr < b.chr) return -1;
-                else if (a.chr > b.chr) return 1;
-
-                return 0;
-            }
-
-            ///
-            /// Compares two characters by ID (for sorting)
-            ///
-            /// \param[in] a Pointer to first element
-            /// \param[in] b Pointer to second element
-            ///
-            /// \returns
-            /// - <0 when a < b
-            /// - =0 when a == b
-            /// - >0 when a > b
-            ///
-            virtual int compare_sort(_In_ const langchar &a, _In_ const langchar &b) const
-            {
-                if (a.chr < b.chr) return -1;
-                else if (a.chr > b.chr) return 1;
+                int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len);
+                if (r != 0) return r;
 
                 if (a.lang < b.lang) return -1;
                 else if (a.lang > b.lang) return 1;
@@ -133,33 +115,14 @@ namespace ZRCola {
             ///
             virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const
             {
-                int r = memcmp(a.lang, b.lang, sizeof(langid_t));
+                if (a.lang < b.lang) return -1;
+                else if (a.lang > b.lang) return 1;
+
+                int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len);
                 if (r != 0) return r;
 
                 return 0;
             }
-
-            ///
-            /// Compares two languages by ID (for sorting)
-            ///
-            /// \param[in] a Pointer to first element
-            /// \param[in] b Pointer to second element
-            ///
-            /// \returns
-            /// - <0 when a < b
-            /// - =0 when a == b
-            /// - >0 when a > b
-            ///
-            virtual int compare_sort(_In_ const langchar &a, _In_ const langchar &b) const
-            {
-                int r = memcmp(a.lang, b.lang, sizeof(langid_t));
-                if (r != 0) return r;
-
-                if (a.chr < b.chr) return -1;
-                else if (a.chr > b.chr) return 1;
-
-                return 0;
-            }
         } idxLng; ///< Character language index
 #endif
 
@@ -190,19 +153,8 @@ namespace ZRCola {
         ///
         /// Tests presence of character in the given language
         ///
-        /// \param[in] chr Character (UTF-16)
-        /// \param[in] lang Language
-        ///
-        /// \returns
-        /// - \c true when character is used in language
-        /// - \c false otherwise
-        bool IsLocalCharacter(_In_ wchar_t chr, _In_ langid_t lang) const;
-
-        ///
-        /// Tests presence of character in the given language
-        ///
-        /// \param[in] chr Pointer to UTF-16 character start
-        /// \param[in] chr_end Pointer to UTF-16 character end
+        /// \param[in] chr Pointer to character
+        /// \param[in] chr_end Pointer to character end
         /// \param[in] lang Language
         ///
         /// \returns
@@ -227,7 +179,7 @@ namespace ZRCola {
         ///
         struct language {
             langid_t id;                ///< Language ID
-            unsigned __int16 name_len;  ///< \c name length (in characters)
+            unsigned __int16 name_len;  ///< \c name length (in UTF-16 characters)
             wchar_t name[];             ///< Language name
         };
 #pragma pack(pop)
diff --git a/lib/libZRCola/src/language.cpp b/lib/libZRCola/src/language.cpp
index f523a90..841c627 100644
--- a/lib/libZRCola/src/language.cpp
+++ b/lib/libZRCola/src/language.cpp
@@ -71,35 +71,23 @@ void ZRCola::LangConvert(_In_ LANGID lang_win, _Inout_ ZRCola::langid_t &lang)
 #endif
 
 
-bool ZRCola::langchar_db::IsLocalCharacter(_In_ wchar_t chr, _In_ ZRCola::langid_t lang) const
-{
-    for (size_t l = 0, r = idxChr.size(); l < r; ) {
-        // Test the character in the middle of the search area.
-        size_t m = (l + r) / 2;
-        const langchar &lc = idxChr[m];
-
-        // Do the bisection test on character.
-        if (chr < lc.chr) r = m;
-        else if (lc.chr < chr ) l = m + 1;
-        else {
-            // Do the bisection test on language.
-            if (lang < lc.lang) r = m;
-            else if (lang > lc.lang) l = m + 1;
-            else {
-                // Match found.
-                return true;
-            }
-        }
-    }
-
-    return false;
-}
-
-
 bool ZRCola::langchar_db::IsLocalCharacter(_In_ const wchar_t *chr, _In_ const wchar_t *chr_end, _In_ ZRCola::langid_t lang) const
 {
-    // TODO: Implement properly!
-    UNREFERENCED_PARAMETER(chr_end);
-    assert(chr < chr_end);
-    return IsLocalCharacter(*chr, lang);
+    // A reversed range, or a string longer than langchar::chr_len can hold, cannot
+    // be in the database. Reject it here instead of relying on assert(), which is
+    // compiled out of release builds and would let the length be truncated.
+    if (chr_end < chr || chr_end - chr > 0xffff)
+        return false;
+    const size_t n = chr_end - chr;
+
+    // Build a temporary variable-length search key. The storage is owned as char[]
+    // so that it is released with the matching delete[].
+    std::unique_ptr<char[]> buf(new char[sizeof(ZRCola::langchar_db::langchar) + sizeof(wchar_t)*n]);
+    ZRCola::langchar_db::langchar *lc = reinterpret_cast<ZRCola::langchar_db::langchar*>(buf.get());
+    lc->lang = lang;
+    lc->chr_len = (unsigned __int16)n;
+    memcpy(lc->chr, chr, sizeof(wchar_t)*n);
+
+    ZRCola::langchar_db::indexChar::size_type start;
+    return idxChr.find(*lc, start);
 }
diff --git a/output/data/ZRCola.zrcdb b/output/data/ZRCola.zrcdb
index 71bdd87..0ebd89b 100644
Binary files a/output/data/ZRCola.zrcdb and b/output/data/ZRCola.zrcdb differ