diff --git a/ZRColaCompile/main.cpp b/ZRColaCompile/main.cpp index 2d95970..ee8cc74 100644 --- a/ZRColaCompile/main.cpp +++ b/ZRColaCompile/main.cpp @@ -35,7 +35,7 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::translation unsigned __int32 count; // Write index count. - std::vector::size_type trans_count = t_db.idxComp.size(); + std::vector::size_type trans_count = t_db.idxComp.size(); #if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) // 4G check if (trans_count > 0xffffffff) { @@ -49,11 +49,11 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::translation // Write composition index. if (stream.fail()) return stream; - stream.write((const char*)t_db.idxComp.data(), sizeof(ZRCola::translation_db::index)*count); + stream.write((const char*)t_db.idxComp.data(), sizeof(unsigned __int32)*count); // Write decomposition index. if (stream.fail()) return stream; - stream.write((const char*)t_db.idxDecomp.data(), sizeof(ZRCola::translation_db::index)*count); + stream.write((const char*)t_db.idxDecomp.data(), sizeof(unsigned __int32)*count); // Write data count. std::vector::size_type data_count = t_db.data.size(); @@ -121,21 +121,15 @@ static inline int CompareBinary(const wchar_t *str_a, size_t count_a, const wcha /// static int __cdecl CompareCompositionIndex(void *data, const void *a, const void *b) { - const wchar_t - *chr_a = (const wchar_t*)data + ((const ZRCola::translation_db::index*)a)->start, - *chr_b = (const wchar_t*)data + ((const ZRCola::translation_db::index*)b)->start; - const wchar_t - *str_a = chr_a + 1, - *str_b = chr_b + 1; - size_t - count_a = (const wchar_t*)data + ((const ZRCola::translation_db::index*)a)->end - str_a, - count_b = (const wchar_t*)data + ((const ZRCola::translation_db::index*)b)->end - str_b; + const ZRCola::translation_db::translation + &trans_a = (const ZRCola::translation_db::translation&)((const wchar_t*)data)[*(const unsigned __int32*)a], + &trans_b = (const ZRCola::translation_db::translation&)((const wchar_t*)data)[*(const unsigned __int32*)b]; - int r = CompareBinary(str_a, count_a, str_b, count_b); + int r = CompareBinary(trans_a.str, trans_a.str_len, trans_b.str, trans_b.str_len); if (r != 0) return r; - if (*chr_a < *chr_b) return -1; - else if (*chr_a > *chr_b) return +1; + if (trans_a.chr < trans_b.chr) return -1; + else if (trans_a.chr > trans_b.chr) return +1; return 0; } @@ -155,21 +149,14 @@ static int __cdecl CompareCompositionIndex(void *data, const void *a, const void /// static int __cdecl CompareDecompositionIndex(void *data, const void *a, const void *b) { - const wchar_t - *chr_a = (const wchar_t*)data + ((const ZRCola::translation_db::index*)a)->start, - *chr_b = (const wchar_t*)data + ((const ZRCola::translation_db::index*)b)->start; + const ZRCola::translation_db::translation + &trans_a = (const ZRCola::translation_db::translation&)((const wchar_t*)data)[*(const unsigned __int32*)a], + &trans_b = (const ZRCola::translation_db::translation&)((const wchar_t*)data)[*(const unsigned __int32*)b]; - if (*chr_a < *chr_b) return -1; - else if (*chr_a > *chr_b) return +1; + if (trans_a.chr < trans_b.chr) return -1; + else if (trans_a.chr > trans_b.chr) return +1; - const wchar_t - *str_a = chr_a + 1, - *str_b = chr_b + 1; - size_t - count_a = (const wchar_t*)data + ((const ZRCola::translation_db::index*)a)->end - str_a, - count_b = (const wchar_t*)data + ((const ZRCola::translation_db::index*)b)->end - str_b; - - return CompareBinary(str_a, count_a, str_b, count_b); + return CompareBinary(trans_a.str, trans_a.str_len, trans_b.str, trans_b.str_len); } @@ -267,12 +254,14 @@ int _tmain(int argc, _TCHAR *argv[]) // Read translation from the database. if (src.GetTranslation(rs, trans)) { // Add translation to index and data. - ZRCola::translation_db::index ti; - ti.start = t_db.data.size(); + unsigned __int32 ti; + ti = t_db.data.size(); t_db.data.push_back(trans.chr); - for (std::wstring::size_type i = 0, n = trans.str.length(); i < n; i++) + std::wstring::size_type n = trans.str.length(); + wxASSERT_MSG(n <= 0xffff, wxT("transformation string too long")); + t_db.data.push_back((wchar_t)n); + for (std::wstring::size_type i = 0; i < n; i++) t_db.data.push_back(trans.str[i]); - ti.end = t_db.data.size(); t_db.idxComp .push_back(ti); t_db.idxDecomp.push_back(ti); } else @@ -282,8 +271,8 @@ int _tmain(int argc, _TCHAR *argv[]) } // Sort indices. - qsort_s(t_db.idxComp .data(), trans_count, sizeof(ZRCola::translation_db::index), CompareCompositionIndex , t_db.data.data()); - qsort_s(t_db.idxDecomp.data(), trans_count, sizeof(ZRCola::translation_db::index), CompareDecompositionIndex, t_db.data.data()); + qsort_s(t_db.idxComp .data(), trans_count, sizeof(unsigned __int32), CompareCompositionIndex , t_db.data.data()); + qsort_s(t_db.idxDecomp.data(), trans_count, sizeof(unsigned __int32), CompareDecompositionIndex, t_db.data.data()); // Write translations to file. dst << ZRCola::translation_rec(t_db); diff --git a/lib/libZRCola/include/zrcola/translate.h b/lib/libZRCola/include/zrcola/translate.h index d3cfbb4..b704766 100644 --- a/lib/libZRCola/include/zrcola/translate.h +++ b/lib/libZRCola/include/zrcola/translate.h @@ -36,22 +36,6 @@ namespace ZRCola { /// class ZRCOLA_API translation_db { public: -#pragma pack(push) -#pragma pack(4) - /// - /// Translation index - /// - struct index { - unsigned __int32 start; ///< Composed character offset - unsigned __int32 end; ///< Decomposed string end offset - - /// - /// Returns translation string length - /// - inline unsigned __int32 GetStrLength() const { return end - (start + 1); } - }; -#pragma pack(pop) - #pragma pack(push) #pragma pack(2) #pragma warning(push) @@ -60,15 +44,16 @@ namespace ZRCola { /// Translation data /// struct translation { - wchar_t chr; ///< Composed character - wchar_t str[]; ///< Decomposed string + wchar_t chr; ///< Composed character + unsigned __int16 str_len; ///< \c str length (in characters) + wchar_t str[]; ///< Decomposed string }; #pragma warning(pop) #pragma pack(pop) - std::vector idxComp; ///< Composition index - std::vector idxDecomp; ///< Decomposition index - std::vector data; ///< Transformation data + std::vector idxComp; ///< Composition index + std::vector idxDecomp; ///< Decomposition index + std::vector data; ///< Transformation data public: /// @@ -118,12 +103,12 @@ inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::transl // Read composition index. t_db.idxComp.resize(count); - stream.read((char*)t_db.idxComp.data(), sizeof(ZRCola::translation_db::index)*count); + stream.read((char*)t_db.idxComp.data(), sizeof(unsigned __int32)*count); if (!stream.good()) return stream; // Read decomposition index. t_db.idxDecomp.resize(count); - stream.read((char*)t_db.idxDecomp.data(), sizeof(ZRCola::translation_db::index)*count); + stream.read((char*)t_db.idxDecomp.data(), sizeof(unsigned __int32)*count); if (!stream.good()) return stream; // Read data count. diff --git a/lib/libZRCola/src/translate.cpp b/lib/libZRCola/src/translate.cpp index 1c4f365..4e2b935 100644 --- a/lib/libZRCola/src/translate.cpp +++ b/lib/libZRCola/src/translate.cpp @@ -34,7 +34,7 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input if (map) map->clear(); - std::vector::size_type compositionsCount = idxComp.size(); + std::vector::size_type compositionsCount = idxComp.size(); for (size_t i = 0; i < inputMax;) { // Start with the full search area at i-th character. @@ -49,7 +49,8 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input // Get the j-th character of the composition. // All compositions that get short on characters are lexically ordered before. // Thus the j-th character is considered 0. - wchar_t s = j < idxComp[m].GetStrLength() ? ((translation*)&data[idxComp[m].start])->str[j] : 0; + const translation &trans = (const translation&)data[idxComp[m]]; + wchar_t s = j < trans.str_len ? trans.str[j] : 0; // Do the bisection test. if (c < s) r = m; @@ -60,14 +61,16 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input // Narrow the search area on the left to start at the first composition in the run. for (size_t rr = m; l < rr;) { size_t m = (l + rr) / 2; - wchar_t s = j < idxComp[m].GetStrLength() ? ((translation*)&data[idxComp[m].start])->str[j] : 0; + const translation &trans = (const translation&)data[idxComp[m]]; + wchar_t s = j < trans.str_len ? trans.str[j] : 0; if (c <= s) rr = m; else l = m + 1; } // Narrow the search area on the right to end at the first composition not in the run. for (size_t ll = m + 1; ll < r;) { size_t m = (ll + r) / 2; - wchar_t s = j < idxComp[m].GetStrLength() ? ((translation*)&data[idxComp[m].start])->str[j] : 0; + const translation &trans = (const translation&)data[idxComp[m]]; + wchar_t s = j < trans.str_len ? trans.str[j] : 0; if (s <= c) ll = m + 1; else r = m; } @@ -77,9 +80,10 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input if (l >= r) { // The search area is empty. - if (j && l_prev < compositionsCount && j == idxComp[l_prev].GetStrLength()) { + const translation &trans = (const translation&)data[idxComp[l_prev]]; + if (j && l_prev < compositionsCount && j == trans.str_len) { // The first composition of the previous run was a match. - output += ((translation*)&data[idxComp[l_prev].start])->chr; + output += trans.chr; i = ii; if (j > 1 && map) { // Mapping changed. @@ -95,9 +99,10 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input } else { // End of input reached. - if (l < compositionsCount && j == idxComp[l].GetStrLength()) { + const translation &trans = (const translation&)data[idxComp[l]]; + if (l < compositionsCount && j == trans.str_len) { // The first composition of the previous run was a match. - output += ((translation*)&data[idxComp[l].start])->chr; + output += trans.chr; i = ii; if (j > 1 && map) { // Mapping changed. @@ -129,7 +134,7 @@ void ZRCOLA_API ZRCola::translation_db::Decompose(_In_z_count_(inputMax) const w if (map) map->clear(); - std::vector::size_type decompositionsCount = idxDecomp.size(); + std::vector::size_type decompositionsCount = idxDecomp.size(); for (size_t i = 0; i < inputMax;) { // Find whether the character can be decomposed. @@ -138,12 +143,13 @@ void ZRCOLA_API ZRCola::translation_db::Decompose(_In_z_count_(inputMax) const w for (size_t l = 0, r = decompositionsCount;; ) { if (l < r) { size_t m = (l + r) / 2; - wchar_t decompSrc = ((translation*)&data[idxDecomp[m].start])->chr; + const translation &trans = (const translation&)data[idxDecomp[m]]; + wchar_t decompSrc = trans.chr; if (c < decompSrc) r = m; else if (decompSrc < c) l = m + 1; else { // Character found. - output.append(((translation*)&data[idxDecomp[m].start])->str, idxDecomp[m].GetStrLength()); + output.append(trans.str, trans.str_len); i++; if (map) { // Mapping changed. diff --git a/output/data/ZRCola.zrcdb b/output/data/ZRCola.zrcdb index 9bd2462..eb9bbd4 100644 Binary files a/output/data/ZRCola.zrcdb and b/output/data/ZRCola.zrcdb differ