diff --git a/ZRColaCompile/dbsource.cpp b/ZRColaCompile/dbsource.cpp index 135f3ab..2da6f97 100644 --- a/ZRColaCompile/dbsource.cpp +++ b/ZRColaCompile/dbsource.cpp @@ -156,10 +156,13 @@ bool ZRCola::DBSource::GetValue(const ATL::CComPtr& f, std::wstring& v ATL::CComVariant v; wxVERIFY(SUCCEEDED(f->get_Value(&v))); - wxCHECK(SUCCEEDED(v.ChangeType(VT_BSTR)), false); + if (V_VT(&v) != VT_NULL) { + wxCHECK(SUCCEEDED(v.ChangeType(VT_BSTR)), false); - val.reserve(::SysStringLen(V_BSTR(&v))); - val = V_BSTR(&v); + val.reserve(::SysStringLen(V_BSTR(&v))); + val = V_BSTR(&v); + } else + val.clear(); /* NULL field: hand back an empty string rather than whatever the caller's buffer held before */ return true; } @@ -171,26 +174,29 @@ bool ZRCola::DBSource::GetUnicodeCharacter(const ATL::CComPtr& f, wcha ATL::CComVariant v; wxVERIFY(SUCCEEDED(f->get_Value(&v))); - wxCHECK(SUCCEEDED(v.ChangeType(VT_BSTR)), false); + if (V_VT(&v) != VT_NULL) { + wxCHECK(SUCCEEDED(v.ChangeType(VT_BSTR)), false); - // Parse the field. Must be exactly one Unicode code. - UINT i = 0, n = ::SysStringLen(V_BSTR(&v)); - chr = 0; - for (; i < n && V_BSTR(&v)[i]; i++) { - if (L'0' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'9') chr = chr*0x10 + (V_BSTR(&v)[i] - L'0'); - else if (L'A' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'F') chr = chr*0x10 + (V_BSTR(&v)[i] - L'A' + 10); - else if (L'a' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'f') chr = chr*0x10 + (V_BSTR(&v)[i] - L'a' + 10); - else break; - } - if (i <= 0 && 4 < i) { - ATL::CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname))); - _ftprintf(stderr, wxT("%s: error ZCC0030: Syntax error in \"%.*ls\" field (\"%.*ls\"). Unicode code must be one to four hexadecimal characters long.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, n, V_BSTR(&v)); - return false; - } else if (i != n) { - ATL::CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname))); - _ftprintf(stderr, wxT("%s: error ZCC0031: Syntax error in \"%.*ls\" field (\"%.*ls\"). 
Extra trailing characters.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, n, V_BSTR(&v)); - return false; - } + // Parse the field. Must be exactly one Unicode code. + UINT i = 0, n = ::SysStringLen(V_BSTR(&v)); + chr = 0; + for (; i < n && V_BSTR(&v)[i]; i++) { + if (L'0' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'9') chr = chr*0x10 + (V_BSTR(&v)[i] - L'0'); + else if (L'A' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'F') chr = chr*0x10 + (V_BSTR(&v)[i] - L'A' + 10); + else if (L'a' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'f') chr = chr*0x10 + (V_BSTR(&v)[i] - L'a' + 10); + else break; + } + if (i <= 0 || 4 < i) { // no hex digit parsed, or more than four of them + ATL::CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname))); + _ftprintf(stderr, wxT("%s: error ZCC0030: Syntax error in \"%.*ls\" field (\"%.*ls\"). Unicode code must be one to four hexadecimal characters long.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, n, V_BSTR(&v)); + return false; + } else if (i != n) { + ATL::CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname))); + _ftprintf(stderr, wxT("%s: error ZCC0031: Syntax error in \"%.*ls\" field (\"%.*ls\"). Extra trailing characters.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, n, V_BSTR(&v)); + return false; + } + } else + chr = 0; return true; } @@ -317,6 +323,43 @@ bool ZRCola::DBSource::GetLanguage(const ATL::CComPtr& f, ZRCola::lang } +bool ZRCola::DBSource::GetChrCat(const ATL::CComPtr& f, chrcatid_t& cc) const +{ + wxASSERT_MSG(f, wxT("field is empty")); + + ATL::CComVariant v; + wxVERIFY(SUCCEEDED(f->get_Value(&v))); + if (V_VT(&v) != VT_NULL) { + wxCHECK(SUCCEEDED(v.ChangeType(VT_BSTR)), false); + + // Parse the field. + size_t n = wcsnlen(V_BSTR(&v), ::SysStringLen(V_BSTR(&v))); + if (n < 1 || 2 < n) { + ATL::CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname))); + _ftprintf(stderr, wxT("%s: error ZCC0110: Syntax error in \"%.*ls\" field (\"%.*ls\"). 
Character category ID must be one (1) or two (2) characters long.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, (int)n, V_BSTR(&v)); // the "*" precision of "%.*ls" must be passed as int, not size_t + return false; + } + for (size_t i = 0;; i++) { + if (i < sizeof(cc)) { + if (i < n) { + wchar_t c = V_BSTR(&v)[i]; + if ((unsigned short)c > 0x7f) { + ATL::CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname))); + _ftprintf(stderr, wxT("%s: error ZCC0111: Syntax error in \"%.*ls\" field (\"%.*ls\"). Character category ID must contain ASCII characters only.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, (int)n, V_BSTR(&v)); + return false; + } + cc[i] = (char)c; + } else + cc[i] = 0; + } else + break; + } + } else + memset(cc, 0, sizeof(cc)); + + return true; +} + bool ZRCola::DBSource::SelectTranslations(ATL::CComPtr &rs) const { @@ -610,3 +653,127 @@ bool ZRCola::DBSource::GetCharacterGroup(const ATL::CComPtr& rs, c return true; } + + +bool ZRCola::DBSource::SelectCharacters(ATL::CComPtr& rs) const +{ + // Create a new recordset. + if (rs) rs.Release(); + wxCHECK(SUCCEEDED(::CoCreateInstance(CLSID_CADORecordset, NULL, CLSCTX_ALL, IID_IADORecordset, (LPVOID*)&rs)), false); + + // Open it. + if (FAILED(rs->Open(ATL::CComVariant( + L"SELECT DISTINCT [znak], [opis_en], [klj_bes_en], [kat], [znak_v], [znak_m] " + L"FROM [VRS_CharList] " + L"ORDER BY [znak]"), ATL::CComVariant(m_db), adOpenStatic, adLockReadOnly, adCmdText))) + { + _ftprintf(stderr, wxT("%s: error ZCC0120: Error loading characters from database. 
Please make sure the file is ZRCola.zrc compatible.\n"), m_filename.c_str()); + LogErrors(); + return false; + } + + return true; +} + + +bool ZRCola::DBSource::GetCharacter(const ATL::CComPtr& rs, character& chr) const +{ + wxASSERT_MSG(rs, wxT("recordset is empty")); + + ATL::CComPtr flds; + wxVERIFY(SUCCEEDED(rs->get_Fields(&flds))); + wchar_t c; + chr.rel.clear(); // start afresh: callers may reuse chr for successive records, and rel is only ever appended to below + { + ATL::CComPtr f; + wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"znak"), &f))); + wxCHECK(GetUnicodeCharacter(f, chr.chr), false); + } + + { + ATL::CComPtr f; + wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"znak_v"), &f))); + wxCHECK(GetUnicodeCharacter(f, c), false); + if (c && c != chr.chr) + chr.rel += c; + } + + { + ATL::CComPtr f; + wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"znak_m"), &f))); + wxCHECK(GetUnicodeCharacter(f, c), false); + if (c && c != chr.chr) + chr.rel += c; + } + + { + ATL::CComPtr f; + wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"opis_en"), &f))); + wxCHECK(GetValue(f, chr.desc), false); + } + + { + ATL::CComPtr f; + wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"klj_bes_en"), &f))); + wxCHECK(GetValue(f, chr.keywords), false); + } + + { + ATL::CComPtr f; + wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"kat"), &f))); + wxCHECK(GetChrCat(f, chr.cat), false); + } + + return true; +} + + +bool ZRCola::DBSource::SelectCharacterCategories(ATL::CComPtr& rs) const +{ + // Create a new recordset. + if (rs) rs.Release(); + wxCHECK(SUCCEEDED(::CoCreateInstance(CLSID_CADORecordset, NULL, CLSCTX_ALL, IID_IADORecordset, (LPVOID*)&rs)), false); + + // Open it. + if (FAILED(rs->Open(ATL::CComVariant( + L"SELECT DISTINCT [kat], [opis_en], [Rang] " + L"FROM [VRS_CharCategories] " + L"ORDER BY [Rang], [opis_en]"), ATL::CComVariant(m_db), adOpenStatic, adLockReadOnly, adCmdText))) + { + _ftprintf(stderr, wxT("%s: error ZCC0130: Error loading character categories from database. 
Please make sure the file is ZRCola.zrc compatible.\n"), m_filename.c_str()); + LogErrors(); + return false; + } + + return true; +} + + +bool ZRCola::DBSource::GetCharacterCategory(const ATL::CComPtr& rs, chrcat& cc) const +{ + wxASSERT_MSG(rs, wxT("recordset is empty")); + + ATL::CComPtr flds; + wxVERIFY(SUCCEEDED(rs->get_Fields(&flds))); + std::wstring id; + + { + ATL::CComPtr f; + wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"kat"), &f))); + wxCHECK(GetChrCat(f, cc.id), false); + } + + { + ATL::CComPtr f; + wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"Rang"), &f))); + wxCHECK(GetValue(f, cc.rank), false); + } + + { + ATL::CComPtr f; + wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"opis_en"), &f))); + wxCHECK(GetValue(f, cc.name), false); + } + + return true; +} diff --git a/ZRColaCompile/dbsource.h b/ZRColaCompile/dbsource.h index 8382db3..15fbf0d 100644 --- a/ZRColaCompile/dbsource.h +++ b/ZRColaCompile/dbsource.h @@ -19,6 +19,7 @@ #pragma once +#include #include #include @@ -98,6 +99,30 @@ namespace ZRCola { }; + /// + /// Character + /// + class character { + public: + wchar_t chr; ///< Character + ZRCola::chrcatid_t cat; ///< Category ID + std::wstring desc; ///< Character description + std::wstring keywords; ///< Additional keywords + std::wstring rel; ///< Related characters + }; + + + /// + /// Character category + /// + class chrcat { + public: + ZRCola::chrcatid_t id; ///< Category ID + int rank; ///< Character category rank + std::wstring name; ///< Character category name + }; + + public: DBSource(); virtual ~DBSource(); @@ -242,6 +267,19 @@ namespace ZRCola { bool GetLanguage(const ATL::CComPtr& f, langid_t& lang) const; + /// + /// Gets character category ID from ZRCola.zrc database + /// + /// \param[in] f Data field + /// \param[out] cc Character category + /// + /// \returns + /// - true when successful + /// - false otherwise + /// + bool GetChrCat(const ATL::CComPtr& f, chrcatid_t& cc) const; + + /// /// Returns character 
translations /// @@ -366,6 +404,54 @@ namespace ZRCola { /// bool GetCharacterGroup(const ATL::CComPtr& rs, chrgrp& cg) const; + /// + /// Returns characters + /// + /// \param[out] rs Recordset with results + /// + /// \returns + /// - true when query succeeds + /// - false otherwise + /// + bool SelectCharacters(ATL::CComPtr& rs) const; + + + /// + /// Returns character data + /// + /// \param[in] rs Recordset with results + /// \param[out] chr Character + /// + /// \returns + /// - true when succeeded + /// - false otherwise + /// + bool GetCharacter(const ATL::CComPtr& rs, character& chr) const; + + /// + /// Returns character categories + /// + /// \param[out] rs Recordset with results + /// + /// \returns + /// - true when query succeeds + /// - false otherwise + /// + bool SelectCharacterCategories(ATL::CComPtr& rs) const; + + + /// + /// Returns character category data + /// + /// \param[in] rs Recordset with results + /// \param[out] cc Character category + /// + /// \returns + /// - true when succeeded + /// - false otherwise + /// + bool GetCharacterCategory(const ATL::CComPtr& rs, chrcat& cc) const; + protected: std::basic_string m_filename; ///< Database filename ATL::CComPtr m_db; ///< Database diff --git a/ZRColaCompile/main.cpp b/ZRColaCompile/main.cpp index 7ac3022..b00d914 100644 --- a/ZRColaCompile/main.cpp +++ b/ZRColaCompile/main.cpp @@ -28,7 +28,7 @@ /// /// \returns The stream \p stream /// -inline std::ostream& operator <<(std::ostream& stream, const ZRCola::translation_db &db) +inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::translation_db &db) { assert(db.idxComp.size() == db.idxDecomp.size()); @@ -84,7 +84,7 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::translation /// /// \returns The stream \p stream /// -inline std::ostream& operator <<(std::ostream& stream, const ZRCola::keyseq_db &db) +inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::keyseq_db &db) 
{ assert(db.idxChr.size() == db.idxKey.size()); @@ -140,7 +140,7 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::keyseq_db & /// /// \returns The stream \p stream /// -inline std::ostream& operator <<(std::ostream& stream, const ZRCola::language_db &db) +inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::language_db &db) { unsigned __int32 count; @@ -191,7 +191,7 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::language_db /// /// \returns The stream \p stream /// -inline std::ostream& operator <<(std::ostream& stream, const ZRCola::langchar_db &db) +inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::langchar_db &db) { #ifdef ZRCOLA_LANGCHAR_LANG_IDX assert(db.idxChr.size() == db.idxLng.size()); @@ -251,7 +251,7 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::langchar_db /// /// \returns The stream \p stream /// -inline std::ostream& operator <<(std::ostream& stream, const ZRCola::chrgrp_db &db) +inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::chrgrp_db &db) { unsigned __int32 count; @@ -293,6 +293,110 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::chrgrp_db & } +/// +/// Writes character database to a stream +/// +/// \param[in] stream Output stream +/// \param[in] db Character database +/// +/// \returns The stream \p stream +/// +inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::character_db &db) +{ + unsigned __int32 count; + + // Write index count. + ZRCola::keyseq_db::indexChr::size_type ks_count = db.idxChr.size(); +#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) + // 4G check + if (ks_count > 0xffffffff) { + stream.setstate(std::ios_base::failbit); + return stream; + } +#endif + if (stream.fail()) return stream; + count = (unsigned __int32)ks_count; + stream.write((const char*)&count, sizeof(count)); + + // Write character index. 
+ if (stream.fail()) return stream; + stream.write((const char*)db.idxChr.data(), sizeof(unsigned __int32)*count); + + // Write data count. + std::vector::size_type data_count = db.data.size(); +#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) + // 4G check + if (data_count > 0xffffffff) { + stream.setstate(std::ios_base::failbit); + return stream; + } +#endif + if (stream.fail()) return stream; + count = (unsigned __int32)data_count; + stream.write((const char*)&count, sizeof(count)); + + // Write data. + if (stream.fail()) return stream; + stream.write((const char*)db.data.data(), sizeof(unsigned __int16)*count); + + return stream; +} + + +/// +/// Writes character category database to a stream +/// +/// \param[in] stream Output stream +/// \param[in] db Character category database +/// +/// \returns The stream \p stream +/// +inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::chrcat_db &db) +{ + unsigned __int32 count; + + // Write index count. + ZRCola::keyseq_db::indexChr::size_type ks_count = db.idxChrCat.size(); +#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) + // 4G check + if (ks_count > 0xffffffff) { + stream.setstate(std::ios_base::failbit); + return stream; + } +#endif + if (stream.fail()) return stream; + count = (unsigned __int32)ks_count; + stream.write((const char*)&count, sizeof(count)); + + // Write character category index. + if (stream.fail()) return stream; + stream.write((const char*)db.idxChrCat.data(), sizeof(unsigned __int32)*count); + + // Write rank index. + if (stream.fail()) return stream; + stream.write((const char*)db.idxRnk.data(), sizeof(unsigned __int32)*count); + + // Write data count. 
+ std::vector::size_type data_count = db.data.size(); +#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) + // 4G check + if (data_count > 0xffffffff) { + stream.setstate(std::ios_base::failbit); + return stream; + } +#endif + if (stream.fail()) return stream; + count = (unsigned __int32)data_count; + stream.write((const char*)&count, sizeof(count)); + + // Write data. + if (stream.fail()) return stream; + stream.write((const char*)db.data.data(), sizeof(unsigned __int16)*count); + + return stream; +} + + /// /// Main function /// @@ -656,6 +760,115 @@ int _tmain(int argc, _TCHAR *argv[]) } } + { + // Get characters. + ATL::CComPtr rs; + if (src.SelectCharacters(rs)) { + size_t count = src.GetRecordsetCount(rs); + if (count < 0xffffffff) { // 4G check (-1 is reserved for error condition) + ZRCola::DBSource::character chr; + ZRCola::character_db db; + + // Preallocate memory. + db.idxChr.reserve(count); + db.data .reserve(count*4); + + // Parse characters and build index and data. + while (!ZRCola::DBSource::IsEOF(rs)) { + // Read character from the database. + if (src.GetCharacter(rs, chr)) { + // Add character to index and data. 
+ unsigned __int32 idx = db.data.size(); + db.data.push_back((unsigned __int16)chr.chr); + for (std::wstring::size_type i = 0; i < sizeof(ZRCola::chrcatid_t)/sizeof(unsigned __int16); i++) + db.data.push_back(((const unsigned __int16*)chr.cat)[i]); + std::wstring::size_type n_desc = chr.desc.length(); + wxASSERT_MSG(n_desc <= 0xffff, wxT("character description too long")); + db.data.push_back((unsigned __int16)n_desc); + std::wstring::size_type n_rel = chr.rel.length(); + wxASSERT_MSG(n_rel <= 0xffff, wxT("too many related characters")); + db.data.push_back((unsigned __int16)n_rel); + for (std::wstring::size_type i = 0; i < n_desc; i++) + db.data.push_back(chr.desc[i]); + for (std::wstring::size_type i = 0; i < n_rel; i++) + db.data.push_back(chr.rel[i]); + db.idxChr.push_back(idx); + } else + has_errors = true; + + wxVERIFY(SUCCEEDED(rs->MoveNext())); + } + + // Sort indices. + db.idxChr.sort(); + + // Write characters to file. + dst << ZRCola::character_rec(db); + } else { + _ftprintf(stderr, wxT("%s: error ZCC0017: Error getting character count from database or too many characters.\n"), (LPCTSTR)filenameIn.c_str()); + has_errors = true; + } + } else { + _ftprintf(stderr, wxT("%s: error ZCC0016: Error getting characters from database. Please make sure the file is ZRCola.zrc compatible.\n"), (LPCTSTR)filenameIn.c_str()); + has_errors = true; + } + } + + { + // Get character categories. + ATL::CComPtr rs; + if (src.SelectCharacterCategories(rs)) { + size_t count = src.GetRecordsetCount(rs); + if (count < 0xffffffff) { // 4G check (-1 is reserved for error condition) + ZRCola::DBSource::chrcat cc; + ZRCola::chrcat_db db; + + // Preallocate memory. + db.idxChrCat.reserve(count); + db.idxRnk .reserve(count); + db.data .reserve(count*4); + + // Parse character categories and build index and data. + while (!ZRCola::DBSource::IsEOF(rs)) { + // Read character category from the database. 
+ if (src.GetCharacterCategory(rs, cc)) { + // Add character category to index and data. + unsigned __int32 idx = db.data.size(); + for (std::wstring::size_type i = 0; i < sizeof(ZRCola::chrcatid_t)/sizeof(unsigned __int16); i++) + db.data.push_back(((const unsigned __int16*)cc.id)[i]); + wxASSERT_MSG((int)0xffff8000 <= cc.rank && cc.rank <= (int)0x00007fff, wxT("character category rank out of bounds")); + db.data.push_back((unsigned __int16)cc.rank); + std::wstring::size_type n_name = cc.name.length(); + wxASSERT_MSG(n_name <= 0xffff, wxT("character category name too long")); + db.data.push_back((unsigned __int16)n_name); + for (std::wstring::size_type i = 0; i < n_name; i++) + db.data.push_back(cc.name[i]); + db.idxChrCat.push_back(idx); + db.idxRnk .push_back(idx); + if (build_pot) + pot.insert(cc.name); + } else + has_errors = true; + + wxVERIFY(SUCCEEDED(rs->MoveNext())); + } + + // Sort indices. + db.idxChrCat.sort(); + db.idxRnk .sort(); + + // Write character categories to file. + dst << ZRCola::chrcat_rec(db); + } else { + _ftprintf(stderr, wxT("%s: error ZCC0019: Error getting character category count from database or too many character categories.\n"), (LPCTSTR)filenameIn.c_str()); + has_errors = true; + } + } else { + _ftprintf(stderr, wxT("%s: error ZCC0018: Error getting character categories from database. 
Please make sure the file is ZRCola.zrc compatible.\n"), (LPCTSTR)filenameIn.c_str()); + has_errors = true; + } + } + stdex::idrec::close(dst, dst_start); if (dst.fail()) { diff --git a/lib/libZRCola/build/libZRCola.vcxproj b/lib/libZRCola/build/libZRCola.vcxproj index b3020b7..e940d35 100644 --- a/lib/libZRCola/build/libZRCola.vcxproj +++ b/lib/libZRCola/build/libZRCola.vcxproj @@ -31,6 +31,7 @@ + diff --git a/lib/libZRCola/build/libZRCola.vcxproj.filters b/lib/libZRCola/build/libZRCola.vcxproj.filters index ec7f0a4..f87aee2 100644 --- a/lib/libZRCola/build/libZRCola.vcxproj.filters +++ b/lib/libZRCola/build/libZRCola.vcxproj.filters @@ -47,6 +47,9 @@ Header Files + + Header Files + diff --git a/lib/libZRCola/include/zrcola/character.h b/lib/libZRCola/include/zrcola/character.h new file mode 100644 index 0000000..d44b566 --- /dev/null +++ b/lib/libZRCola/include/zrcola/character.h @@ -0,0 +1,303 @@ +/* + Copyright 2015-2016 Amebis + + This file is part of ZRCola. + + ZRCola is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ZRCola is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ZRCola. If not, see . 
+*/ + +#pragma once + +#include "common.h" + +#include +#include +#include +#include + +#pragma warning(push) +#pragma warning(disable: 4200) +#pragma warning(disable: 4251) +#pragma warning(disable: 4512) + + +namespace ZRCola { + /// + /// Character category ID type + /// Two letter abbreviation, non-terminated + /// + typedef char chrcatid_t[2]; + + /// + /// Character Database + /// + class ZRCOLA_API character_db { + public: +#pragma pack(push) +#pragma pack(2) + /// + /// Character data + /// + struct character { + wchar_t chr; ///< Character + chrcatid_t cat; ///< Category ID + unsigned __int16 desc_len; ///< Character description length in \c data + unsigned __int16 rel_len; ///< Related character count in \c data + wchar_t data[]; ///< Character description and list of related characters + }; +#pragma pack(pop) + + /// + /// Character index + /// + class indexChar : public index + { + public: + /// + /// Constructs the index + /// + /// \param[in] h Reference to vector holding the data + /// + indexChar(_In_ std::vector &h) : index(h) {} + + /// + /// Compares two characters by character code (for searching) + /// + /// \param[in] a Reference to first element + /// \param[in] b Reference to second element + /// + /// \returns + /// - <0 when a < b + /// - =0 when a == b + /// - >0 when a > b + /// + virtual int compare(_In_ const character &a, _In_ const character &b) const + { + if (a.chr < b.chr) return -1; + else if (a.chr > b.chr) return 1; + + return 0; + } + } idxChr; ///< Character index + + std::vector data; ///< Character data + + public: + /// + /// Constructs the database + /// + inline character_db() : idxChr(data) {} + }; + + + typedef ZRCOLA_API stdex::idrec::record character_rec; + + + /// + /// Character category database + /// + class ZRCOLA_API chrcat_db { + public: +#pragma pack(push) +#pragma pack(2) + /// + /// Character category data + /// + struct chrcat { + chrcatid_t id; ///< Character category ID + unsigned __int16 rank; ///< Character category 
rank + unsigned __int16 name_len; ///< \c name length (in characters) + wchar_t name[]; ///< Character category name + }; +#pragma pack(pop) + + /// + /// Character category index + /// + class indexChrCat : public index + { + public: + /// + /// Constructs the index + /// + /// \param[in] h Reference to vector holding the data + /// + indexChrCat(_In_ std::vector &h) : index(h) {} + + /// + /// Compares two character categories by ID (for searching) + /// + /// \param[in] a Reference to first element + /// \param[in] b Reference to second element + /// + /// \returns + /// - <0 when a < b + /// - =0 when a == b + /// - >0 when a > b + /// + virtual int compare(_In_ const chrcat &a, _In_ const chrcat &b) const + { + int r = memcmp(a.id, b.id, sizeof(chrcatid_t)); + if (r != 0) return r; + + return 0; + } + } idxChrCat; ///< Character category index + + /// + /// Rank index + /// + class indexRank : public index + { + public: + /// + /// Constructs the index + /// + /// \param[in] h Reference to vector holding the data + /// + indexRank(_In_ std::vector &h) : index(h) {} + + /// + /// Compares two character categories by rank (for searching) + /// + /// \param[in] a Reference to first element + /// \param[in] b Reference to second element + /// + /// \returns + /// - <0 when a < b + /// - =0 when a == b + /// - >0 when a > b + /// + virtual int compare(_In_ const chrcat &a, _In_ const chrcat &b) const + { + if (a.rank < b.rank) return -1; + else if (a.rank > b.rank) return +1; + + return 0; + } + + /// + /// Compares two character categories by rank, then by name (for sorting) + /// + /// \param[in] a Reference to first character category + /// \param[in] b Reference to second character category + /// + /// \returns + /// - <0 when a < b + /// - =0 when a == b + /// - >0 when a > b + /// + virtual int compare_sort(_In_ const chrcat &a, _In_ const chrcat &b) const + { + if (a.rank < b.rank) return -1; + else if (a.rank > b.rank) return +1; + + int r = _wcsncoll(a.name, b.name, std::min(a.name_len, 
b.name_len)); + if (r != 0) return r; + if (a.name_len < b.name_len) return -1; + else if (a.name_len > b.name_len) return +1; + + return 0; + } + } idxRnk; ///< Rank index + + std::vector data; ///< Character category data + + public: + /// + /// Constructs the database + /// + inline chrcat_db() : idxChrCat(data), idxRnk(data) {} + }; + + + typedef ZRCOLA_API stdex::idrec::record chrcat_rec; +}; + + +const ZRCola::recordid_t stdex::idrec::record::id = *(ZRCola::recordid_t*)"CHR"; +const ZRCola::recordid_t stdex::idrec::record::id = *(ZRCola::recordid_t*)"CCT"; + + +/// +/// Reads character database from a stream +/// +/// \param[in] stream Input stream +/// \param[out] db Character database +/// +/// \returns The stream \p stream +/// +inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::character_db &db) +{ + unsigned __int32 count; + + // Read index count. + stream.read((char*)&count, sizeof(count)); + if (!stream.good()) return stream; + + // Read character index. + db.idxChr.resize(count); + stream.read((char*)db.idxChr.data(), sizeof(unsigned __int32)*count); + if (!stream.good()) return stream; + + // Read data count. + stream.read((char*)&count, sizeof(count)); + if (!stream.good()) return stream; + + // Read data. + db.data.resize(count); + stream.read((char*)db.data.data(), sizeof(unsigned __int16)*count); + + return stream; +} + + +/// +/// Reads character category database from a stream +/// +/// \param[in] stream Input stream +/// \param[out] db Character category database +/// +/// \returns The stream \p stream +/// +inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::chrcat_db &db) +{ + unsigned __int32 count; + + // Read index count. + stream.read((char*)&count, sizeof(count)); + if (!stream.good()) return stream; + + // Read character category index. 
+ db.idxChrCat.resize(count); + stream.read((char*)db.idxChrCat.data(), sizeof(unsigned __int32)*count); + if (!stream.good()) return stream; + + // Read rank index. + db.idxRnk.resize(count); + stream.read((char*)db.idxRnk.data(), sizeof(unsigned __int32)*count); + if (!stream.good()) return stream; + + // Read data count. + stream.read((char*)&count, sizeof(count)); + if (!stream.good()) return stream; + + // Read data. + db.data.resize(count); + stream.read((char*)db.data.data(), sizeof(unsigned __int16)*count); + + return stream; +} + +#pragma warning(pop) diff --git a/lib/libZRCola/src/stdafx.h b/lib/libZRCola/src/stdafx.h index 3e3a4df..f773d3d 100644 --- a/lib/libZRCola/src/stdafx.h +++ b/lib/libZRCola/src/stdafx.h @@ -21,6 +21,7 @@ #include "../../../include/zrcola.h" +#include "../include/zrcola/character.h" #include "../include/zrcola/language.h" #include "../include/zrcola/normalize.h" #include "../include/zrcola/translate.h" diff --git a/lib/libZRColaUI/include/zrcolaui/chargroup.h b/lib/libZRColaUI/include/zrcolaui/chargroup.h index 0e7eba4..fd89fa3 100644 --- a/lib/libZRColaUI/include/zrcolaui/chargroup.h +++ b/lib/libZRColaUI/include/zrcolaui/chargroup.h @@ -55,7 +55,7 @@ namespace ZRCola { /// /// Rank index /// - class indexRnk : public index + class indexRank : public index { public: /// @@ -63,7 +63,7 @@ namespace ZRCola { /// /// \param[in] h Reference to vector holding the data /// - indexRnk(_In_ std::vector &h) : index(h) {} + indexRank(_In_ std::vector &h) : index(h) {} /// /// Compares two character groups by rank (for searching) diff --git a/output/data/ZRCola.zrcdb b/output/data/ZRCola.zrcdb index be4147b..45b3e6c 100644 Binary files a/output/data/ZRCola.zrcdb and b/output/data/ZRCola.zrcdb differ diff --git a/output/locale/ZRCola-zrcdb.pot b/output/locale/ZRCola-zrcdb.pot index d3dd158..71f85b3 100644 --- a/output/locale/ZRCola-zrcdb.pot +++ b/output/locale/ZRCola-zrcdb.pot @@ -5,7 +5,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: 
text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Generator: ZRColaCompile 2.0-alpha6\n" +"X-Generator: ZRColaCompile 2.0-alpha7\n" msgid "Albanian" msgstr "" @@ -238,6 +238,21 @@ msgstr "" msgid "Letter z" msgstr "" +msgid "Letter, Lowercase" +msgstr "" + +msgid "Letter, Modifier" +msgstr "" + +msgid "Letter, Other" +msgstr "" + +msgid "Letter, Titlecase" +msgstr "" + +msgid "Letter, Uppercase" +msgstr "" + msgid "Ligatures" msgstr "" @@ -250,6 +265,15 @@ msgstr "" msgid "Maltese" msgstr "" +msgid "Mark, Enclosing" +msgstr "" + +msgid "Mark, Non-Spacing" +msgstr "" + +msgid "Mark, Spacing Combining" +msgstr "" + msgid "Metric" msgstr "" @@ -295,12 +319,30 @@ msgstr "" msgid "Number 9" msgstr "" +msgid "Number, Decimal Digit" +msgstr "" + +msgid "Number, Letter" +msgstr "" + +msgid "Number, Other" +msgstr "" + msgid "Numbers" msgstr "" msgid "Numbers - Circled" msgstr "" +msgid "Other, Control" +msgstr "" + +msgid "Other, Format" +msgstr "" + +msgid "Other, Surrogate" +msgstr "" + msgid "Parentheses" msgstr "" @@ -310,6 +352,27 @@ msgstr "" msgid "Portuguese" msgstr "" +msgid "Punctuation, Close" +msgstr "" + +msgid "Punctuation, Connector" +msgstr "" + +msgid "Punctuation, Dash" +msgstr "" + +msgid "Punctuation, Final quote" +msgstr "" + +msgid "Punctuation, Initial quote" +msgstr "" + +msgid "Punctuation, Open" +msgstr "" + +msgid "Punctuation, Other" +msgstr "" + msgid "Quotes" msgstr "" @@ -319,6 +382,15 @@ msgstr "" msgid "Russian" msgstr "" +msgid "Separator, Line" +msgstr "" + +msgid "Separator, Paragraph" +msgstr "" + +msgid "Separator, Space" +msgstr "" + msgid "Serbian – Cyrillic" msgstr "" @@ -385,6 +457,18 @@ msgstr "" msgid "Symbol ?" 
msgstr "" +msgid "Symbol, Currency" +msgstr "" + +msgid "Symbol, Math" +msgstr "" + +msgid "Symbol, Modifier" +msgstr "" + +msgid "Symbol, Other" +msgstr "" + msgid "Turkish" msgstr "" diff --git a/output/locale/sl_SI/ZRCola-zrcdb.mo b/output/locale/sl_SI/ZRCola-zrcdb.mo index 1030158..48bc785 100644 Binary files a/output/locale/sl_SI/ZRCola-zrcdb.mo and b/output/locale/sl_SI/ZRCola-zrcdb.mo differ diff --git a/output/locale/sl_SI/ZRCola-zrcdb.po b/output/locale/sl_SI/ZRCola-zrcdb.po index 18b79d2..a90321f 100644 --- a/output/locale/sl_SI/ZRCola-zrcdb.po +++ b/output/locale/sl_SI/ZRCola-zrcdb.po @@ -245,6 +245,21 @@ msgstr "Črka y" msgid "Letter z" msgstr "Črka z" +msgid "Letter, Lowercase" +msgstr "Črka, mala" + +msgid "Letter, Modifier" +msgstr "Črka, spreminjevalo" + +msgid "Letter, Other" +msgstr "Črka, drugo" + +msgid "Letter, Titlecase" +msgstr "Črka, naslovna" + +msgid "Letter, Uppercase" +msgstr "Črka, velika" + msgid "Ligatures" msgstr "Ligature" @@ -257,6 +272,15 @@ msgstr "makedonščina" msgid "Maltese" msgstr "malteščina" +msgid "Mark, Enclosing" +msgstr "Ločevalo, obdajajoča" + +msgid "Mark, Non-Spacing" +msgstr "Ločevalo, neločljivo" + +msgid "Mark, Spacing Combining" +msgstr "Ločevalo, ločljivo" + msgid "Metric" msgstr "Metrično" @@ -302,12 +326,30 @@ msgstr "Številka 8" msgid "Number 9" msgstr "Številka 9" +msgid "Number, Decimal Digit" +msgstr "Številka, desetiška števka" + +msgid "Number, Letter" +msgstr "Številka, črka" + +msgid "Number, Other" +msgstr "Številka, drugo" + msgid "Numbers" msgstr "Številke" msgid "Numbers - Circled" msgstr "Številke - obkroženo" +msgid "Other, Control" +msgstr "Drugo, kontrolni" + +msgid "Other, Format" +msgstr "Drugo, oblikovni" + +msgid "Other, Surrogate" +msgstr "Drugo, nadomestni" + msgid "Parentheses" msgstr "Oklepaji" @@ -317,6 +359,27 @@ msgstr "poljščina" msgid "Portuguese" msgstr "portugalščina" +msgid "Punctuation, Close" +msgstr "Ločilo, zapiralno" + +msgid "Punctuation, Connector" +msgstr "Ločilo, 
povezaj" + +msgid "Punctuation, Dash" +msgstr "Ločilo, pomišljaj" + +msgid "Punctuation, Final quote" +msgstr "Ločilo, zaključni narekovaj" + +msgid "Punctuation, Initial quote" +msgstr "Ločilo, uvodni narekovaj" + +msgid "Punctuation, Open" +msgstr "Ločilo, odpiralno" + +msgid "Punctuation, Other" +msgstr "Ločilo, drugo" + msgid "Quotes" msgstr "Narekovaji" @@ -326,6 +389,15 @@ msgstr "romunščina" msgid "Russian" msgstr "ruščina" +msgid "Separator, Line" +msgstr "Ločilo, vrstic" + +msgid "Separator, Paragraph" +msgstr "Ločilo, odstavkov" + +msgid "Separator, Space" +msgstr "Ločilo, presledek" + msgid "Serbian – Cyrillic" msgstr "srbščina – cirilica" @@ -392,6 +464,18 @@ msgstr "Simbol >" msgid "Symbol ?" msgstr "Simbol ?" +msgid "Symbol, Currency" +msgstr "Simbol, valuta" + +msgid "Symbol, Math" +msgstr "Simbol, matematični" + +msgid "Symbol, Modifier" +msgstr "Simbol, ločevalo" + +msgid "Symbol, Other" +msgstr "Simbol, drugo" + msgid "Turkish" msgstr "turščina"