diff --git a/ZRColaCompile/ZRColaCompile.vcxproj b/ZRColaCompile/ZRColaCompile.vcxproj index e95fc88..5b5d1e3 100644 --- a/ZRColaCompile/ZRColaCompile.vcxproj +++ b/ZRColaCompile/ZRColaCompile.vcxproj @@ -60,9 +60,6 @@ {3c61929e-7289-4101-8d0a-da22d6e1aea8} - - {518777cc-0a59-4415-a12a-82751ed75343} - {a3a36689-ac35-4026-93da-a3ba0c0e767c} diff --git a/ZRColaCompile/dbsource.cpp b/ZRColaCompile/dbsource.cpp index 4135b19..e4588af 100644 --- a/ZRColaCompile/dbsource.cpp +++ b/ZRColaCompile/dbsource.cpp @@ -32,32 +32,31 @@ ZRCola::DBSource::~DBSource() } -bool ZRCola::DBSource::Open(const wxString& filename) +bool ZRCola::DBSource::Open(LPCTSTR _filename) { wxASSERT_MSG(!m_db, wxT("database already open")); - HRESULT hr; - // Create COM object. - hr = ::CoCreateInstance(CLSID_CADOConnection, NULL, CLSCTX_ALL, IID_IADOConnection, (LPVOID*)&m_db); + HRESULT hr = ::CoCreateInstance(CLSID_CADOConnection, NULL, CLSCTX_ALL, IID_IADOConnection, (LPVOID*)&m_db); if (SUCCEEDED(hr)) { // Open the database. std::wstring cn; cn = L"Driver={Microsoft Access Driver (*.mdb)};"; cn += L"Dbq="; - cn += filename.c_str(); + cn += _filename; cn += L";Uid=;Pwd=;"; hr = m_db->Open(ATL::CComBSTR(cn.c_str())); if (SUCCEEDED(hr)) { // Database open and ready. 
+ filename = _filename; return true; } else { - wxLogMessage(wxT("Could not open database %s (0x%x)."), filename.c_str(), hr); + _ftprintf(stderr, wxT("%s: error ZCC0002: Could not open database (0x%x).\n"), (LPCTSTR)_filename, hr); LogErrors(); } m_db.Release(); } else - wxLogMessage(wxT("Creating ADOConnection object failed (0x%x)."), hr); + _ftprintf(stderr, wxT("%s: error ZCC0001: Creating ADOConnection object failed (0x%x).\n"), (LPCTSTR)_filename, hr); return false; } @@ -85,7 +84,7 @@ void ZRCola::DBSource::LogErrors() const ATL::CComBSTR desc; wxVERIFY(SUCCEEDED(err->get_Description(&desc))); - wxLogMessage(wxT("ADO Error 0x%x: %ls"), num, (BSTR)desc); + _ftprintf(stderr, wxT(" error ADO%x: %ls\n"), num, (BSTR)desc); err->Release(); } @@ -96,17 +95,107 @@ void ZRCola::DBSource::LogErrors() const } -bool ZRCola::DBSource::SelectCompositions(ATL::CComPtr &rs) const +bool ZRCola::DBSource::GetUnicodeString(const CComPtr& f, std::wstring& str) const +{ + wxASSERT_MSG(f, wxT("field is empty")); + + CComVariant v; + wxVERIFY(SUCCEEDED(f->get_Value(&v))); + + // Parse the field. Must be "xxxx+xxxx+xxxx..." sequence. + wxVERIFY(SUCCEEDED(v.ChangeType(VT_BSTR))); + str.clear(); + for (UINT i = 0, n = ::SysStringLen(V_BSTR(&v)); i < n && V_BSTR(&v)[i];) { + // Parse Unicode code. + UINT j = 0; + wchar_t c = 0; + for (; i < n && V_BSTR(&v)[i]; i++, j++) { + if (L'0' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'9') c = c*0x10 + (V_BSTR(&v)[i] - L'0'); + else if (L'A' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'F') c = c*0x10 + (V_BSTR(&v)[i] - L'A' + 10); + else if (L'a' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'f') c = c*0x10 + (V_BSTR(&v)[i] - L'a' + 10); + else break; + } + if (j <= 0 || 4 < j) { + CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname))); + _ftprintf(stderr, wxT("%s: error ZCC0020: Syntax error in \"%.*ls\" field (\"%.*ls\"). 
Unicode code must be one to four hexadecimal characters long.\n"), filename.c_str(), fieldname.Length(), (BSTR)fieldname, n, V_BSTR(&v)); + return false; + } + str += c; + + // Skip delimiter(s) and whitespace. + for (; i < n && V_BSTR(&v)[i] && (V_BSTR(&v)[i] == L'+' || iswspace(V_BSTR(&v)[i])); i++); + } + + return true; +} + + +bool ZRCola::DBSource::GetUnicodeCharacter(const CComPtr& f, wchar_t& chr) const +{ + wxASSERT_MSG(f, wxT("field is empty")); + + CComVariant v; + wxVERIFY(SUCCEEDED(f->get_Value(&v))); + + // Parse the field. Must be exactly one Unicode code. + wxVERIFY(SUCCEEDED(v.ChangeType(VT_BSTR))); + UINT i = 0, n = ::SysStringLen(V_BSTR(&v)); + chr = 0; + for (; i < n && V_BSTR(&v)[i]; i++) { + if (L'0' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'9') chr = chr*0x10 + (V_BSTR(&v)[i] - L'0'); + else if (L'A' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'F') chr = chr*0x10 + (V_BSTR(&v)[i] - L'A' + 10); + else if (L'a' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'f') chr = chr*0x10 + (V_BSTR(&v)[i] - L'a' + 10); + else break; + } + if (i <= 0 || 4 < i) { /* reject an empty code and one longer than four hex digits (same rule as GetUnicodeString) */ + CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname))); + _ftprintf(stderr, wxT("%s: error ZCC0020: Syntax error in \"%.*ls\" field (\"%.*ls\"). Unicode code must be one to four hexadecimal characters long.\n"), filename.c_str(), fieldname.Length(), (BSTR)fieldname, n, V_BSTR(&v)); + return false; + } else if (i != n) { + CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname))); + _ftprintf(stderr, wxT("%s: error ZCC0021: Syntax error in \"%.*ls\" field (\"%.*ls\"). Extra trailing characters.\n"), filename.c_str(), fieldname.Length(), (BSTR)fieldname, n, V_BSTR(&v)); + return false; + } + + return true; +} + + +bool ZRCola::DBSource::SelectTranslations(ATL::CComPtr &rs) const { // Create a new recordset. if (rs) rs.Release(); wxCHECK(SUCCEEDED(::CoCreateInstance(CLSID_CADORecordset, NULL, CLSCTX_ALL, IID_IADORecordset, (LPVOID*)&rs)), false); // Open it.
- if (FAILED(rs->Open(ATL::CComVariant(L"SELECT [komb], [znak] FROM [VRS_ReplChar] WHERE [rang_komb]=1 ORDER BY [komb] ASC"), ATL::CComVariant(m_db), adOpenForwardOnly, adLockReadOnly, adCmdText))) { + if (FAILED(rs->Open(ATL::CComVariant(L"SELECT [komb], [znak] FROM [VRS_ReplChar] WHERE [rang_komb]=1"), ATL::CComVariant(m_db), adOpenStatic, adLockReadOnly, adCmdText))) { + _ftprintf(stderr, wxT("%s: error ZCC0010: Error loading compositions from database. Please make sure the file is ZRCola.zrc compatible.\n"), filename.c_str()); LogErrors(); return false; } return true; } + + +bool ZRCola::DBSource::GetTranslation(const ATL::CComPtr& rs, ZRCola::DBSource::translation& t) const +{ + wxASSERT_MSG(rs, wxT("recordset is empty")); + + CComPtr flds; + wxVERIFY(SUCCEEDED(rs->get_Fields(&flds))); + + { + CComPtr f; + wxVERIFY(SUCCEEDED(flds->get_Item(CComVariant(L"komb"), &f))); + wxCHECK(GetUnicodeString(f, t.str), false); + } + + { + CComPtr f; + wxVERIFY(SUCCEEDED(flds->get_Item(CComVariant(L"znak"), &f))); + wxCHECK(GetUnicodeCharacter(f, t.chr), false); + } + + return true; +} diff --git a/ZRColaCompile/dbsource.h b/ZRColaCompile/dbsource.h index 29c1e05..95f350b 100644 --- a/ZRColaCompile/dbsource.h +++ b/ZRColaCompile/dbsource.h @@ -19,22 +19,32 @@ #pragma once -#include - #include - #include +#include namespace ZRCola { /// /// Source database /// - class DBSource { + class DBSource + { + public: + /// + /// Translation + /// + class translation { + public: + wchar_t chr; ///< composed character + std::wstring str; ///< decomposed string + }; + public: DBSource(); virtual ~DBSource(); + /// /// Opens the database /// @@ -44,15 +54,73 @@ namespace ZRCola { /// - true when open succeeds /// - false otherwise /// - bool Open(const wxString& filename); + bool Open(LPCTSTR filename); + /// /// Logs errors in database connections /// void LogErrors() const; + /// - /// Returns ordered decomposed to composed character translations + /// Is recordset at end + /// + /// 
\param[in] rs Recordset with results + /// + /// \returns + /// - true when at end (or when the end-of-file state cannot be read) + /// - false otherwise + /// + static inline bool IsEOF(const ATL::CComPtr& rs) + { + VARIANT_BOOL eof = VARIANT_TRUE; + return FAILED(rs->get_EOF(&eof)) || eof ? true : false; + } + + + /// + /// Gets number of records in a recordset + /// + /// \param[in] rs Recordset with results + /// + /// \returns Number of records, or (size_t)-1 when the count cannot be obtained + /// + static inline size_t GetRecordsetCount(const ATL::CComPtr& rs) + { + ADO_LONGPTR count; + return SUCCEEDED(rs->get_RecordCount(&count)) ? count : (size_t)-1; + } + + + /// + /// Gets encoded Unicode string from ZRCola.zrc database + /// + /// \param[in] f Data field + /// \param[out] str Output string + /// + /// \returns + /// - true when successful + /// - false otherwise + /// + bool GetUnicodeString(const CComPtr& f, std::wstring& str) const; + + + /// + /// Gets encoded Unicode character from ZRCola.zrc database + /// + /// \param[in] f Data field + /// \param[out] chr Output character + /// + /// \returns + /// - true when successful + /// - false otherwise + /// + bool GetUnicodeCharacter(const CComPtr& f, wchar_t& chr) const; + + + /// + /// Returns character translations /// /// \param[out] rs Recordset with results /// @@ -60,9 +128,23 @@ namespace ZRCola { /// - true when query succeeds /// - false otherwise /// - bool SelectCompositions(ATL::CComPtr &rs) const; + bool SelectTranslations(ATL::CComPtr& rs) const; + + + /// + /// Returns translation data + /// + /// \param[in] rs Recordset with results + /// \param[out] t Translation + /// + /// \returns + /// - true when succeeded + /// - false otherwise + /// + bool GetTranslation(const ATL::CComPtr& rs, translation& t) const; protected: - ATL::CComPtr m_db; ///< the database + std::basic_string filename; ///< the database filename + ATL::CComPtr m_db; ///< the database }; }; diff --git a/ZRColaCompile/main.cpp b/ZRColaCompile/main.cpp index f77e55b..406bf99 100644 --- a/ZRColaCompile/main.cpp
+++ b/ZRColaCompile/main.cpp @@ -20,6 +20,62 @@ #include "stdafx.h" +static inline int CompareBinary(const wchar_t *str_a, size_t count_a, const wchar_t *str_b, size_t count_b) +{ + for (size_t i = 0; ; i++) { + if (i >= count_a && i >= count_b) break; + else if (i >= count_a && i < count_b) return -1; + else if (i < count_a && i >= count_b) return +1; + else if (str_a[i] < str_b[i]) return -1; + else if (str_a[i] > str_b[i]) return +1; + } + + return 0; +} + + +static int __cdecl CompareCompositionIndex(void *data, const void *a, const void *b) +{ + const wchar_t + *chr_a = (const wchar_t*)data + ((const ZRCola::translation_index*)a)->start, + *chr_b = (const wchar_t*)data + ((const ZRCola::translation_index*)b)->start; + const wchar_t + *str_a = chr_a + 1, + *str_b = chr_b + 1; + size_t + count_a = (const wchar_t*)data + ((const ZRCola::translation_index*)a)->end - str_a, + count_b = (const wchar_t*)data + ((const ZRCola::translation_index*)b)->end - str_b; + + int r = CompareBinary(str_a, count_a, str_b, count_b); + if (r != 0) return r; + + if (*chr_a < *chr_b) return -1; + else if (*chr_a > *chr_b) return +1; + + return 0; +} + + +static int __cdecl CompareDecompositionIndex(void *data, const void *a, const void *b) +{ + const wchar_t + *chr_a = (const wchar_t*)data + ((const ZRCola::translation_index*)a)->start, + *chr_b = (const wchar_t*)data + ((const ZRCola::translation_index*)b)->start; + + if (*chr_a < *chr_b) return -1; + else if (*chr_a > *chr_b) return +1; + + const wchar_t + *str_a = chr_a + 1, + *str_b = chr_b + 1; + size_t + count_a = (const wchar_t*)data + ((const ZRCola::translation_index*)a)->end - str_a, + count_b = (const wchar_t*)data + ((const ZRCola::translation_index*)b)->end - str_b; + + return CompareBinary(str_a, count_a, str_b, count_b); +} + + /// /// Main function /// @@ -83,18 +139,97 @@ int _tmain(int argc, _TCHAR *argv[]) return 1; } - wxFile dst; const wxString& filenameOut = parser.GetParam(1); - if (!dst.Create(filenameOut, 
true, wxS_IRUSR | wxS_IWUSR | wxS_IRGRP | wxS_IWGRP | wxS_IROTH)) { + std::fstream dst((LPCTSTR)filenameOut, std::ios_base::out | std::ios_base::trunc | std::ios_base::binary); + if (dst.fail()) { _ftprintf(stderr, _("Error opening %s output file.\n"), filenameOut.fn_str()); return 1; } - ATL::CComPtr rs_comp; - if (!src.SelectCompositions(rs_comp)) { - _ftprintf(stderr, _("Error loading compositions from %s input file. Please make sure the input file is ZRCola.zrc compatible.\n"), filenameIn.fn_str()); - return 1; + bool has_errors = false; + + // Open file ID. + std::streamoff dst_start = stdex::idrec::open(dst, ZRCOLA_DB_ID); + + { + // Get translations. + ATL::CComPtr rs; + if (src.SelectTranslations(rs)) { + size_t trans_count = src.GetRecordsetCount(rs); + if (trans_count < 0xffffffff) { // 4G check (-1 is reserved for error condition) + // Allocate memory. + std::vector comp_index; + comp_index.reserve(trans_count); + std::vector decomp_index; + decomp_index.reserve(trans_count); + std::vector comp_data; + comp_data.reserve(trans_count*4); + ZRCola::DBSource::translation trans; + + // Parse translations and build index and data. + while (!ZRCola::DBSource::IsEOF(rs)) { + // Read translation from the database. + if (src.GetTranslation(rs, trans)) { + // Add translation to index and data. + ZRCola::translation_index ti; + ti.start = comp_data.size(); + comp_data.push_back(trans.chr); + for (std::wstring::size_type i = 0, n = trans.str.length(); i < n; i++) + comp_data.push_back(trans.str[i]); + ti.end = comp_data.size(); + comp_index.push_back(ti); + decomp_index.push_back(ti); + } else + has_errors = true; + + wxVERIFY(SUCCEEDED(rs->MoveNext())); + } + + // Sort indices (only the entries actually read; a failed record leaves fewer than trans_count). + qsort_s( comp_index.data(), comp_index.size(), sizeof(ZRCola::translation_index), CompareCompositionIndex , comp_data.data()); + qsort_s(decomp_index.data(), decomp_index.size(), sizeof(ZRCola::translation_index), CompareDecompositionIndex, comp_data.data()); + + // Write translations to file.
+ std::streamoff start = stdex::idrec::open(dst, ZRCOLA_DB_COMPOSITIONS_ID); + { + unsigned int _count = (unsigned int)comp_index.size(); /* number of index entries actually stored */ + dst.write((const char*)&_count, sizeof(_count)); + dst.write((const char*) comp_index.data(), sizeof(ZRCola::translation_index)*_count); /* whole records, not just the first 32-bit field's worth */ + dst.write((const char*)decomp_index.data(), sizeof(ZRCola::translation_index)*_count); + } + { + std::vector::size_type count = comp_data.size(); + if (count <= 0xffffffff) { // 4G check + unsigned int _count = (unsigned int)count; + dst.write((const char*)&_count, sizeof(_count)); + dst.write((const char*)comp_data.data(), sizeof(wchar_t)*_count); + } else { + _ftprintf(stderr, wxT("%s: error ZCC0005: Translation data exceeds 4G.\n"), (LPCTSTR)filenameIn.c_str()); + has_errors = true; + } + } + stdex::idrec::close(dst, start); + } else { + _ftprintf(stderr, wxT("%s: error ZCC0004: Error getting translation count from database or too many translations.\n"), (LPCTSTR)filenameIn.c_str()); + has_errors = true; + } + } else { + _ftprintf(stderr, wxT("%s: error ZCC0003: Error getting translations from database. Please make sure the file is ZRCola.zrc compatible.\n"), (LPCTSTR)filenameIn.c_str()); + has_errors = true; + } } - return 0; + stdex::idrec::close(dst, dst_start); + + if (dst.fail()) { + _ftprintf(stderr, wxT("Writing to %s output file failed.\n"), (LPCTSTR)filenameOut.c_str()); + has_errors = true; + } + + if (has_errors) { + dst.close(); + wxRemoveFile(filenameOut); + return 1; + } else + return 0; } diff --git a/ZRColaCompile/stdafx.h b/ZRColaCompile/stdafx.h index 2becbe1..a73f1f4 100644 --- a/ZRColaCompile/stdafx.h +++ b/ZRColaCompile/stdafx.h @@ -22,16 +22,23 @@ #include "../include/zrcola.h" #include "dbsource.h" +#include + #include #include -#include #include #include #include +#include + #include // GUID helper to prevent LNK2001 errors (unresolved external symbol IID_IADO...)
#include #include #include + +#include + +#include diff --git a/lib/libZRCola/build/libZRCola.props b/lib/libZRCola/build/libZRCola.props index 1d6ec89..3c84cd3 100644 --- a/lib/libZRCola/build/libZRCola.props +++ b/lib/libZRCola/build/libZRCola.props @@ -10,6 +10,7 @@ LIBZRCOLA;%(PreprocessorDefinitions) + ..\..\stdex\include;%(AdditionalIncludeDirectories) diff --git a/lib/libZRCola/build/libZRCola.vcxproj b/lib/libZRCola/build/libZRCola.vcxproj index a7fe36b..f2f83eb 100644 --- a/lib/libZRCola/build/libZRCola.vcxproj +++ b/lib/libZRCola/build/libZRCola.vcxproj @@ -43,11 +43,6 @@ - - - {518777cc-0a59-4415-a12a-82751ed75343} - - {3C61929E-7289-4101-8D0A-DA22D6E1AEA8} libZRCola diff --git a/lib/libZRCola/include/zrcola/common.h b/lib/libZRCola/include/zrcola/common.h index 756339d..c3c7935 100644 --- a/lib/libZRCola/include/zrcola/common.h +++ b/lib/libZRCola/include/zrcola/common.h @@ -35,7 +35,39 @@ #pragma warning(disable: 4251) +/// +/// Data records alignment +/// +#define ZRCOLA_RECORD_ALIGN 1 + +/// +/// Database IDs +/// +#define ZRCOLA_DB_ID ((ZRCola::recordid_t)0x0043525a) +#define ZRCOLA_DB_COMPOSITIONS_ID ((ZRCola::recordid_t)0x00000001) +#define ZRCOLA_DB_DECOMPOSITIONS_ID ((ZRCola::recordid_t)0x00000002) + + namespace ZRCola { + typedef unsigned __int32 recordid_t; + typedef unsigned __int32 recordsize_t; + + +#pragma pack(push) +#pragma pack(4) + + /// + /// Translation index + /// + struct translation_index { + unsigned __int32 start; ///< Composed character offset + unsigned __int32 end; ///< Decomposed string end offset + }; + +#pragma pack(pop) + + + /// /// Composed-decomposed index transformation mapping /// diff --git a/lib/libZRCola/include/zrcola/compose.h b/lib/libZRCola/include/zrcola/compose.h index 11f309f..40716cc 100644 --- a/lib/libZRCola/include/zrcola/compose.h +++ b/lib/libZRCola/include/zrcola/compose.h @@ -26,6 +26,15 @@ namespace ZRCola { + /// + /// Composition + /// + struct composition { + const wchar_t *src; ///< Decomposed 
string + wchar_t dst; ///< Composed character + }; + + /// /// Composes string /// diff --git a/lib/libZRCola/include/zrcola/decompose.h b/lib/libZRCola/include/zrcola/decompose.h index 4407930..199921f 100644 --- a/lib/libZRCola/include/zrcola/decompose.h +++ b/lib/libZRCola/include/zrcola/decompose.h @@ -26,6 +26,15 @@ namespace ZRCola { + /// + /// Decomposition + /// + struct decomposition { + wchar_t src; ///< composed character + const wchar_t *dst; ///< decomposed string + }; + + /// /// Decomposes string /// diff --git a/lib/libZRCola/src/stdafx.h b/lib/libZRCola/src/stdafx.h index 4b2b12b..08e0bec 100644 --- a/lib/libZRCola/src/stdafx.h +++ b/lib/libZRCola/src/stdafx.h @@ -26,19 +26,9 @@ namespace ZRCola { - struct composition { - const wchar_t *src; - wchar_t dst; - }; - extern const composition* compositions; extern const size_t compositionsCount; - struct decomposition { - wchar_t src; - const wchar_t *dst; - }; - extern const decomposition* decompositions; extern const size_t decompositionsCount; } diff --git a/lib/stdex b/lib/stdex index f86db20..72766c2 160000 --- a/lib/stdex +++ b/lib/stdex @@ -1 +1 @@ -Subproject commit f86db2052780c33fae23815f066c7b71002acdd8 +Subproject commit 72766c21b2889ac13c94f4645638fe043dfa35d7