Support for Unicode character database added

This commit is contained in:
Simon Rozman 2016-05-11 14:39:20 +02:00
parent e3c6a01722
commit 4aa0d9183e
12 changed files with 972 additions and 30 deletions

View File

@ -156,10 +156,13 @@ bool ZRCola::DBSource::GetValue(const ATL::CComPtr<ADOField>& f, std::wstring& v
ATL::CComVariant v;
wxVERIFY(SUCCEEDED(f->get_Value(&v)));
wxCHECK(SUCCEEDED(v.ChangeType(VT_BSTR)), false);
if (V_VT(&v) != VT_NULL) {
wxCHECK(SUCCEEDED(v.ChangeType(VT_BSTR)), false);
val.reserve(::SysStringLen(V_BSTR(&v)));
val = V_BSTR(&v);
val.reserve(::SysStringLen(V_BSTR(&v)));
val = V_BSTR(&v);
} else
val.empty();
return true;
}
@ -171,26 +174,29 @@ bool ZRCola::DBSource::GetUnicodeCharacter(const ATL::CComPtr<ADOField>& f, wcha
ATL::CComVariant v;
wxVERIFY(SUCCEEDED(f->get_Value(&v)));
wxCHECK(SUCCEEDED(v.ChangeType(VT_BSTR)), false);
if (V_VT(&v) != VT_NULL) {
wxCHECK(SUCCEEDED(v.ChangeType(VT_BSTR)), false);
// Parse the field. Must be exactly one Unicode code.
UINT i = 0, n = ::SysStringLen(V_BSTR(&v));
chr = 0;
for (; i < n && V_BSTR(&v)[i]; i++) {
if (L'0' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'9') chr = chr*0x10 + (V_BSTR(&v)[i] - L'0');
else if (L'A' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'F') chr = chr*0x10 + (V_BSTR(&v)[i] - L'A' + 10);
else if (L'a' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'f') chr = chr*0x10 + (V_BSTR(&v)[i] - L'a' + 10);
else break;
}
if (i <= 0 && 4 < i) {
ATL::CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname)));
_ftprintf(stderr, wxT("%s: error ZCC0030: Syntax error in \"%.*ls\" field (\"%.*ls\"). Unicode code must be one to four hexadecimal characters long.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, n, V_BSTR(&v));
return false;
} else if (i != n) {
ATL::CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname)));
_ftprintf(stderr, wxT("%s: error ZCC0031: Syntax error in \"%.*ls\" field (\"%.*ls\"). Extra trailing characters.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, n, V_BSTR(&v));
return false;
}
// Parse the field. Must be exactly one Unicode code.
UINT i = 0, n = ::SysStringLen(V_BSTR(&v));
chr = 0;
for (; i < n && V_BSTR(&v)[i]; i++) {
if (L'0' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'9') chr = chr*0x10 + (V_BSTR(&v)[i] - L'0');
else if (L'A' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'F') chr = chr*0x10 + (V_BSTR(&v)[i] - L'A' + 10);
else if (L'a' <= V_BSTR(&v)[i] && V_BSTR(&v)[i] <= L'f') chr = chr*0x10 + (V_BSTR(&v)[i] - L'a' + 10);
else break;
}
if (i <= 0 && 4 < i) {
ATL::CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname)));
_ftprintf(stderr, wxT("%s: error ZCC0030: Syntax error in \"%.*ls\" field (\"%.*ls\"). Unicode code must be one to four hexadecimal characters long.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, n, V_BSTR(&v));
return false;
} else if (i != n) {
ATL::CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname)));
_ftprintf(stderr, wxT("%s: error ZCC0031: Syntax error in \"%.*ls\" field (\"%.*ls\"). Extra trailing characters.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, n, V_BSTR(&v));
return false;
}
} else
chr = 0;
return true;
}
@ -317,6 +323,43 @@ bool ZRCola::DBSource::GetLanguage(const ATL::CComPtr<ADOField>& f, ZRCola::lang
}
// Reads a character category ID from field f into cc.
//
// A NULL field yields an all-zero ID. Otherwise the field is converted to a string which
// must be one or two ASCII characters long; IDs shorter than cc are zero padded.
// Returns true on success; on a syntax error a diagnostic is printed to stderr and false
// is returned.
bool ZRCola::DBSource::GetChrCat(const ATL::CComPtr<ADOField>& f, chrcatid_t& cc) const
{
    wxASSERT_MSG(f, wxT("field is empty"));

    ATL::CComVariant v;
    wxVERIFY(SUCCEEDED(f->get_Value(&v)));

    if (V_VT(&v) == VT_NULL) {
        // No value in the database: report an empty (all-zero) category ID.
        memset(cc, 0, sizeof(cc));
        return true;
    }

    wxCHECK(SUCCEEDED(v.ChangeType(VT_BSTR)), false);

    // Parse the field.
    const size_t n = wcsnlen(V_BSTR(&v), ::SysStringLen(V_BSTR(&v)));
    if (n < 1 || 2 < n) {
        ATL::CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname)));
        // The "*" precision of "%.*ls" consumes an int argument, hence the explicit cast of n.
        _ftprintf(stderr, wxT("%s: error ZCC0110: Syntax error in \"%.*ls\" field (\"%.*ls\"). Character category ID must be one (1) or two (2) characters long.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, (int)n, V_BSTR(&v));
        return false;
    }

    // Copy the ID one character at a time, zero padding the unused tail of cc.
    for (size_t i = 0; i < sizeof(cc); i++) {
        if (i < n) {
            const wchar_t c = V_BSTR(&v)[i];
            if ((unsigned short)c > 0x7f) {
                ATL::CComBSTR fieldname; wxVERIFY(SUCCEEDED(f->get_Name(&fieldname)));
                _ftprintf(stderr, wxT("%s: error ZCC0111: Syntax error in \"%.*ls\" field (\"%.*ls\"). Character category ID must contain ASCII characters only.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, (int)n, V_BSTR(&v));
                return false;
            }
            cc[i] = (char)c;
        } else
            cc[i] = 0;
    }

    return true;
}
bool ZRCola::DBSource::SelectTranslations(ATL::CComPtr<ADORecordset> &rs) const
{
@ -610,3 +653,127 @@ bool ZRCola::DBSource::GetCharacterGroup(const ATL::CComPtr<ADORecordset>& rs, c
return true;
}
// Opens a static, read-only recordset over [VRS_CharList], ordered by [znak].
// Any recordset already held by rs is released first. Returns false (after printing
// error ZCC0120 and logging the database errors) when the query cannot be opened.
bool ZRCola::DBSource::SelectCharacters(ATL::CComPtr<ADORecordset>& rs) const
{
    // Start from a fresh recordset object.
    if (rs)
        rs.Release();
    wxCHECK(SUCCEEDED(::CoCreateInstance(CLSID_CADORecordset, NULL, CLSCTX_ALL, IID_IADORecordset, (LPVOID*)&rs)), false);

    // Run the query against our database connection.
    ATL::CComVariant query(
        L"SELECT DISTINCT [znak], [opis_en], [klj_bes_en], [kat], [znak_v], [znak_m] "
        L"FROM [VRS_CharList] "
        L"ORDER BY [znak]");
    ATL::CComVariant connection(m_db);
    if (SUCCEEDED(rs->Open(query, connection, adOpenStatic, adLockReadOnly, adCmdText)))
        return true;

    _ftprintf(stderr, wxT("%s: error ZCC0120: Error loading characters from database. Please make sure the file is ZRCola.zrc compatible.\n"), m_filename.c_str());
    LogErrors();
    return false;
}
// Reads the current record of rs into chr.
//
// Fields used: [znak] (the character), [znak_v] and [znak_m] (related characters; each is
// appended to chr.rel when non-zero and different from the character itself), [opis_en]
// (description), [klj_bes_en] (keywords) and [kat] (category ID).
// Returns false as soon as any field fails to parse.
bool ZRCola::DBSource::GetCharacter(const ATL::CComPtr<ADORecordset>& rs, character& chr) const
{
    wxASSERT_MSG(rs, wxT("recordset is empty"));

    ATL::CComPtr<ADOFields> flds;
    wxVERIFY(SUCCEEDED(rs->get_Fields(&flds)));

    // The related-character list is built by appending below, and callers may pass the same
    // object for every record. Reset it so entries of previous records do not pile up.
    chr.rel.clear();
    // Likewise make sure no text of a previous record survives, in case GetValue() leaves
    // its output untouched for a NULL field.
    chr.desc.clear();
    chr.keywords.clear();

    wchar_t c;

    {
        ATL::CComPtr<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"znak"), &f)));
        wxCHECK(GetUnicodeCharacter(f, chr.chr), false);
    }

    {
        ATL::CComPtr<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"znak_v"), &f)));
        wxCHECK(GetUnicodeCharacter(f, c), false);
        if (c && c != chr.chr)
            chr.rel += c;
    }

    {
        ATL::CComPtr<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"znak_m"), &f)));
        wxCHECK(GetUnicodeCharacter(f, c), false);
        if (c && c != chr.chr)
            chr.rel += c;
    }

    {
        ATL::CComPtr<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"opis_en"), &f)));
        wxCHECK(GetValue(f, chr.desc), false);
    }

    {
        ATL::CComPtr<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"klj_bes_en"), &f)));
        wxCHECK(GetValue(f, chr.keywords), false);
    }

    {
        ATL::CComPtr<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"kat"), &f)));
        wxCHECK(GetChrCat(f, chr.cat), false);
    }

    return true;
}
// Opens a static, read-only recordset over [VRS_CharCategories], ordered by [Rang] and
// then [opis_en]. Any recordset already held by rs is released first. Returns false (after
// printing error ZCC0130 and logging the database errors) when the query cannot be opened.
bool ZRCola::DBSource::SelectCharacterCategories(ATL::CComPtr<ADORecordset>& rs) const
{
    // Start from a fresh recordset object.
    if (rs)
        rs.Release();
    wxCHECK(SUCCEEDED(::CoCreateInstance(CLSID_CADORecordset, NULL, CLSCTX_ALL, IID_IADORecordset, (LPVOID*)&rs)), false);

    // Run the query against our database connection.
    ATL::CComVariant query(
        L"SELECT DISTINCT [kat], [opis_en], [Rang] "
        L"FROM [VRS_CharCategories] "
        L"ORDER BY [Rang], [opis_en]");
    ATL::CComVariant connection(m_db);
    if (SUCCEEDED(rs->Open(query, connection, adOpenStatic, adLockReadOnly, adCmdText)))
        return true;

    _ftprintf(stderr, wxT("%s: error ZCC0130: Error loading character categories from database. Please make sure the file is ZRCola.zrc compatible.\n"), m_filename.c_str());
    LogErrors();
    return false;
}
// Reads the current record of rs into cc.
//
// Fields used: [kat] (category ID), [Rang] (rank) and [opis_en] (name).
// Returns false as soon as any field fails to parse.
bool ZRCola::DBSource::GetCharacterCategory(const ATL::CComPtr<ADORecordset>& rs, chrcat& cc) const
{
    wxASSERT_MSG(rs, wxT("recordset is empty"));

    ATL::CComPtr<ADOFields> flds;
    wxVERIFY(SUCCEEDED(rs->get_Fields(&flds)));

    // Callers may pass the same object for every record. Make sure the name of a previous
    // record does not survive, in case GetValue() leaves its output untouched for a NULL field.
    cc.name.clear();

    {
        ATL::CComPtr<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"kat"), &f)));
        wxCHECK(GetChrCat(f, cc.id), false);
    }

    {
        ATL::CComPtr<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"Rang"), &f)));
        wxCHECK(GetValue(f, cc.rank), false);
    }

    {
        ATL::CComPtr<ADOField> f;
        wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"opis_en"), &f)));
        wxCHECK(GetValue(f, cc.name), false);
    }

    return true;
}

View File

@ -19,6 +19,7 @@
#pragma once
#include <zrcola/character.h>
#include <zrcola/common.h>
#include <atlbase.h>
@ -98,6 +99,30 @@ namespace ZRCola {
};
///
/// Character
///
/// One character record as read from the database by GetCharacter().
///
class character {
public:
wchar_t chr; ///< Character
ZRCola::chrcatid_t cat; ///< Category ID
std::wstring desc; ///< Character description
std::wstring keywords; ///< Additional keywords
std::wstring rel; ///< Related characters
};
///
/// Character category
///
/// One character category record as read from the database by GetCharacterCategory().
///
class chrcat {
public:
ZRCola::chrcatid_t id; ///< Category ID
int rank; ///< Character category rank
std::wstring name; ///< Character category name
};
public:
DBSource();
virtual ~DBSource();
@ -242,6 +267,19 @@ namespace ZRCola {
bool GetLanguage(const ATL::CComPtr<ADOField>& f, langid_t& lang) const;
///
/// Gets character category ID from ZRCola.zrc database
///
/// A NULL field yields an all-zero ID. Any other value must convert to a string of
/// one or two ASCII characters; a diagnostic is printed to stderr otherwise.
///
/// \param[in] f Data field
/// \param[out] cc Character category ID
///
/// \returns
/// - true when successful
/// - false otherwise
///
bool GetChrCat(const ATL::CComPtr<ADOField>& f, chrcatid_t& cc) const;
///
/// Returns character translations
///
@ -366,6 +404,54 @@ namespace ZRCola {
///
bool GetCharacterGroup(const ATL::CComPtr<ADORecordset>& rs, chrgrp& cg) const;
///
/// Returns characters
///
/// Any recordset already held by \p rs is released before the query is opened.
///
/// \param[out] rs Recordset with results
///
/// \returns
/// - true when query succeeds
/// - false otherwise
///
bool SelectCharacters(ATL::CComPtr<ADORecordset>& rs) const;
///
/// Returns character data
///
/// Reads the current record of \p rs; the recordset position is not advanced.
///
/// \param[in] rs Recordset with results
/// \param[out] chr Character
///
/// \returns
/// - true when succeeded
/// - false otherwise
///
bool GetCharacter(const ATL::CComPtr<ADORecordset>& rs, character& chr) const;
///
/// Returns character categories
///
/// Any recordset already held by \p rs is released before the query is opened.
///
/// \param[out] rs Recordset with results
///
/// \returns
/// - true when query succeeds
/// - false otherwise
///
bool SelectCharacterCategories(ATL::CComPtr<ADORecordset>& rs) const;
///
/// Returns character category data
///
/// Reads the current record of \p rs; the recordset position is not advanced.
///
/// \param[in] rs Recordset with results
/// \param[out] cc Character category
///
/// \returns
/// - true when succeeded
/// - false otherwise
///
bool GetCharacterCategory(const ATL::CComPtr<ADORecordset>& rs, chrcat& cc) const;
protected:
std::basic_string<TCHAR> m_filename; ///< Database filename
ATL::CComPtr<ADOConnection> m_db; ///< Database

View File

@ -28,7 +28,7 @@
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(std::ostream& stream, const ZRCola::translation_db &db)
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::translation_db &db)
{
assert(db.idxComp.size() == db.idxDecomp.size());
@ -84,7 +84,7 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::translation
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(std::ostream& stream, const ZRCola::keyseq_db &db)
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::keyseq_db &db)
{
assert(db.idxChr.size() == db.idxKey.size());
@ -140,7 +140,7 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::keyseq_db &
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(std::ostream& stream, const ZRCola::language_db &db)
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::language_db &db)
{
unsigned __int32 count;
@ -191,7 +191,7 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::language_db
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(std::ostream& stream, const ZRCola::langchar_db &db)
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::langchar_db &db)
{
#ifdef ZRCOLA_LANGCHAR_LANG_IDX
assert(db.idxChr.size() == db.idxLng.size());
@ -251,7 +251,7 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::langchar_db
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(std::ostream& stream, const ZRCola::chrgrp_db &db)
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::chrgrp_db &db)
{
unsigned __int32 count;
@ -293,6 +293,110 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::chrgrp_db &
}
///
/// Writes character database to a stream
///
/// Output layout: 32-bit index count, character index, 32-bit data count, data.
/// The stream's failbit is set when either count does not fit into 32 bits.
///
/// \param[in] stream Output stream
/// \param[in] db Character database
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::character_db &db)
{
    unsigned __int32 count;

    // Write index count.
    ZRCola::character_db::indexChar::size_type idx_count = db.idxChr.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (idx_count > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (stream.fail()) return stream;
    count = (unsigned __int32)idx_count;
    stream.write((const char*)&count, sizeof(count));

    // Write character index.
    if (stream.fail()) return stream;
    stream.write((const char*)db.idxChr.data(), sizeof(unsigned __int32)*count);

    // Write data count.
    std::vector<unsigned __int16>::size_type data_count = db.data.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (data_count > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (stream.fail()) return stream;
    count = (unsigned __int32)data_count;
    stream.write((const char*)&count, sizeof(count));

    // Write data.
    if (stream.fail()) return stream;
    stream.write((const char*)db.data.data(), sizeof(unsigned __int16)*count);

    return stream;
}
///
/// Writes character category database to a stream
///
/// Output layout: 32-bit index count, character category index, rank index,
/// 32-bit data count, data. Both indices are written using the same count.
/// The stream's failbit is set when either count does not fit into 32 bits.
///
/// \param[in] stream Output stream
/// \param[in] db Character category database
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::chrcat_db &db)
{
    // A single count is written for both indices, so they must be of equal size.
    assert(db.idxChrCat.size() == db.idxRnk.size());

    unsigned __int32 count;

    // Write index count.
    ZRCola::chrcat_db::indexChrCat::size_type idx_count = db.idxChrCat.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (idx_count > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (stream.fail()) return stream;
    count = (unsigned __int32)idx_count;
    stream.write((const char*)&count, sizeof(count));

    // Write character category index.
    if (stream.fail()) return stream;
    stream.write((const char*)db.idxChrCat.data(), sizeof(unsigned __int32)*count);

    // Write rank index.
    if (stream.fail()) return stream;
    stream.write((const char*)db.idxRnk.data(), sizeof(unsigned __int32)*count);

    // Write data count.
    std::vector<unsigned __int16>::size_type data_count = db.data.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (data_count > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (stream.fail()) return stream;
    count = (unsigned __int32)data_count;
    stream.write((const char*)&count, sizeof(count));

    // Write data.
    if (stream.fail()) return stream;
    stream.write((const char*)db.data.data(), sizeof(unsigned __int16)*count);

    return stream;
}
///
/// Main function
///
@ -656,6 +760,115 @@ int _tmain(int argc, _TCHAR *argv[])
}
}
{
// Get characters.
// Compiles every character record of the source database into a character_db and
// writes it to dst as a character_rec. Failures set has_errors.
ATL::CComPtr<ADORecordset> rs;
if (src.SelectCharacters(rs)) {
size_t count = src.GetRecordsetCount(rs);
if (count < 0xffffffff) { // 4G check (-1 is reserved for error condition)
ZRCola::DBSource::character chr;
ZRCola::character_db db;
// Preallocate memory.
db.idxChr.reserve(count);
db.data .reserve(count*4);
// Parse characters and build index and data.
while (!ZRCola::DBSource::IsEOF(rs)) {
// Read character from the database.
if (src.GetCharacter(rs, chr)) {
// Add character to index and data.
// Words appended per record, in order: character, category ID, description length,
// related character count, description characters, related characters.
// The index entry is the offset in db.data at which the record starts.
// NOTE(review): chr.keywords is not written to db.data here - confirm this is intentional.
unsigned __int32 idx = db.data.size();
db.data.push_back((unsigned __int16)chr.chr);
for (std::wstring::size_type i = 0; i < sizeof(ZRCola::chrcatid_t)/sizeof(unsigned __int16); i++)
db.data.push_back(((const unsigned __int16*)chr.cat)[i]);
std::wstring::size_type n_desc = chr.desc.length();
wxASSERT_MSG(n_desc <= 0xffff, wxT("character description too long"));
db.data.push_back((unsigned __int16)n_desc);
std::wstring::size_type n_rel = chr.rel.length();
wxASSERT_MSG(n_rel <= 0xffff, wxT("too many related characters"));
db.data.push_back((unsigned __int16)n_rel);
for (std::wstring::size_type i = 0; i < n_desc; i++)
db.data.push_back(chr.desc[i]);
for (std::wstring::size_type i = 0; i < n_rel; i++)
db.data.push_back(chr.rel[i]);
db.idxChr.push_back(idx);
} else
has_errors = true;
// Advance to the next record whether or not the current one parsed.
wxVERIFY(SUCCEEDED(rs->MoveNext()));
}
// Sort indices.
db.idxChr.sort();
// Write characters to file.
dst << ZRCola::character_rec(db);
} else {
_ftprintf(stderr, wxT("%s: error ZCC0017: Error getting character count from database or too many characters.\n"), (LPCTSTR)filenameIn.c_str());
has_errors = true;
}
} else {
_ftprintf(stderr, wxT("%s: error ZCC0016: Error getting characters from database. Please make sure the file is ZRCola.zrc compatible.\n"), (LPCTSTR)filenameIn.c_str());
has_errors = true;
}
}
{
// Get character categories.
// Compiles every character category record of the source database into a chrcat_db and
// writes it to dst as a chrcat_rec. Failures set has_errors.
ATL::CComPtr<ADORecordset> rs;
if (src.SelectCharacterCategories(rs)) {
size_t count = src.GetRecordsetCount(rs);
if (count < 0xffffffff) { // 4G check (-1 is reserved for error condition)
ZRCola::DBSource::chrcat cc;
ZRCola::chrcat_db db;
// Preallocate memory.
db.idxChrCat.reserve(count);
db.idxRnk .reserve(count);
db.data .reserve(count*4);
// Parse character categories and build index and data.
while (!ZRCola::DBSource::IsEOF(rs)) {
// Read character category from the database.
if (src.GetCharacterCategory(rs, cc)) {
// Add character category to index and data.
// Words appended per record, in order: category ID, rank, name length, name characters.
// Both indices receive the offset in db.data at which the record starts.
unsigned __int32 idx = db.data.size();
for (std::wstring::size_type i = 0; i < sizeof(ZRCola::chrcatid_t)/sizeof(unsigned __int16); i++)
db.data.push_back(((const unsigned __int16*)cc.id)[i]);
// The rank is range-checked as a signed 16-bit value, then stored through an unsigned 16-bit cast.
wxASSERT_MSG((int)0xffff8000 <= cc.rank && cc.rank <= (int)0x00007fff, wxT("character category rank out of bounds"));
db.data.push_back((unsigned __int16)cc.rank);
std::wstring::size_type n_name = cc.name.length();
wxASSERT_MSG(n_name <= 0xffff, wxT("character category name too long"));
db.data.push_back((unsigned __int16)n_name);
for (std::wstring::size_type i = 0; i < n_name; i++)
db.data.push_back(cc.name[i]);
db.idxChrCat.push_back(idx);
db.idxRnk .push_back(idx);
// Category names are collected into pot when build_pot is set.
if (build_pot)
pot.insert(cc.name);
} else
has_errors = true;
// Advance to the next record whether or not the current one parsed.
wxVERIFY(SUCCEEDED(rs->MoveNext()));
}
// Sort indices.
db.idxChrCat.sort();
db.idxRnk .sort();
// Write character categories to file.
dst << ZRCola::chrcat_rec(db);
} else {
_ftprintf(stderr, wxT("%s: error ZCC0019: Error getting character category count from database or too many character categories.\n"), (LPCTSTR)filenameIn.c_str());
has_errors = true;
}
} else {
_ftprintf(stderr, wxT("%s: error ZCC0018: Error getting character categories from database. Please make sure the file is ZRCola.zrc compatible.\n"), (LPCTSTR)filenameIn.c_str());
has_errors = true;
}
}
stdex::idrec::close<ZRCola::recordid_t, ZRCola::recordsize_t, ZRCOLA_RECORD_ALIGN>(dst, dst_start);
if (dst.fail()) {

View File

@ -31,6 +31,7 @@
<ClCompile Include="..\src\translate.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\include\zrcola\character.h" />
<ClInclude Include="..\include\zrcola\common.h" />
<ClInclude Include="..\include\zrcola\language.h" />
<ClInclude Include="..\include\zrcola\normalize.h" />

View File

@ -47,6 +47,9 @@
<ClInclude Include="..\include\zrcola\language.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\include\zrcola\character.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="..\res\libZRCola.rc">

View File

@ -0,0 +1,303 @@
/*
Copyright 2015-2016 Amebis
This file is part of ZRCola.
ZRCola is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
ZRCola is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ZRCola. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "common.h"
#include <stdex/idrec.h>
#include <istream>
#include <vector>
#include <string>
#pragma warning(push)
#pragma warning(disable: 4200)
#pragma warning(disable: 4251)
#pragma warning(disable: 4512)
namespace ZRCola {
///
/// Character category ID type
///
/// Two letter abbreviation stored as a fixed two-byte array; there is no
/// terminating zero.
///
typedef char chrcatid_t[2];
///
/// Character database
///
/// Holds the packed character records in \c data and an index over them.
///
class ZRCOLA_API character_db {
public:
#pragma pack(push)
#pragma pack(2)
    ///
    /// Single character record as laid out in \c data
    ///
    struct character {
        wchar_t chr;                ///< Character
        chrcatid_t cat;             ///< Category ID
        unsigned __int16 desc_len;  ///< Character description length in \c data
        unsigned __int16 rel_len;   ///< Related character count in \c data
        wchar_t data[];             ///< Character description and list of related characters
    };
#pragma pack(pop)

    ///
    /// Index of character records, ordered by character
    ///
    class indexChar : public index<unsigned __int16, unsigned __int32, character>
    {
    public:
        ///
        /// Creates the index over the given data
        ///
        /// \param[in] h  Reference to vector holding the data
        ///
        indexChar(_In_ std::vector<unsigned __int16> &h) : index<unsigned __int16, unsigned __int32, character>(h) {}

        ///
        /// Orders two character records by their character (for searching)
        ///
        /// \param[in] a  First record
        /// \param[in] b  Second record
        ///
        /// \returns
        /// - <0 when a <  b
        /// - =0 when a == b
        /// - >0 when a >  b
        ///
        virtual int compare(_In_ const character &a, _In_ const character &b) const
        {
            return a.chr < b.chr ? -1 : (a.chr > b.chr ? 1 : 0);
        }
    } idxChr;   ///< Character index

    std::vector<unsigned __int16> data;     ///< Character data

public:
    ///
    /// Creates an empty database with the index bound to \c data
    ///
    inline character_db() : idxChr(data) {}
};
typedef ZRCOLA_API stdex::idrec::record<character_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> character_rec;
///
/// Character category database
///
/// Holds the packed character category records in \c data and two indices
/// over them: one by category ID and one by rank.
///
class ZRCOLA_API chrcat_db {
public:
#pragma pack(push)
#pragma pack(2)
    ///
    /// Single character category record as laid out in \c data
    ///
    struct chrcat {
        chrcatid_t id;              ///< Character category ID
        unsigned __int16 rank;      ///< Character category rank
        unsigned __int16 name_len;  ///< \c name length (in characters)
        wchar_t name[];             ///< Character category name
    };
#pragma pack(pop)

    ///
    /// Index of character category records, ordered by category ID
    ///
    class indexChrCat : public index<unsigned __int16, unsigned __int32, chrcat>
    {
    public:
        ///
        /// Creates the index over the given data
        ///
        /// \param[in] h  Reference to vector holding the data
        ///
        indexChrCat(_In_ std::vector<unsigned __int16> &h) : index<unsigned __int16, unsigned __int32, chrcat>(h) {}

        ///
        /// Orders two character categories by ID (for searching)
        ///
        /// \param[in] a  First record
        /// \param[in] b  Second record
        ///
        /// \returns
        /// - <0 when a <  b
        /// - =0 when a == b
        /// - >0 when a >  b
        ///
        virtual int compare(_In_ const chrcat &a, _In_ const chrcat &b) const
        {
            return memcmp(a.id, b.id, sizeof(chrcatid_t));
        }
    } idxChrCat;    ///< Character category index

    ///
    /// Index of character category records, ordered by rank
    ///
    class indexRank : public index<unsigned __int16, unsigned __int32, chrcat>
    {
    public:
        ///
        /// Creates the index over the given data
        ///
        /// \param[in] h  Reference to vector holding the data
        ///
        indexRank(_In_ std::vector<unsigned __int16> &h) : index<unsigned __int16, unsigned __int32, chrcat>(h) {}

        ///
        /// Orders two character categories by rank (for searching)
        ///
        /// \param[in] a  First record
        /// \param[in] b  Second record
        ///
        /// \returns
        /// - <0 when a <  b
        /// - =0 when a == b
        /// - >0 when a >  b
        ///
        virtual int compare(_In_ const chrcat &a, _In_ const chrcat &b) const
        {
            return a.rank < b.rank ? -1 : (a.rank > b.rank ? +1 : 0);
        }

        ///
        /// Orders two character categories by rank, then by name (for sorting)
        ///
        /// Names are collated over their common length first; when that part is
        /// equal, the shorter name sorts first.
        ///
        /// \param[in] a  First record
        /// \param[in] b  Second record
        ///
        /// \returns
        /// - <0 when a <  b
        /// - =0 when a == b
        /// - >0 when a >  b
        ///
        virtual int compare_sort(_In_ const chrcat &a, _In_ const chrcat &b) const
        {
            if (a.rank != b.rank)
                return a.rank < b.rank ? -1 : +1;

            const int collated = _wcsncoll(a.name, b.name, std::min<unsigned __int16>(a.name_len, b.name_len));
            if (collated != 0)
                return collated;

            if (a.name_len != b.name_len)
                return a.name_len < b.name_len ? -1 : +1;

            return 0;
        }
    } idxRnk;       ///< Rank index

    std::vector<unsigned __int16> data;     ///< Character category data

public:
    ///
    /// Creates an empty database with both indices bound to \c data
    ///
    inline chrcat_db() : idxChrCat(data), idxRnk(data) {}
};
typedef ZRCOLA_API stdex::idrec::record<chrcat_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> chrcat_rec;
};
const ZRCola::recordid_t stdex::idrec::record<ZRCola::character_db, ZRCola::recordid_t, ZRCola::recordsize_t, ZRCOLA_RECORD_ALIGN>::id = *(ZRCola::recordid_t*)"CHR";
const ZRCola::recordid_t stdex::idrec::record<ZRCola::chrcat_db, ZRCola::recordid_t, ZRCola::recordsize_t, ZRCOLA_RECORD_ALIGN>::id = *(ZRCola::recordid_t*)"CCT";
///
/// Reads character database from a stream
///
/// Expected layout: 32-bit index count, character index, 32-bit data count, data.
/// Reading stops at the first step that leaves the stream in a non-good state.
///
/// \param[in]  stream  Input stream
/// \param[out] db      Character database
///
/// \returns The stream \p stream
///
inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::character_db &db)
{
    // Index count, followed by the character index itself.
    unsigned __int32 idx_count;
    if (!stream.read((char*)&idx_count, sizeof(idx_count)).good())
        return stream;
    db.idxChr.resize(idx_count);
    if (!stream.read((char*)db.idxChr.data(), sizeof(unsigned __int32)*idx_count).good())
        return stream;

    // Data count, followed by the data itself.
    unsigned __int32 data_count;
    if (!stream.read((char*)&data_count, sizeof(data_count)).good())
        return stream;
    db.data.resize(data_count);
    stream.read((char*)db.data.data(), sizeof(unsigned __int16)*data_count);

    return stream;
}
///
/// Reads character category database from a stream
///
/// Expected layout: 32-bit index count, character category index, rank index,
/// 32-bit data count, data. Both indices are read using the same count.
/// Reading stops at the first step that leaves the stream in a non-good state.
///
/// \param[in]  stream  Input stream
/// \param[out] db      Character category database
///
/// \returns The stream \p stream
///
inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::chrcat_db &db)
{
    // Index count, shared by both indices.
    unsigned __int32 idx_count;
    if (!stream.read((char*)&idx_count, sizeof(idx_count)).good())
        return stream;

    // Character category index.
    db.idxChrCat.resize(idx_count);
    if (!stream.read((char*)db.idxChrCat.data(), sizeof(unsigned __int32)*idx_count).good())
        return stream;

    // Rank index.
    db.idxRnk.resize(idx_count);
    if (!stream.read((char*)db.idxRnk.data(), sizeof(unsigned __int32)*idx_count).good())
        return stream;

    // Data count, followed by the data itself.
    unsigned __int32 data_count;
    if (!stream.read((char*)&data_count, sizeof(data_count)).good())
        return stream;
    db.data.resize(data_count);
    stream.read((char*)db.data.data(), sizeof(unsigned __int16)*data_count);

    return stream;
}
#pragma warning(pop)

View File

@ -21,6 +21,7 @@
#include "../../../include/zrcola.h"
#include "../include/zrcola/character.h"
#include "../include/zrcola/language.h"
#include "../include/zrcola/normalize.h"
#include "../include/zrcola/translate.h"

View File

@ -55,7 +55,7 @@ namespace ZRCola {
///
/// Rank index
///
class indexRnk : public index<unsigned __int16, unsigned __int32, chrgrp>
class indexRank : public index<unsigned __int16, unsigned __int32, chrgrp>
{
public:
///
@ -63,7 +63,7 @@ namespace ZRCola {
///
/// \param[in] h Reference to vector holding the data
///
indexRnk(_In_ std::vector<unsigned __int16> &h) : index<unsigned __int16, unsigned __int32, chrgrp>(h) {}
indexRank(_In_ std::vector<unsigned __int16> &h) : index<unsigned __int16, unsigned __int32, chrgrp>(h) {}
///
/// Compares two character groups by rank (for searching)

Binary file not shown.

View File

@ -5,7 +5,7 @@ msgstr ""
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Generator: ZRColaCompile 2.0-alpha6\n"
"X-Generator: ZRColaCompile 2.0-alpha7\n"
msgid "Albanian"
msgstr ""
@ -238,6 +238,21 @@ msgstr ""
msgid "Letter z"
msgstr ""
msgid "Letter, Lowercase"
msgstr ""
msgid "Letter, Modifier"
msgstr ""
msgid "Letter, Other"
msgstr ""
msgid "Letter, Titlecase"
msgstr ""
msgid "Letter, Uppercase"
msgstr ""
msgid "Ligatures"
msgstr ""
@ -250,6 +265,15 @@ msgstr ""
msgid "Maltese"
msgstr ""
msgid "Mark, Enclosing"
msgstr ""
msgid "Mark, Non-Spacing"
msgstr ""
msgid "Mark, Spacing Combining"
msgstr ""
msgid "Metric"
msgstr ""
@ -295,12 +319,30 @@ msgstr ""
msgid "Number 9"
msgstr ""
msgid "Number, Decimal Digit"
msgstr ""
msgid "Number, Letter"
msgstr ""
msgid "Number, Other"
msgstr ""
msgid "Numbers"
msgstr ""
msgid "Numbers - Circled"
msgstr ""
msgid "Other, Control"
msgstr ""
msgid "Other, Format"
msgstr ""
msgid "Other, Surrogate"
msgstr ""
msgid "Parentheses"
msgstr ""
@ -310,6 +352,27 @@ msgstr ""
msgid "Portuguese"
msgstr ""
msgid "Punctuation, Close"
msgstr ""
msgid "Punctuation, Connector"
msgstr ""
msgid "Punctuation, Dash"
msgstr ""
msgid "Punctuation, Final quote"
msgstr ""
msgid "Punctuation, Initial quote"
msgstr ""
msgid "Punctuation, Open"
msgstr ""
msgid "Punctuation, Other"
msgstr ""
msgid "Quotes"
msgstr ""
@ -319,6 +382,15 @@ msgstr ""
msgid "Russian"
msgstr ""
msgid "Separator, Line"
msgstr ""
msgid "Separator, Paragraph"
msgstr ""
msgid "Separator, Space"
msgstr ""
msgid "Serbian Cyrillic"
msgstr ""
@ -385,6 +457,18 @@ msgstr ""
msgid "Symbol ?"
msgstr ""
msgid "Symbol, Currency"
msgstr ""
msgid "Symbol, Math"
msgstr ""
msgid "Symbol, Modifier"
msgstr ""
msgid "Symbol, Other"
msgstr ""
msgid "Turkish"
msgstr ""

Binary file not shown.

View File

@ -245,6 +245,21 @@ msgstr "Črka y"
msgid "Letter z"
msgstr "Črka z"
msgid "Letter, Lowercase"
msgstr "Črka, mala"
msgid "Letter, Modifier"
msgstr "Črka, spreminjevalo"
msgid "Letter, Other"
msgstr "Črka, drugo"
msgid "Letter, Titlecase"
msgstr "Črka, naslovna"
msgid "Letter, Uppercase"
msgstr "Črka, velika"
msgid "Ligatures"
msgstr "Ligature"
@ -257,6 +272,15 @@ msgstr "makedonščina"
msgid "Maltese"
msgstr "malteščina"
msgid "Mark, Enclosing"
msgstr "Ločevalo, obdajajoča"
msgid "Mark, Non-Spacing"
msgstr "Ločevalo, neločljivo"
msgid "Mark, Spacing Combining"
msgstr "Ločevalo, ločljivo"
msgid "Metric"
msgstr "Metrično"
@ -302,12 +326,30 @@ msgstr "Številka 8"
msgid "Number 9"
msgstr "Številka 9"
msgid "Number, Decimal Digit"
msgstr "Številka, desetiška števka"
msgid "Number, Letter"
msgstr "Številka, črka"
msgid "Number, Other"
msgstr "Številka, drugo"
msgid "Numbers"
msgstr "Številke"
msgid "Numbers - Circled"
msgstr "Številke - obkroženo"
msgid "Other, Control"
msgstr "Drugo, kontrolni"
msgid "Other, Format"
msgstr "Drugo, oblikovni"
msgid "Other, Surrogate"
msgstr "Drugo, nadomestni"
msgid "Parentheses"
msgstr "Oklepaji"
@ -317,6 +359,27 @@ msgstr "poljščina"
msgid "Portuguese"
msgstr "portugalščina"
msgid "Punctuation, Close"
msgstr "Ločilo, zapiralno"
msgid "Punctuation, Connector"
msgstr "Ločilo, povezaj"
msgid "Punctuation, Dash"
msgstr "Ločilo, pomišljaj"
msgid "Punctuation, Final quote"
msgstr "Ločilo, zaključni narekovaj"
msgid "Punctuation, Initial quote"
msgstr "Ločilo, uvodni narekovaj"
msgid "Punctuation, Open"
msgstr "Ločilo, odpiralno"
msgid "Punctuation, Other"
msgstr "Ločilo, drugo"
msgid "Quotes"
msgstr "Narekovaji"
@ -326,6 +389,15 @@ msgstr "romunščina"
msgid "Russian"
msgstr "ruščina"
msgid "Separator, Line"
msgstr "Ločilo, vrstic"
msgid "Separator, Paragraph"
msgstr "Ločilo, odstavkov"
msgid "Separator, Space"
msgstr "Ločilo, presledek"
msgid "Serbian Cyrillic"
msgstr "srbščina cirilica"
@ -392,6 +464,18 @@ msgstr "Simbol >"
msgid "Symbol ?"
msgstr "Simbol ?"
msgid "Symbol, Currency"
msgstr "Simbol, valuta"
msgid "Symbol, Math"
msgstr "Simbol, matematični"
msgid "Symbol, Modifier"
msgstr "Simbol, ločevalo"
msgid "Symbol, Other"
msgstr "Simbol, drugo"
msgid "Turkish"
msgstr "turščina"