Character-Language table extended to support multi-UTF-16 characters

This commit is contained in:
Simon Rozman 2017-03-14 14:14:39 +01:00
parent a224454b3c
commit 9f083bb521
6 changed files with 26 additions and 91 deletions

View File

@ -797,7 +797,7 @@ bool ZRCola::DBSource::GetLanguageCharacter(const com_obj<ADORecordset>& rs, ZRC
{
com_obj<ADOField> f;
wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak"), &f)));
wxCHECK(GetUnicodeCharacter(f, lc.chr), false);
wxCHECK(GetUnicodeString(f, lc.chr), false);
}
{

View File

@ -134,7 +134,7 @@ namespace ZRCola {
///
class langchar {
public:
wchar_t chr; ///< Character
std::wstring chr; ///< Character (may span multiple UTF-16 characters)
ZRCola::langid_t lang; ///< Language ID
};

View File

@ -383,9 +383,13 @@ int _tmain(int argc, _TCHAR *argv[])
if (src.GetLanguageCharacter(rs, lc)) {
// Add language characters to index and data.
unsigned __int32 idx = db.data.size();
db.data.push_back(lc.chr);
for (wstring::size_type i = 0; i < sizeof(ZRCola::langid_t)/sizeof(unsigned __int16); i++)
db.data.push_back(((const unsigned __int16*)lc.lang.data)[i]);
wstring::size_type n = lc.chr.length();
wxASSERT_MSG(n <= 0xffff, wxT("character string too long"));
db.data.push_back((unsigned __int16)n);
for (wstring::size_type i = 0; i < n; i++)
db.data.push_back(lc.chr[i]);
db.idxChr.push_back(idx);
#ifdef ZRCOLA_LANGCHAR_LANG_IDX
db.idxLng.push_back(idx);

View File

@ -45,8 +45,9 @@ namespace ZRCola {
/// Character data
///
struct langchar {
wchar_t chr; ///< Character
langid_t lang; ///< Language ID
unsigned __int16 chr_len; ///< \c chr length (in UTF-16 characters)
wchar_t chr[]; ///< Character (\c chr_len UTF-16 characters)
};
#pragma pack(pop)
@ -76,27 +77,8 @@ namespace ZRCola {
///
virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const
{
if (a.chr < b.chr) return -1;
else if (a.chr > b.chr) return 1;
return 0;
}
///
/// Compares two characters by ID (for sorting)
///
/// \param[in] a Pointer to first element
/// \param[in] b Pointer to second element
///
/// \returns
/// - <0 when a < b
/// - =0 when a == b
/// - >0 when a > b
///
virtual int compare_sort(_In_ const langchar &a, _In_ const langchar &b) const
{
if (a.chr < b.chr) return -1;
else if (a.chr > b.chr) return 1;
int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len);
if (r != 0) return r;
if (a.lang < b.lang) return -1;
else if (a.lang > b.lang) return 1;
@ -133,33 +115,14 @@ namespace ZRCola {
///
virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const
{
int r = memcmp(a.lang, b.lang, sizeof(langid_t));
if (a.lang < b.lang) return -1;
else if (a.lang > b.lang) return 1;
int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len);
if (r != 0) return r;
return 0;
}
///
/// Compares two languages by ID (for sorting)
///
/// \param[in] a Pointer to first element
/// \param[in] b Pointer to second element
///
/// \returns
/// - <0 when a < b
/// - =0 when a == b
/// - >0 when a > b
///
virtual int compare_sort(_In_ const langchar &a, _In_ const langchar &b) const
{
int r = memcmp(a.lang, b.lang, sizeof(langid_t));
if (r != 0) return r;
if (a.chr < b.chr) return -1;
else if (a.chr > b.chr) return 1;
return 0;
}
} idxLng; ///< Character language index
#endif
@ -190,19 +153,8 @@ namespace ZRCola {
///
/// Tests presence of character in the given language
///
/// \param[in] chr Character (UTF-16)
/// \param[in] lang Language
///
/// \returns
/// - \c true when character is used in language
/// - \c false otherwise
bool IsLocalCharacter(_In_ wchar_t chr, _In_ langid_t lang) const;
///
/// Tests presence of character in the given language
///
/// \param[in] chr Pointer to UTF-16 character start
/// \param[in] chr_end Pointer to UTF-16 character end
/// \param[in] chr Pointer to character
/// \param[in] chr_end Pointer to character end
/// \param[in] lang Language
///
/// \returns
@ -227,7 +179,7 @@ namespace ZRCola {
///
struct language {
langid_t id; ///< Language ID
unsigned __int16 name_len; ///< \c name length (in characters)
unsigned __int16 name_len; ///< \c name length (in UTF-16 characters)
wchar_t name[]; ///< Language name (\c name_len UTF-16 characters)
};
#pragma pack(pop)

View File

@ -71,35 +71,14 @@ void ZRCola::LangConvert(_In_ LANGID lang_win, _Inout_ ZRCola::langid_t &lang)
#endif
// Bisection search of idxChr for an entry whose character equals chr and whose
// language equals lang. Returns true as soon as such an entry is found, and
// false once the search interval [l, r) becomes empty.
// NOTE(review): presumes idxChr is ordered by character first, then by
// language - confirm against the index's compare() implementation.
bool ZRCola::langchar_db::IsLocalCharacter(_In_ wchar_t chr, _In_ ZRCola::langid_t lang) const
{
for (size_t l = 0, r = idxChr.size(); l < r; ) {
// Test the character in the middle of the search area.
size_t m = (l + r) / 2;
const langchar &lc = idxChr[m];
// Do the bisection test on character.
if (chr < lc.chr) r = m;
else if (lc.chr < chr ) l = m + 1;
else {
// Characters are equal: the language decides which half to continue in.
// Do the bisection test on language.
if (lang < lc.lang) r = m;
else if (lang > lc.lang) l = m + 1;
else {
// Match found.
return true;
}
}
}
// Interval exhausted without an exact (character, language) match.
return false;
}
// Tests whether the character [chr, chr_end) is present in the character index
// for the given language.
//
// A temporary variable-length langchar search key is built from the arguments
// and looked up in idxChr. Returns whatever idxChr.find() reports for that key.
bool ZRCola::langchar_db::IsLocalCharacter(_In_ const wchar_t *chr, _In_ const wchar_t *chr_end, _In_ ZRCola::langid_t lang) const
{
    size_t n = chr_end - chr;
    assert(n <= 0xffff); // chr_len is a 16-bit field

    // langchar ends in a flexible array member, so the key is carved out of a raw
    // byte buffer. The buffer is owned as char[] so that it is released with
    // delete[], matching the new char[] allocation; handing the pointer to
    // std::unique_ptr<langchar> would release it with scalar delete on the wrong
    // type, which is undefined behaviour.
    std::unique_ptr<char[]> buf(new char[sizeof(ZRCola::langchar_db::langchar) + sizeof(wchar_t)*n]);
    ZRCola::langchar_db::langchar *lc = reinterpret_cast<ZRCola::langchar_db::langchar*>(buf.get());

    lc->lang = lang;
    lc->chr_len = (unsigned __int16)n;
    memcpy(lc->chr, chr, sizeof(wchar_t)*n);

    // The position reported by find() is not needed here; only the hit/miss result is.
    ZRCola::langchar_db::indexChar::size_type start;
    return idxChr.find(*lc, start);
}

Binary file not shown.