Character-Language table extended to support multi-UTF-16 characters
This commit is contained in:
parent
a224454b3c
commit
9f083bb521
@ -797,7 +797,7 @@ bool ZRCola::DBSource::GetLanguageCharacter(const com_obj<ADORecordset>& rs, ZRC
|
|||||||
{
|
{
|
||||||
com_obj<ADOField> f;
|
com_obj<ADOField> f;
|
||||||
wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak"), &f)));
|
wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak"), &f)));
|
||||||
wxCHECK(GetUnicodeCharacter(f, lc.chr), false);
|
wxCHECK(GetUnicodeString(f, lc.chr), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -134,7 +134,7 @@ namespace ZRCola {
|
|||||||
///
|
///
|
||||||
class langchar {
|
class langchar {
|
||||||
public:
|
public:
|
||||||
wchar_t chr; ///> Character
|
std::wstring chr; ///> Character
|
||||||
ZRCola::langid_t lang; ///< Language ID
|
ZRCola::langid_t lang; ///< Language ID
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -383,9 +383,13 @@ int _tmain(int argc, _TCHAR *argv[])
|
|||||||
if (src.GetLanguageCharacter(rs, lc)) {
|
if (src.GetLanguageCharacter(rs, lc)) {
|
||||||
// Add language characters to index and data.
|
// Add language characters to index and data.
|
||||||
unsigned __int32 idx = db.data.size();
|
unsigned __int32 idx = db.data.size();
|
||||||
db.data.push_back(lc.chr);
|
|
||||||
for (wstring::size_type i = 0; i < sizeof(ZRCola::langid_t)/sizeof(unsigned __int16); i++)
|
for (wstring::size_type i = 0; i < sizeof(ZRCola::langid_t)/sizeof(unsigned __int16); i++)
|
||||||
db.data.push_back(((const unsigned __int16*)lc.lang.data)[i]);
|
db.data.push_back(((const unsigned __int16*)lc.lang.data)[i]);
|
||||||
|
wstring::size_type n = lc.chr.length();
|
||||||
|
wxASSERT_MSG(n <= 0xffff, wxT("character string too long"));
|
||||||
|
db.data.push_back((unsigned __int16)n);
|
||||||
|
for (wstring::size_type i = 0; i < n; i++)
|
||||||
|
db.data.push_back(lc.chr[i]);
|
||||||
db.idxChr.push_back(idx);
|
db.idxChr.push_back(idx);
|
||||||
#ifdef ZRCOLA_LANGCHAR_LANG_IDX
|
#ifdef ZRCOLA_LANGCHAR_LANG_IDX
|
||||||
db.idxLng.push_back(idx);
|
db.idxLng.push_back(idx);
|
||||||
|
@ -45,8 +45,9 @@ namespace ZRCola {
|
|||||||
/// Character data
|
/// Character data
|
||||||
///
|
///
|
||||||
struct langchar {
|
struct langchar {
|
||||||
wchar_t chr; ///> Character
|
|
||||||
langid_t lang; ///< Language ID
|
langid_t lang; ///< Language ID
|
||||||
|
unsigned __int16 chr_len; ///< \c chr length (in UTF-16 characters)
|
||||||
|
wchar_t chr[]; ///< Character
|
||||||
};
|
};
|
||||||
#pragma pack(pop)
|
#pragma pack(pop)
|
||||||
|
|
||||||
@ -76,27 +77,8 @@ namespace ZRCola {
|
|||||||
///
|
///
|
||||||
virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const
|
virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const
|
||||||
{
|
{
|
||||||
if (a.chr < b.chr) return -1;
|
int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len);
|
||||||
else if (a.chr > b.chr) return 1;
|
if (r != 0) return r;
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
///
|
|
||||||
/// Compares two characters by ID (for sorting)
|
|
||||||
///
|
|
||||||
/// \param[in] a Pointer to first element
|
|
||||||
/// \param[in] b Pointer to second element
|
|
||||||
///
|
|
||||||
/// \returns
|
|
||||||
/// - <0 when a < b
|
|
||||||
/// - =0 when a == b
|
|
||||||
/// - >0 when a > b
|
|
||||||
///
|
|
||||||
virtual int compare_sort(_In_ const langchar &a, _In_ const langchar &b) const
|
|
||||||
{
|
|
||||||
if (a.chr < b.chr) return -1;
|
|
||||||
else if (a.chr > b.chr) return 1;
|
|
||||||
|
|
||||||
if (a.lang < b.lang) return -1;
|
if (a.lang < b.lang) return -1;
|
||||||
else if (a.lang > b.lang) return 1;
|
else if (a.lang > b.lang) return 1;
|
||||||
@ -133,33 +115,14 @@ namespace ZRCola {
|
|||||||
///
|
///
|
||||||
virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const
|
virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const
|
||||||
{
|
{
|
||||||
int r = memcmp(a.lang, b.lang, sizeof(langid_t));
|
if (a.lang < b.lang) return -1;
|
||||||
|
else if (a.lang > b.lang) return 1;
|
||||||
|
|
||||||
|
int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len);
|
||||||
if (r != 0) return r;
|
if (r != 0) return r;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
/// Compares two languages by ID (for sorting)
|
|
||||||
///
|
|
||||||
/// \param[in] a Pointer to first element
|
|
||||||
/// \param[in] b Pointer to second element
|
|
||||||
///
|
|
||||||
/// \returns
|
|
||||||
/// - <0 when a < b
|
|
||||||
/// - =0 when a == b
|
|
||||||
/// - >0 when a > b
|
|
||||||
///
|
|
||||||
virtual int compare_sort(_In_ const langchar &a, _In_ const langchar &b) const
|
|
||||||
{
|
|
||||||
int r = memcmp(a.lang, b.lang, sizeof(langid_t));
|
|
||||||
if (r != 0) return r;
|
|
||||||
|
|
||||||
if (a.chr < b.chr) return -1;
|
|
||||||
else if (a.chr > b.chr) return 1;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
} idxLng; ///< Character language index
|
} idxLng; ///< Character language index
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -190,19 +153,8 @@ namespace ZRCola {
|
|||||||
///
|
///
|
||||||
/// Tests presence of character in the given language
|
/// Tests presence of character in the given language
|
||||||
///
|
///
|
||||||
/// \param[in] chr Character (UTF-16)
|
/// \param[in] chr Pointer to character
|
||||||
/// \param[in] lang Language
|
/// \param[in] chr_end Pointer to character end
|
||||||
///
|
|
||||||
/// \returns
|
|
||||||
/// - \c true when character is used in language
|
|
||||||
/// - \c false otherwise
|
|
||||||
bool IsLocalCharacter(_In_ wchar_t chr, _In_ langid_t lang) const;
|
|
||||||
|
|
||||||
///
|
|
||||||
/// Tests presence of character in the given language
|
|
||||||
///
|
|
||||||
/// \param[in] chr Pointer to UTF-16 character start
|
|
||||||
/// \param[in] chr_end Pointer to UTF-16 character end
|
|
||||||
/// \param[in] lang Language
|
/// \param[in] lang Language
|
||||||
///
|
///
|
||||||
/// \returns
|
/// \returns
|
||||||
@ -227,7 +179,7 @@ namespace ZRCola {
|
|||||||
///
|
///
|
||||||
struct language {
|
struct language {
|
||||||
langid_t id; ///< Language ID
|
langid_t id; ///< Language ID
|
||||||
unsigned __int16 name_len; ///< \c name length (in characters)
|
unsigned __int16 name_len; ///< \c name length (in UTF-16 characters)
|
||||||
wchar_t name[]; ///< Language name
|
wchar_t name[]; ///< Language name
|
||||||
};
|
};
|
||||||
#pragma pack(pop)
|
#pragma pack(pop)
|
||||||
|
@ -71,35 +71,14 @@ void ZRCola::LangConvert(_In_ LANGID lang_win, _Inout_ ZRCola::langid_t &lang)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
bool ZRCola::langchar_db::IsLocalCharacter(_In_ wchar_t chr, _In_ ZRCola::langid_t lang) const
|
|
||||||
{
|
|
||||||
for (size_t l = 0, r = idxChr.size(); l < r; ) {
|
|
||||||
// Test the character in the middle of the search area.
|
|
||||||
size_t m = (l + r) / 2;
|
|
||||||
const langchar &lc = idxChr[m];
|
|
||||||
|
|
||||||
// Do the bisection test on character.
|
|
||||||
if (chr < lc.chr) r = m;
|
|
||||||
else if (lc.chr < chr ) l = m + 1;
|
|
||||||
else {
|
|
||||||
// Do the bisection test on language.
|
|
||||||
if (lang < lc.lang) r = m;
|
|
||||||
else if (lang > lc.lang) l = m + 1;
|
|
||||||
else {
|
|
||||||
// Match found.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool ZRCola::langchar_db::IsLocalCharacter(_In_ const wchar_t *chr, _In_ const wchar_t *chr_end, _In_ ZRCola::langid_t lang) const
|
bool ZRCola::langchar_db::IsLocalCharacter(_In_ const wchar_t *chr, _In_ const wchar_t *chr_end, _In_ ZRCola::langid_t lang) const
|
||||||
{
|
{
|
||||||
// TODO: Implement properly!
|
size_t n = chr_end - chr;
|
||||||
UNREFERENCED_PARAMETER(chr_end);
|
assert(n <= 0xffff);
|
||||||
assert(chr < chr_end);
|
std::unique_ptr<ZRCola::langchar_db::langchar> lc((ZRCola::langchar_db::langchar*)new char[sizeof(ZRCola::langchar_db::langchar) + sizeof(wchar_t)*n]);
|
||||||
return IsLocalCharacter(*chr, lang);
|
lc->lang = lang;
|
||||||
|
lc->chr_len = (unsigned __int16)n;
|
||||||
|
memcpy(lc->chr, chr, sizeof(wchar_t)*n);
|
||||||
|
ZRCola::langchar_db::indexChar::size_type start;
|
||||||
|
return idxChr.find(*lc, start);
|
||||||
}
|
}
|
||||||
|
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user