Character-Language table extended to support multi-UTF-16 characters
This commit is contained in:
parent
a224454b3c
commit
9f083bb521
@ -797,7 +797,7 @@ bool ZRCola::DBSource::GetLanguageCharacter(const com_obj<ADORecordset>& rs, ZRC
|
||||
{
|
||||
com_obj<ADOField> f;
|
||||
wxVERIFY(SUCCEEDED(flds->get_Item(variant(L"znak"), &f)));
|
||||
wxCHECK(GetUnicodeCharacter(f, lc.chr), false);
|
||||
wxCHECK(GetUnicodeString(f, lc.chr), false);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -134,7 +134,7 @@ namespace ZRCola {
|
||||
///
|
||||
class langchar {
|
||||
public:
|
||||
wchar_t chr; ///> Character
|
||||
std::wstring chr; ///> Character
|
||||
ZRCola::langid_t lang; ///< Language ID
|
||||
};
|
||||
|
||||
|
@ -383,9 +383,13 @@ int _tmain(int argc, _TCHAR *argv[])
|
||||
if (src.GetLanguageCharacter(rs, lc)) {
|
||||
// Add language characters to index and data.
|
||||
unsigned __int32 idx = db.data.size();
|
||||
db.data.push_back(lc.chr);
|
||||
for (wstring::size_type i = 0; i < sizeof(ZRCola::langid_t)/sizeof(unsigned __int16); i++)
|
||||
db.data.push_back(((const unsigned __int16*)lc.lang.data)[i]);
|
||||
wstring::size_type n = lc.chr.length();
|
||||
wxASSERT_MSG(n <= 0xffff, wxT("character string too long"));
|
||||
db.data.push_back((unsigned __int16)n);
|
||||
for (wstring::size_type i = 0; i < n; i++)
|
||||
db.data.push_back(lc.chr[i]);
|
||||
db.idxChr.push_back(idx);
|
||||
#ifdef ZRCOLA_LANGCHAR_LANG_IDX
|
||||
db.idxLng.push_back(idx);
|
||||
|
@ -45,8 +45,9 @@ namespace ZRCola {
|
||||
/// Character data
|
||||
///
|
||||
struct langchar {
|
||||
wchar_t chr; ///> Character
|
||||
langid_t lang; ///< Language ID
|
||||
unsigned __int16 chr_len; ///< \c chr length (in UTF-16 characters)
|
||||
wchar_t chr[]; ///< Character
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
@ -76,27 +77,8 @@ namespace ZRCola {
|
||||
///
|
||||
virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const
|
||||
{
|
||||
if (a.chr < b.chr) return -1;
|
||||
else if (a.chr > b.chr) return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
///
|
||||
/// Compares two characters by ID (for sorting)
|
||||
///
|
||||
/// \param[in] a Pointer to first element
|
||||
/// \param[in] b Pointer to second element
|
||||
///
|
||||
/// \returns
|
||||
/// - <0 when a < b
|
||||
/// - =0 when a == b
|
||||
/// - >0 when a > b
|
||||
///
|
||||
virtual int compare_sort(_In_ const langchar &a, _In_ const langchar &b) const
|
||||
{
|
||||
if (a.chr < b.chr) return -1;
|
||||
else if (a.chr > b.chr) return 1;
|
||||
int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len);
|
||||
if (r != 0) return r;
|
||||
|
||||
if (a.lang < b.lang) return -1;
|
||||
else if (a.lang > b.lang) return 1;
|
||||
@ -133,33 +115,14 @@ namespace ZRCola {
|
||||
///
|
||||
virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const
|
||||
{
|
||||
int r = memcmp(a.lang, b.lang, sizeof(langid_t));
|
||||
if (a.lang < b.lang) return -1;
|
||||
else if (a.lang > b.lang) return 1;
|
||||
|
||||
int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len);
|
||||
if (r != 0) return r;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
///
|
||||
/// Compares two languages by ID (for sorting)
|
||||
///
|
||||
/// \param[in] a Pointer to first element
|
||||
/// \param[in] b Pointer to second element
|
||||
///
|
||||
/// \returns
|
||||
/// - <0 when a < b
|
||||
/// - =0 when a == b
|
||||
/// - >0 when a > b
|
||||
///
|
||||
virtual int compare_sort(_In_ const langchar &a, _In_ const langchar &b) const
|
||||
{
|
||||
int r = memcmp(a.lang, b.lang, sizeof(langid_t));
|
||||
if (r != 0) return r;
|
||||
|
||||
if (a.chr < b.chr) return -1;
|
||||
else if (a.chr > b.chr) return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
} idxLng; ///< Character language index
|
||||
#endif
|
||||
|
||||
@ -190,19 +153,8 @@ namespace ZRCola {
|
||||
///
|
||||
/// Tests presence of character in the given language
|
||||
///
|
||||
/// \param[in] chr Character (UTF-16)
|
||||
/// \param[in] lang Language
|
||||
///
|
||||
/// \returns
|
||||
/// - \c true when character is used in language
|
||||
/// - \c false otherwise
|
||||
bool IsLocalCharacter(_In_ wchar_t chr, _In_ langid_t lang) const;
|
||||
|
||||
///
|
||||
/// Tests presence of character in the given language
|
||||
///
|
||||
/// \param[in] chr Pointer to UTF-16 character start
|
||||
/// \param[in] chr_end Pointer to UTF-16 character end
|
||||
/// \param[in] chr Pointer to character
|
||||
/// \param[in] chr_end Pointer to character end
|
||||
/// \param[in] lang Language
|
||||
///
|
||||
/// \returns
|
||||
@ -227,7 +179,7 @@ namespace ZRCola {
|
||||
///
|
||||
struct language {
|
||||
langid_t id; ///< Language ID
|
||||
unsigned __int16 name_len; ///< \c name length (in characters)
|
||||
unsigned __int16 name_len; ///< \c name length (in UTF-16 characters)
|
||||
wchar_t name[]; ///< Language name
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
@ -71,35 +71,14 @@ void ZRCola::LangConvert(_In_ LANGID lang_win, _Inout_ ZRCola::langid_t &lang)
|
||||
#endif
|
||||
|
||||
|
||||
bool ZRCola::langchar_db::IsLocalCharacter(_In_ wchar_t chr, _In_ ZRCola::langid_t lang) const
|
||||
{
|
||||
for (size_t l = 0, r = idxChr.size(); l < r; ) {
|
||||
// Test the character in the middle of the search area.
|
||||
size_t m = (l + r) / 2;
|
||||
const langchar &lc = idxChr[m];
|
||||
|
||||
// Do the bisection test on character.
|
||||
if (chr < lc.chr) r = m;
|
||||
else if (lc.chr < chr ) l = m + 1;
|
||||
else {
|
||||
// Do the bisection test on language.
|
||||
if (lang < lc.lang) r = m;
|
||||
else if (lang > lc.lang) l = m + 1;
|
||||
else {
|
||||
// Match found.
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool ZRCola::langchar_db::IsLocalCharacter(_In_ const wchar_t *chr, _In_ const wchar_t *chr_end, _In_ ZRCola::langid_t lang) const
|
||||
{
|
||||
// TODO: Implement properly!
|
||||
UNREFERENCED_PARAMETER(chr_end);
|
||||
assert(chr < chr_end);
|
||||
return IsLocalCharacter(*chr, lang);
|
||||
size_t n = chr_end - chr;
|
||||
assert(n <= 0xffff);
|
||||
std::unique_ptr<ZRCola::langchar_db::langchar> lc((ZRCola::langchar_db::langchar*)new char[sizeof(ZRCola::langchar_db::langchar) + sizeof(wchar_t)*n]);
|
||||
lc->lang = lang;
|
||||
lc->chr_len = (unsigned __int16)n;
|
||||
memcpy(lc->chr, chr, sizeof(wchar_t)*n);
|
||||
ZRCola::langchar_db::indexChar::size_type start;
|
||||
return idxChr.find(*lc, start);
|
||||
}
|
||||
|
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user