Ambiguous decomposition sequences now decompose to the first-ranked decomposition

(resolves #18)
This commit is contained in:
Simon Rozman 2016-04-22 11:30:42 +02:00
parent 5df7ca886b
commit 8c51f9c2a6
6 changed files with 24 additions and 1 deletions

View File

@ -302,7 +302,7 @@ bool ZRCola::DBSource::SelectTranslations(ATL::CComPtr<ADORecordset> &rs) const
wxCHECK(SUCCEEDED(::CoCreateInstance(CLSID_CADORecordset, NULL, CLSCTX_ALL, IID_IADORecordset, (LPVOID*)&rs)), false);
// Open it.
if (FAILED(rs->Open(ATL::CComVariant(L"SELECT [komb], [znak] FROM [VRS_ReplChar] WHERE [rang_komb]=1"), ATL::CComVariant(m_db), adOpenStatic, adLockReadOnly, adCmdText))) {
if (FAILED(rs->Open(ATL::CComVariant(L"SELECT [komb], [znak], [rang_znak] FROM [VRS_ReplChar] WHERE [rang_komb]=1"), ATL::CComVariant(m_db), adOpenStatic, adLockReadOnly, adCmdText))) {
_ftprintf(stderr, wxT("%s: error ZCC0040: Error loading compositions from database. Please make sure the file is ZRCola.zrc compatible.\n"), m_filename.c_str());
LogErrors();
return false;
@ -331,6 +331,12 @@ bool ZRCola::DBSource::GetTranslation(const ATL::CComPtr<ADORecordset>& rs, ZRCo
wxCHECK(GetUnicodeCharacter(f, t.chr), false);
}
{
ATL::CComPtr<ADOField> f;
wxVERIFY(SUCCEEDED(flds->get_Item(ATL::CComVariant(L"rang_znak"), &f)));
wxCHECK(GetValue(f, t.rank), false);
}
return true;
}

View File

@ -41,6 +41,7 @@ namespace ZRCola {
public:
wchar_t chr; ///< Composed character
std::wstring str; ///< Decomposed string
int rank; ///< Decomposition rank
};

View File

@ -346,6 +346,8 @@ int _tmain(int argc, _TCHAR *argv[])
// Add translation to index and data.
unsigned __int32 idx = db.data.size();
db.data.push_back(trans.chr);
wxASSERT_MSG((int)0xffff8000 <= trans.rank && trans.rank <= (int)0x00007fff, wxT("transformation rank out of bounds"));
db.data.push_back((unsigned __int16)trans.rank);
std::wstring::size_type n = trans.str.length();
wxASSERT_MSG(n <= 0xffff, wxT("transformation string too long"));
db.data.push_back((unsigned __int16)n);

View File

@ -46,6 +46,7 @@ namespace ZRCola {
///
struct translation {
wchar_t chr; ///< Composed character
unsigned __int16 rank; ///< Decomposition rank
unsigned __int16 str_len; ///< \c str length (in characters)
wchar_t str[]; ///< Decomposed string
@ -183,6 +184,9 @@ namespace ZRCola {
if (a.chr < b.chr) return -1;
else if (a.chr > b.chr) return +1;
if (a.rank < b.rank) return -1;
else if (a.rank > b.rank) return +1;
int r = translation::CompareString(a.str, a.str_len, b.str, b.str_len);
if (r != 0) return r;

View File

@ -131,6 +131,16 @@ void ZRCola::translation_db::Decompose(_In_z_count_(inputMax) const wchar_t* inp
else if (decompSrc < c) l = m + 1;
else {
// Character found.
// Narrow the search area on the left to start at the first decomposition in the run (first by rank).
for (size_t rr = m; l < rr;) {
size_t m = (l + rr) / 2;
const translation &trans = idxDecomp[m];
wchar_t decompSrc = trans.chr;
if (c <= decompSrc) rr = m; else l = m + 1;
}
const translation &trans = idxDecomp[l];
if (trans.str_len && trans.str[0] != L'#' && (!lc_db || !lc_db->IsLocalCharacter(c, lang))) {
// Append decomposed sequence.
output.append(trans.str, trans.str_len);

Binary file not shown.