Add non-PUA ZRCola translations to ZRCola to Unicode translation

This allows ZRCola to Unicode translation to use precomposed Unicode
characters and provide better coverage for ZRCola to Unicode
translation.

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2021-12-13 14:37:59 +01:00
parent a3f0662022
commit 0b68cdedda
5 changed files with 21 additions and 9 deletions

View File

@ -5,11 +5,6 @@
#include "pch.h"
/// Tests whether a character lies in the range U+E000..U+F8FF (inclusive),
/// i.e. the Private Use Area of the Basic Multilingual Plane.
///
/// \param[in] c  Character to test
///
/// \returns \c true when \p c is within the range; \c false otherwise
static inline bool is_pua(_In_ wchar_t c)
{
    // Inclusive bounds of the tested range.
    const wchar_t pua_first = L'\ue000';
    const wchar_t pua_last  = L'\uf8ff';

    if (c < pua_first)
        return false;
    return c <= pua_last;
}
//////////////////////////////////////////////////////////////////////////
// wxZRColaComposerPanel
@ -329,8 +324,8 @@ void wxZRColaComposerPanel::OnDestinationText(wxCommandEvent& event)
wxString src = m_destination->GetValue();
size_t len = src.Length();
for (size_t i = 0, j; i < len;) {
bool pua_i = is_pua(src[i]);
for (j = i + 1; j < len && pua_i == is_pua(src[j]); j++);
bool pua_i = ZRCola::ispua(src[i]);
for (j = i + 1; j < len && pua_i == ZRCola::ispua(src[j]); j++);
m_destination->SetStyle((long)i, (long)j, pua_i ? m_stylePUA : m_styleNormal);
i = j;
}

View File

@ -380,15 +380,27 @@ int _tmain(int argc, _TCHAR *argv[])
// Parse translations and build index and data.
ZRCola::DBSource::translation trans;
trans.set = 0;
for (auto t = db_temp2.cbegin(), t_end = db_temp2.cend(); t != t_end; ++t) {
// Add translation to index and data.
trans.dst.str = t->first;
for (auto d = t->second.cbegin(), d_end = t->second.cend(); d != d_end; ++d) {
trans.set = (int)ZRCOLA_TRANSEQID_DEFAULT;
trans.dst.rank = d->second.rank_dst;
trans.src.rank = d->second.rank_src;
trans.src.str = d->first;
db_trans << trans;
// If destination contains no PUA characters, add it to the ZRCola >> Unicode transliteration too.
bool has_pua = false;
for (auto ch = trans.dst.str.cbegin(), ch_end = trans.dst.str.cend(); ch != ch_end; ++ch)
if (ZRCola::ispua(*ch)) {
has_pua = true;
break;
}
if (!has_pua) {
trans.set = (int)ZRCOLA_TRANSEQID_UNICODE;
db_trans << trans;
}
}
}
} else {

View File

@ -29,6 +29,11 @@ namespace ZRCola {
///
typedef double charrank_t;
///
/// Tests whether a character lies in the range U+E000..U+F8FF (inclusive),
/// i.e. the Private Use Area of the Basic Multilingual Plane.
///
/// \param[in] c  Character to test
///
/// \returns
/// - \c true when \p c is within the range
/// - \c false otherwise
///
inline bool ispua(_In_ wchar_t c)
{
    // Reject anything below the first or above the last code point of the range.
    return !(c < L'\ue000' || L'\uf8ff' < c);
}
#pragma pack(push)
#pragma pack(2)
///

View File

@ -33,7 +33,7 @@
///
/// ZRCola to Unicode translation sequence ID
///
#define ZRCOLA_TRANSEQID_UNICODE ((ZRCola::transeqid_t)31)
#define ZRCOLA_TRANSEQID_UNICODE ((ZRCola::transeqid_t)32)
namespace ZRCola {

Binary file not shown.