Add non-PUA ZRCola translations to ZRCola to Unicode translation

This allows ZRCola to Unicode translation to use precomposed Unicode
characters and provide better coverage for ZRCola to Unicode
translation.

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2021-12-13 14:37:59 +01:00
parent a3f0662022
commit 0b68cdedda
5 changed files with 21 additions and 9 deletions

View File

@ -5,11 +5,6 @@
#include "pch.h" #include "pch.h"
///
/// Tests whether a character falls inside the Unicode Basic Multilingual Plane
/// Private Use Area (U+E000 through U+F8FF, both ends inclusive).
///
/// \param[in] c  Character to test
///
/// \returns
/// - \c true when \p c is in the range U+E000..U+F8FF
/// - \c false otherwise
///
static inline bool is_pua(_In_ wchar_t c)
{
    // Anything below the first PUA code point is rejected up front.
    if (c < L'\ue000')
        return false;

    // Otherwise the character qualifies only if it does not exceed the last PUA code point.
    return c <= L'\uf8ff';
}
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// wxZRColaComposerPanel // wxZRColaComposerPanel
@ -329,8 +324,8 @@ void wxZRColaComposerPanel::OnDestinationText(wxCommandEvent& event)
wxString src = m_destination->GetValue(); wxString src = m_destination->GetValue();
size_t len = src.Length(); size_t len = src.Length();
for (size_t i = 0, j; i < len;) { for (size_t i = 0, j; i < len;) {
bool pua_i = is_pua(src[i]); bool pua_i = ZRCola::ispua(src[i]);
for (j = i + 1; j < len && pua_i == is_pua(src[j]); j++); for (j = i + 1; j < len && pua_i == ZRCola::ispua(src[j]); j++);
m_destination->SetStyle((long)i, (long)j, pua_i ? m_stylePUA : m_styleNormal); m_destination->SetStyle((long)i, (long)j, pua_i ? m_stylePUA : m_styleNormal);
i = j; i = j;
} }

View File

@ -380,15 +380,27 @@ int _tmain(int argc, _TCHAR *argv[])
// Parse translations and build index and data. // Parse translations and build index and data.
ZRCola::DBSource::translation trans; ZRCola::DBSource::translation trans;
trans.set = 0;
for (auto t = db_temp2.cbegin(), t_end = db_temp2.cend(); t != t_end; ++t) { for (auto t = db_temp2.cbegin(), t_end = db_temp2.cend(); t != t_end; ++t) {
// Add translation to index and data. // Add translation to index and data.
trans.dst.str = t->first; trans.dst.str = t->first;
for (auto d = t->second.cbegin(), d_end = t->second.cend(); d != d_end; ++d) { for (auto d = t->second.cbegin(), d_end = t->second.cend(); d != d_end; ++d) {
trans.set = (int)ZRCOLA_TRANSEQID_DEFAULT;
trans.dst.rank = d->second.rank_dst; trans.dst.rank = d->second.rank_dst;
trans.src.rank = d->second.rank_src; trans.src.rank = d->second.rank_src;
trans.src.str = d->first; trans.src.str = d->first;
db_trans << trans; db_trans << trans;
// If destination contains no PUA characters, add it to the ZRCola >> Unicode transliteration too.
bool has_pua = false;
for (auto ch = trans.dst.str.cbegin(), ch_end = trans.dst.str.cend(); ch != ch_end; ++ch)
if (ZRCola::ispua(*ch)) {
has_pua = true;
break;
}
if (!has_pua) {
trans.set = (int)ZRCOLA_TRANSEQID_UNICODE;
db_trans << trans;
}
} }
} }
} else { } else {

View File

@ -29,6 +29,11 @@ namespace ZRCola {
/// ///
typedef double charrank_t; typedef double charrank_t;
///
/// Checks if a character belongs to the Unicode Basic Multilingual Plane
/// Private Use Area (U+E000 through U+F8FF, both ends inclusive).
///
/// \param[in] c  Character to check
///
/// \returns
/// - \c true when \p c is in the range U+E000..U+F8FF
/// - \c false otherwise
///
inline bool ispua(_In_ wchar_t c)
{
    // A character is outside the PUA when it precedes the first or follows the last PUA code point.
    const bool outside = c < L'\ue000' || L'\uf8ff' < c;
    return !outside;
}
#pragma pack(push) #pragma pack(push)
#pragma pack(2) #pragma pack(2)
/// ///

View File

@ -33,7 +33,7 @@
/// ///
/// ZRCola to Unicode translation sequence ID /// ZRCola to Unicode translation sequence ID
/// ///
#define ZRCOLA_TRANSEQID_UNICODE ((ZRCola::transeqid_t)31) #define ZRCOLA_TRANSEQID_UNICODE ((ZRCola::transeqid_t)32)
namespace ZRCola { namespace ZRCola {

Binary file not shown.