Multiple translation sets support

This commit is contained in:
Simon Rozman 2017-03-30 09:49:22 +02:00
parent 716dde0a84
commit 186dbee443
5 changed files with 47 additions and 21 deletions

View File

@ -91,10 +91,10 @@ void wxZRColaComposerPanel::SynchronizePanels()
size_t len = GetValue(m_source, src);
std::wstring norm;
app->m_t_db.TranslateInv(src.data(), len, norm, &m_mapping1);
app->m_t_db.TranslateInv(0, src.data(), len, norm, &m_mapping1);
std::wstring dst;
app->m_t_db.Translate(norm.data(), norm.size(), dst, &m_mapping2);
app->m_t_db.Translate(0, norm.data(), norm.size(), dst, &m_mapping2);
m_source->GetSelection(&m_selSource.first, &m_selSource.second);
@ -120,9 +120,9 @@ void wxZRColaComposerPanel::SynchronizePanels()
std::wstring dst;
wxZRColaFrame *mainWnd = dynamic_cast<wxZRColaFrame*>(wxGetActiveWindow());
if (mainWnd)
app->m_t_db.TranslateInv(src.data(), len, &app->m_lc_db, mainWnd->m_settings->m_lang, dst, &m_mapping2);
app->m_t_db.TranslateInv(0, src.data(), len, &app->m_lc_db, mainWnd->m_settings->m_lang, dst, &m_mapping2);
else
app->m_t_db.TranslateInv(src.data(), len, dst, &m_mapping2);
app->m_t_db.TranslateInv(0, src.data(), len, dst, &m_mapping2);
m_mapping1.clear();
m_mapping2.invert();

View File

@ -234,6 +234,7 @@ int _tmain(int argc, _TCHAR *argv[])
// Add translation to index and data.
for (auto d = t->second.cbegin(), d_end = t->second.cend(); d != d_end; ++d) {
unsigned __int32 idx = db.data.size();
db.data.push_back((unsigned __int16)0);
wxASSERT_MSG((int)0xffff8000 <= d->first && d->first <= (int)0x00007fff, wxT("destination character rank out of bounds"));
db.data.push_back((unsigned __int16)d->first);
wxASSERT_MSG((int)0xffff8000 <= d->second.rank && d->second.rank <= (int)0x00007fff, wxT("source character rank out of bounds"));

View File

@ -47,6 +47,7 @@ namespace ZRCola {
///
struct translation {
public:
unsigned __int16 set; ///< Translation set ID
unsigned __int16 dst_rank; ///< Destination character rank
unsigned __int16 src_rank; ///< Source character rank
@ -63,6 +64,7 @@ namespace ZRCola {
///
/// Constructs the translation
///
/// \param[in] set Translation set ID
/// \param[in] dst_rank Destination character rank
/// \param[in] dst Destination character
/// \param[in] dst_len Number of UTF-16 characters in \p dst
@ -71,6 +73,7 @@ namespace ZRCola {
/// \param[in] src_len Number of UTF-16 characters in \p src
///
inline translation(
_In_opt_ unsigned __int16 set = 0,
_In_opt_ unsigned __int16 dst_rank = 0,
_In_opt_z_count_(dst_len) const wchar_t *dst = NULL,
_In_opt_ size_t dst_len = 0,
@ -78,6 +81,7 @@ namespace ZRCola {
_In_opt_z_count_(src_len) const wchar_t *src = NULL,
_In_opt_ size_t src_len = 0)
{
this->set = set;
this->dst_rank = dst_rank;
this->src_rank = src_rank;
this->dst_to = static_cast<unsigned __int16>(dst_len);
@ -137,6 +141,9 @@ namespace ZRCola {
///
virtual int compare(_In_ const translation &a, _In_ const translation &b) const
{
if (a.set < b.set) return -1;
else if (a.set > b.set) return +1;
int r = ZRCola::CompareString(a.src(), a.src_len(), b.src(), b.src_len());
if (r != 0) return r;
@ -156,6 +163,9 @@ namespace ZRCola {
///
virtual int compare_sort(_In_ const translation &a, _In_ const translation &b) const
{
if (a.set < b.set) return -1;
else if (a.set > b.set) return +1;
int r = ZRCola::CompareString(a.src(), a.src_len(), b.src(), b.src_len());
if (r != 0) return r;
@ -196,6 +206,9 @@ namespace ZRCola {
///
virtual int compare(_In_ const translation &a, _In_ const translation &b) const
{
if (a.set < b.set) return -1;
else if (a.set > b.set) return +1;
int r = ZRCola::CompareString(a.dst(), a.dst_len(), b.dst(), b.dst_len());
if (r != 0) return r;
@ -215,6 +228,9 @@ namespace ZRCola {
///
virtual int compare_sort(_In_ const translation &a, _In_ const translation &b) const
{
if (a.set < b.set) return -1;
else if (a.set > b.set) return +1;
int r = ZRCola::CompareString(a.dst(), a.dst_len(), b.dst(), b.dst_len());
if (r != 0) return r;
@ -250,29 +266,32 @@ namespace ZRCola {
///
/// Translates string
///
/// \param[in] set Translation set ID
/// \param[in] input Input string (UTF-16)
/// \param[in] inputMax Length of the input string in characters. Can be (size_t)-1 if \p input is zero terminated.
/// \param[out] output Output string (UTF-16)
/// \param[out] map The vector of source to destination index mappings (optional)
///
void Translate(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const;
void Translate(_In_ unsigned __int16 set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const;
///
/// Inverse translates string
///
/// \param[in] set Translation set ID
/// \param[in] input Input string (UTF-16)
/// \param[in] inputMax Length of the input string in characters. Can be (size_t)-1 if \p input is zero terminated.
/// \param[out] output Output string (UTF-16)
/// \param[out] map The vector of source to destination index mappings (optional)
///
inline void TranslateInv(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const
inline void TranslateInv(_In_ unsigned __int16 set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const
{
TranslateInv(input, inputMax, NULL, langid_t::blank, output, map);
TranslateInv(set, input, inputMax, NULL, langid_t::blank, output, map);
}
///
/// Inverse translates string, omitting language-specific characters
///
/// \param[in] set Translation set ID
/// \param[in] input Input string (UTF-16)
/// \param[in] inputMax Length of the input string in characters. Can be (size_t)-1 if \p input is zero terminated.
/// \param[in] lc_db Language character database
@ -280,7 +299,7 @@ namespace ZRCola {
/// \param[out] output Output string (UTF-16)
/// \param[out] map The vector of source to destination index mappings (optional)
///
void TranslateInv(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _In_opt_ const langchar_db *lc_db, _In_opt_ langid_t lang, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const;
void TranslateInv(_In_ unsigned __int16 set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _In_opt_ const langchar_db *lc_db, _In_opt_ langid_t lang, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const;
};

View File

@ -20,7 +20,7 @@
#include "stdafx.h"
void ZRCola::translation_db::Translate(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map) const
void ZRCola::translation_db::Translate(_In_ unsigned __int16 set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map) const
{
assert(input || inputMax == 0);
@ -33,12 +33,15 @@ void ZRCola::translation_db::Translate(_In_z_count_(inputMax) const wchar_t* inp
if (map)
map->clear();
auto count = idxTrans.size();
// Limit search to the given set first.
indexTrans::size_type l_set, r_set;
idxTrans.find(translation(set ), l_set);
idxTrans.find(translation(set + 1), r_set);
for (size_t i = 0; i < inputMax;) {
// Find the longest matching translation at i-th character.
size_t l_match = (size_t)-1;
for (size_t l = 0, r = count, ii = i, j = 0; ii < inputMax && l < r; ii++, j++) {
for (size_t l = l_set, r = r_set, ii = i, j = 0; ii < inputMax && l < r; ii++, j++) {
wchar_t c = input[ii];
while (l < r) {
// Test the translation in the middle of the search area.
@ -83,7 +86,7 @@ void ZRCola::translation_db::Translate(_In_z_count_(inputMax) const wchar_t* inp
}
}
if (l_match < count) {
if (l_match < r_set) {
// The saved translation was an exact match.
const translation &trans = idxTrans[l_match];
output.append(trans.dst(), trans.dst_end());
@ -101,7 +104,7 @@ void ZRCola::translation_db::Translate(_In_z_count_(inputMax) const wchar_t* inp
}
void ZRCola::translation_db::TranslateInv(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _In_ const langchar_db *lc_db, _In_ langid_t lang, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map) const
void ZRCola::translation_db::TranslateInv(_In_ unsigned __int16 set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _In_ const langchar_db *lc_db, _In_ langid_t lang, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map) const
{
assert(input || inputMax == 0);
@ -114,12 +117,15 @@ void ZRCola::translation_db::TranslateInv(_In_z_count_(inputMax) const wchar_t*
if (map)
map->clear();
auto count = idxTransInv.size();
// Limit search to the given set first.
indexTransInv::size_type l_set, r_set;
idxTransInv.find(translation(set ), l_set);
idxTransInv.find(translation(set + 1), r_set);
for (size_t i = 0; i < inputMax;) {
// Find the longest matching inverse translation at i-th character.
size_t l_match = (size_t)-1;
for (size_t l = 0, r = count, ii = i, j = 0; ii < inputMax && l < r; ii++, j++) {
// Start at l_set (not 0) so the binary search stays inside the requested translation set, same as Translate() does.
for (size_t l = l_set, r = r_set, ii = i, j = 0; ii < inputMax && l < r; ii++, j++) {
wchar_t c = input[ii];
while (l < r) {
// Test the inverse translation in the middle of the search area.
@ -164,7 +170,7 @@ void ZRCola::translation_db::TranslateInv(_In_z_count_(inputMax) const wchar_t*
}
}
if (l_match < count) {
if (l_match < r_set) {
// The saved inverse translation was an exact match.
const translation &trans = idxTransInv[l_match];
if (trans.src_len() && trans.src()[0] != L'#' && (!lc_db || !lc_db->IsLocalCharacter(trans.dst(), trans.dst_end(), lang))) {

Binary file not shown.