Composed and decomposed strings of ZRCola::translation_db::translation made protected
This commit is contained in:
parent
03ff056898
commit
ca306345c2
@ -46,14 +46,37 @@ namespace ZRCola {
|
|||||||
/// Translation data
|
/// Translation data
|
||||||
///
|
///
|
||||||
struct translation {
|
struct translation {
|
||||||
unsigned __int16 rank; ///< Decomposition rank
|
public:
|
||||||
static const unsigned __int16 com_start; ///< Composed character start in \c data
|
unsigned __int16 rank; ///< Decomposition rank
|
||||||
union {
|
|
||||||
unsigned __int16 com_end; ///< Composed character end in \c data
|
protected:
|
||||||
unsigned __int16 dec_start; ///< Decomposed character start in \c data
|
unsigned __int16 com_to; ///< Composed character end in \c data
|
||||||
};
|
unsigned __int16 dec_to; ///< Decomposed string end in \c data
|
||||||
unsigned __int16 dec_end; ///< Decomposed string end in \c data
|
wchar_t data[]; ///< Decomposed string and composed character
|
||||||
wchar_t data[]; ///< Decomposed string and composed character
|
|
||||||
|
public:
|
||||||
|
inline const wchar_t* com () const { return data; };
|
||||||
|
inline wchar_t* com () { return data; };
|
||||||
|
inline const wchar_t* com_end() const { return data + com_to; };
|
||||||
|
inline wchar_t* com_end() { return data + com_to; };
|
||||||
|
inline unsigned __int16 com_len() const { return com_to; };
|
||||||
|
|
||||||
|
inline wchar_t com_at(_In_ size_t i) const
|
||||||
|
{
|
||||||
|
return i < com_to ? data[i] : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline const wchar_t* dec () const { return data + com_to; };
|
||||||
|
inline wchar_t* dec () { return data + com_to; };
|
||||||
|
inline const wchar_t* dec_end() const { return data + dec_to; };
|
||||||
|
inline wchar_t* dec_end() { return data + dec_to; };
|
||||||
|
inline unsigned __int16 dec_len() const { return dec_to - com_to; };
|
||||||
|
|
||||||
|
inline wchar_t dec_at(_In_ size_t i) const
|
||||||
|
{
|
||||||
|
size_t ii = i + com_to; // absolute index
|
||||||
|
return ii < dec_to ? data[ii] : 0;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
#pragma pack(pop)
|
#pragma pack(pop)
|
||||||
|
|
||||||
@ -83,7 +106,7 @@ namespace ZRCola {
|
|||||||
///
|
///
|
||||||
virtual int compare(_In_ const translation &a, _In_ const translation &b) const
|
virtual int compare(_In_ const translation &a, _In_ const translation &b) const
|
||||||
{
|
{
|
||||||
int r = ZRCola::CompareString(a.data + a.dec_start, a.data + a.dec_end, b.data + b.dec_start, b.data + b.dec_end);
|
int r = ZRCola::CompareString(a.dec(), a.dec_end(), b.dec(), b.dec_end());
|
||||||
if (r != 0) return r;
|
if (r != 0) return r;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -102,10 +125,10 @@ namespace ZRCola {
|
|||||||
///
|
///
|
||||||
virtual int compare_sort(_In_ const translation &a, _In_ const translation &b) const
|
virtual int compare_sort(_In_ const translation &a, _In_ const translation &b) const
|
||||||
{
|
{
|
||||||
int r = ZRCola::CompareString(a.data + a.dec_start, a.data + a.dec_end, b.data + b.dec_start, b.data + b.dec_end);
|
int r = ZRCola::CompareString(a.dec(), a.dec_end(), b.dec(), b.dec_end());
|
||||||
if (r != 0) return r;
|
if (r != 0) return r;
|
||||||
|
|
||||||
r = ZRCola::CompareString(a.data + a.com_start, a.data + a.com_end, b.data + b.com_start, b.data + b.com_end);
|
r = ZRCola::CompareString(a.com(), a.com_end(), b.com(), b.com_end());
|
||||||
if (r != 0) return r;
|
if (r != 0) return r;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -139,7 +162,7 @@ namespace ZRCola {
|
|||||||
///
|
///
|
||||||
virtual int compare(_In_ const translation &a, _In_ const translation &b) const
|
virtual int compare(_In_ const translation &a, _In_ const translation &b) const
|
||||||
{
|
{
|
||||||
int r = ZRCola::CompareString(a.data + a.com_start, a.data + a.com_end, b.data + b.com_start, b.data + b.com_end);
|
int r = ZRCola::CompareString(a.com(), a.com_end(), b.com(), b.com_end());
|
||||||
if (r != 0) return r;
|
if (r != 0) return r;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -158,13 +181,13 @@ namespace ZRCola {
|
|||||||
///
|
///
|
||||||
virtual int compare_sort(_In_ const translation &a, _In_ const translation &b) const
|
virtual int compare_sort(_In_ const translation &a, _In_ const translation &b) const
|
||||||
{
|
{
|
||||||
int r = ZRCola::CompareString(a.data + a.com_start, a.data + a.com_end, b.data + b.com_start, b.data + b.com_end);
|
int r = ZRCola::CompareString(a.com(), a.com_end(), b.com(), b.com_end());
|
||||||
if (r != 0) return r;
|
if (r != 0) return r;
|
||||||
|
|
||||||
if (a.rank < b.rank) return -1;
|
if (a.rank < b.rank) return -1;
|
||||||
else if (a.rank > b.rank) return +1;
|
else if (a.rank > b.rank) return +1;
|
||||||
|
|
||||||
r = ZRCola::CompareString(a.data + a.dec_start, a.data + a.dec_end, b.data + b.dec_start, b.data + b.dec_end);
|
r = ZRCola::CompareString(a.dec(), a.dec_end(), b.dec(), b.dec_end());
|
||||||
if (r != 0) return r;
|
if (r != 0) return r;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -19,8 +19,6 @@
|
|||||||
|
|
||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
|
|
||||||
const unsigned __int16 ZRCola::translation_db::translation::com_start = 0;
|
|
||||||
|
|
||||||
|
|
||||||
void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map) const
|
void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map) const
|
||||||
{
|
{
|
||||||
@ -50,8 +48,7 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input
|
|||||||
// All compositions that get short on characters are lexically ordered before.
|
// All compositions that get short on characters are lexically ordered before.
|
||||||
// Thus the j-th character is considered 0.
|
// Thus the j-th character is considered 0.
|
||||||
const translation &trans = idxComp[m];
|
const translation &trans = idxComp[m];
|
||||||
size_t jj = trans.dec_start + j;
|
wchar_t s = trans.dec_at(j);
|
||||||
wchar_t s = jj < trans.dec_end ? trans.data[jj] : 0;
|
|
||||||
|
|
||||||
// Do the bisection test.
|
// Do the bisection test.
|
||||||
if (c < s) r = m;
|
if (c < s) r = m;
|
||||||
@ -63,8 +60,7 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input
|
|||||||
for (size_t rr = m; l < rr;) {
|
for (size_t rr = m; l < rr;) {
|
||||||
size_t m = (l + rr) / 2;
|
size_t m = (l + rr) / 2;
|
||||||
const translation &trans = idxComp[m];
|
const translation &trans = idxComp[m];
|
||||||
size_t jj = trans.dec_start + j;
|
wchar_t s = trans.dec_at(j);
|
||||||
wchar_t s = jj < trans.dec_end ? trans.data[jj] : 0;
|
|
||||||
if (c <= s) rr = m; else l = m + 1;
|
if (c <= s) rr = m; else l = m + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -72,13 +68,12 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input
|
|||||||
for (size_t ll = m + 1; ll < r;) {
|
for (size_t ll = m + 1; ll < r;) {
|
||||||
size_t m = (ll + r) / 2;
|
size_t m = (ll + r) / 2;
|
||||||
const translation &trans = idxComp[m];
|
const translation &trans = idxComp[m];
|
||||||
size_t jj = trans.dec_start + j;
|
wchar_t s = trans.dec_at(j);
|
||||||
wchar_t s = jj < trans.dec_end ? trans.data[jj] : 0;
|
|
||||||
if (s <= c) ll = m + 1; else r = m;
|
if (s <= c) ll = m + 1; else r = m;
|
||||||
}
|
}
|
||||||
|
|
||||||
const translation &trans = idxComp[l];
|
const translation &trans = idxComp[l];
|
||||||
if (trans.dec_start + j + 1 == trans.dec_end) {
|
if (j + 1 == trans.dec_len()) {
|
||||||
// The first composition of the run was a match (thus far). Save it.
|
// The first composition of the run was a match (thus far). Save it.
|
||||||
l_match = l;
|
l_match = l;
|
||||||
}
|
}
|
||||||
@ -91,9 +86,9 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input
|
|||||||
if (l_match < compositionsCount) {
|
if (l_match < compositionsCount) {
|
||||||
// The saved composition was an exact match.
|
// The saved composition was an exact match.
|
||||||
const translation &trans = idxComp[l_match];
|
const translation &trans = idxComp[l_match];
|
||||||
output.append(trans.data + trans.com_start, trans.data + trans.com_end);
|
output.append(trans.com(), trans.com_end());
|
||||||
i += trans.dec_end - trans.dec_start;
|
i += trans.dec_len();
|
||||||
if (trans.dec_end - trans.dec_start != trans.com_end - trans.com_start && map) {
|
if (trans.dec_len() != trans.com_len() && map) {
|
||||||
// Mapping changed.
|
// Mapping changed.
|
||||||
map->push_back(ZRCola::mapping(i, output.length()));
|
map->push_back(ZRCola::mapping(i, output.length()));
|
||||||
}
|
}
|
||||||
@ -134,8 +129,7 @@ void ZRCola::translation_db::Decompose(_In_z_count_(inputMax) const wchar_t* inp
|
|||||||
// All decompositions that get short on characters are lexically ordered before.
|
// All decompositions that get short on characters are lexically ordered before.
|
||||||
// Thus the j-th character is considered 0.
|
// Thus the j-th character is considered 0.
|
||||||
const translation &trans = idxDecomp[m];
|
const translation &trans = idxDecomp[m];
|
||||||
size_t jj = trans.com_start + j;
|
wchar_t s = trans.com_at(j);
|
||||||
wchar_t s = jj < trans.com_end ? trans.data[jj] : 0;
|
|
||||||
|
|
||||||
// Do the bisection test.
|
// Do the bisection test.
|
||||||
if (c < s) r = m;
|
if (c < s) r = m;
|
||||||
@ -147,8 +141,7 @@ void ZRCola::translation_db::Decompose(_In_z_count_(inputMax) const wchar_t* inp
|
|||||||
for (size_t rr = m; l < rr;) {
|
for (size_t rr = m; l < rr;) {
|
||||||
size_t m = (l + rr) / 2;
|
size_t m = (l + rr) / 2;
|
||||||
const translation &trans = idxDecomp[m];
|
const translation &trans = idxDecomp[m];
|
||||||
size_t jj = trans.com_start + j;
|
wchar_t s = trans.com_at(j);
|
||||||
wchar_t s = jj < trans.com_end ? trans.data[jj] : 0;
|
|
||||||
if (c <= s) rr = m; else l = m + 1;
|
if (c <= s) rr = m; else l = m + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -156,13 +149,12 @@ void ZRCola::translation_db::Decompose(_In_z_count_(inputMax) const wchar_t* inp
|
|||||||
for (size_t ll = m + 1; ll < r;) {
|
for (size_t ll = m + 1; ll < r;) {
|
||||||
size_t m = (ll + r) / 2;
|
size_t m = (ll + r) / 2;
|
||||||
const translation &trans = idxDecomp[m];
|
const translation &trans = idxDecomp[m];
|
||||||
size_t jj = trans.com_start + j;
|
wchar_t s = trans.com_at(j);
|
||||||
wchar_t s = jj < trans.com_end ? trans.data[jj] : 0;
|
|
||||||
if (s <= c) ll = m + 1; else r = m;
|
if (s <= c) ll = m + 1; else r = m;
|
||||||
}
|
}
|
||||||
|
|
||||||
const translation &trans = idxDecomp[l];
|
const translation &trans = idxDecomp[l];
|
||||||
if (trans.com_start + j + 1 == trans.com_end) {
|
if (j + 1 == trans.com_len()) {
|
||||||
// The first decomposition of the run was a match (thus far). Save it.
|
// The first decomposition of the run was a match (thus far). Save it.
|
||||||
l_match = l;
|
l_match = l;
|
||||||
}
|
}
|
||||||
@ -175,18 +167,18 @@ void ZRCola::translation_db::Decompose(_In_z_count_(inputMax) const wchar_t* inp
|
|||||||
if (l_match < decompositionsCount) {
|
if (l_match < decompositionsCount) {
|
||||||
// The saved decomposition was an exact match.
|
// The saved decomposition was an exact match.
|
||||||
const translation &trans = idxDecomp[l_match];
|
const translation &trans = idxDecomp[l_match];
|
||||||
if (trans.dec_start < trans.dec_end && trans.data[trans.dec_start] != L'#' && (!lc_db || !lc_db->IsLocalCharacter(trans.data + trans.com_start, trans.data + trans.com_end, lang))) {
|
if (trans.dec_len() && trans.dec()[0] != L'#' && (!lc_db || !lc_db->IsLocalCharacter(trans.com(), trans.com_end(), lang))) {
|
||||||
// Append decomposed sequence.
|
// Append decomposed sequence.
|
||||||
output.append(trans.data + trans.dec_start, trans.data + trans.dec_end);
|
output.append(trans.dec(), trans.dec_end());
|
||||||
i += trans.com_end - trans.com_start;
|
i += trans.com_len();
|
||||||
if (trans.dec_end - trans.dec_start != trans.com_end - trans.com_start && map) {
|
if (trans.com_len() != trans.dec_len() && map) {
|
||||||
// Mapping changed.
|
// Mapping changed.
|
||||||
map->push_back(ZRCola::mapping(i, output.length()));
|
map->push_back(ZRCola::mapping(i, output.length()));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Decomposition of this character is inhibited.
|
// Decomposition of this character is inhibited.
|
||||||
output.append(trans.data + trans.com_start, trans.data + trans.com_end);
|
output.append(trans.com(), trans.com_end());
|
||||||
i += trans.com_end - trans.com_start;
|
i += trans.com_len();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// The match was not found.
|
// The match was not found.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user