Accessing translation data standardized

This commit is contained in:
Simon Rozman 2016-03-02 10:45:22 +01:00
parent c44b449a36
commit 4e5811803a
3 changed files with 24 additions and 20 deletions

View File

@ -36,8 +36,8 @@ namespace ZRCola {
/// ///
class translation { class translation {
public: public:
wchar_t chr; ///< composed character wchar_t chr; ///< Composed character
std::wstring str; ///< decomposed string std::wstring str; ///< Decomposed string
}; };
public: public:

View File

@ -45,16 +45,6 @@ namespace ZRCola {
unsigned __int32 start; ///< Composed character offset unsigned __int32 start; ///< Composed character offset
unsigned __int32 end; ///< Decomposed string end offset unsigned __int32 end; ///< Decomposed string end offset
///
/// Returns composed character offset (the value of `start`)
///
inline unsigned __int32 GetChrStart() const { return start; }
///
/// Returns decomposed string start offset
///
/// `start` is the offset of the composed character; the decomposed string
/// begins at the element immediately after it, hence `start + 1`.
///
inline unsigned __int32 GetStrStart() const { return start + 1; }
/// ///
/// Returns translation string length /// Returns translation string length
/// ///
@ -62,6 +52,20 @@ namespace ZRCola {
}; };
#pragma pack(pop) #pragma pack(pop)
#pragma pack(push, 2)
#pragma warning(push)
#pragma warning(disable: 4200) // Zero-sized array in struct is a nonstandard extension.
///
/// Single translation record
///
/// Packed to 2-byte alignment so that the decomposed string follows the
/// composed character without padding. The trailing array has no declared
/// size; its length is not stored in this structure.
///
struct translation {
wchar_t chr; ///< Composed character
wchar_t str[]; ///< Decomposed string (starts right after `chr`)
};
#pragma warning(pop)
#pragma pack(pop)
std::vector<index> idxComp; ///< Composition index std::vector<index> idxComp; ///< Composition index
std::vector<index> idxDecomp; ///< Decomposition index std::vector<index> idxDecomp; ///< Decomposition index
std::vector<wchar_t> data; ///< Transformation data std::vector<wchar_t> data; ///< Transformation data

View File

@ -49,7 +49,7 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input
// Get the j-th character of the composition. // Get the j-th character of the composition.
// All compositions that get short on characters are lexically ordered before. // All compositions that get short on characters are lexically ordered before.
// Thus the j-th character is considered 0. // Thus the j-th character is considered 0.
wchar_t s = j < idxComp[m].GetStrLength() ? data[idxComp[m].GetStrStart() + j] : 0; wchar_t s = j < idxComp[m].GetStrLength() ? ((translation*)&data[idxComp[m].start])->str[j] : 0;
// Do the bisection test. // Do the bisection test.
if (c < s) r = m; if (c < s) r = m;
@ -60,14 +60,14 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input
// Narrow the search area on the left to start at the first composition in the run. // Narrow the search area on the left to start at the first composition in the run.
for (size_t rr = m; l < rr;) { for (size_t rr = m; l < rr;) {
size_t m = (l + rr) / 2; size_t m = (l + rr) / 2;
wchar_t s = j < idxComp[m].GetStrLength() ? data[idxComp[m].GetStrStart() + j] : 0; wchar_t s = j < idxComp[m].GetStrLength() ? ((translation*)&data[idxComp[m].start])->str[j] : 0;
if (c <= s) rr = m; else l = m + 1; if (c <= s) rr = m; else l = m + 1;
} }
// Narrow the search area on the right to end at the first composition not in the run. // Narrow the search area on the right to end at the first composition not in the run.
for (size_t ll = m + 1; ll < r;) { for (size_t ll = m + 1; ll < r;) {
size_t m = (ll + r) / 2; size_t m = (ll + r) / 2;
wchar_t s = j < idxComp[m].GetStrLength() ? data[idxComp[m].GetStrStart() + j] : 0; wchar_t s = j < idxComp[m].GetStrLength() ? ((translation*)&data[idxComp[m].start])->str[j] : 0;
if (s <= c) ll = m + 1; else r = m; if (s <= c) ll = m + 1; else r = m;
} }
@ -79,7 +79,7 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input
// The search area is empty. // The search area is empty.
if (j && l_prev < compositionsCount && j == idxComp[l_prev].GetStrLength()) { if (j && l_prev < compositionsCount && j == idxComp[l_prev].GetStrLength()) {
// The first composition of the previous run was a match. // The first composition of the previous run was a match.
output += data[idxComp[l_prev].GetChrStart()]; output += ((translation*)&data[idxComp[l_prev].start])->chr;
i = ii; i = ii;
if (j > 1 && map) { if (j > 1 && map) {
// Mapping changed. // Mapping changed.
@ -97,7 +97,7 @@ void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input
if (l < compositionsCount && j == idxComp[l].GetStrLength()) { if (l < compositionsCount && j == idxComp[l].GetStrLength()) {
// The first composition of the previous run was a match. // The first composition of the previous run was a match.
output += data[idxComp[l].GetChrStart()]; output += ((translation*)&data[idxComp[l].start])->chr;
i = ii; i = ii;
if (j > 1 && map) { if (j > 1 && map) {
// Mapping changed. // Mapping changed.
@ -138,12 +138,12 @@ void ZRCOLA_API ZRCola::translation_db::Decompose(_In_z_count_(inputMax) const w
for (size_t l = 0, r = decompositionsCount;; ) { for (size_t l = 0, r = decompositionsCount;; ) {
if (l < r) { if (l < r) {
size_t m = (l + r) / 2; size_t m = (l + r) / 2;
wchar_t decompSrc = data[idxDecomp[m].GetChrStart()]; wchar_t decompSrc = ((translation*)&data[idxDecomp[m].start])->chr;
if (c < decompSrc) r = m; if (c < decompSrc) r = m;
else if (decompSrc < c) l = m + 1; else if (decompSrc < c) l = m + 1;
else { else {
// Character found. // Character found.
output.append(&data[idxDecomp[m].GetStrStart()], idxDecomp[m].GetStrLength()); output.append(((translation*)&data[idxDecomp[m].start])->str, idxDecomp[m].GetStrLength());
i++; i++;
if (map) { if (map) {
// Mapping changed. // Mapping changed.