ZRCola composition added

This commit is contained in:
Simon Rozman 2016-02-09 15:09:32 +01:00
parent 0cb3acf426
commit 7db14e1d20
6 changed files with 5716 additions and 2 deletions

View File

@ -20,6 +20,7 @@
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\src\compose.cpp" />
<ClCompile Include="..\src\compositions.cpp" />
<ClCompile Include="..\src\decompose.cpp" />
<ClCompile Include="..\src\decompositions.cpp" />
<ClCompile Include="..\src\mapping.cpp" />

View File

@ -30,6 +30,9 @@
<ClCompile Include="..\src\decompositions.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\compositions.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\src\stdafx.h">

View File

@ -34,5 +34,5 @@ namespace ZRCola {
/// \param[out] output Output string (UTF-16)
/// \param[out] map The vector of source to destination index mappings (optional)
///
void ZRCOLA_API Compose(_In_z_count_(inputMax) const wchar_t *input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL);
void ZRCOLA_API Compose(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL);
};

View File

@ -20,8 +20,15 @@
#include "stdafx.h"
void ZRCOLA_API ZRCola::Compose(_In_z_count_(inputMax) const wchar_t *input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map)
static inline void Compose(
_In_count_(compositionsCount) const ZRCola::composition* compositions,
_In_ const size_t compositionsCount,
_In_z_count_(inputMax) const wchar_t* input,
_In_ size_t inputMax,
_Out_ std::wstring &output,
_Out_opt_ std::vector<ZRCola::mapping>* map)
{
assert(compositions || compositionsCount == 0);
assert(input || inputMax == 0);
// Trim inputMax to actual length.
@ -31,6 +38,89 @@ void ZRCOLA_API ZRCola::Compose(_In_z_count_(inputMax) const wchar_t *input, _In
// Since composing is usually reducing the number of chars, memory reallocation is not expected later.
output.clear();
output.reserve(inputMax);
if (map)
map->clear();
// Walk the input. Each pass of the outer loop emits either the destination character of one
// matched composition (consuming its whole source sequence) or one input character unchanged.
for (size_t i = 0; i < inputMax;) {
    // Start with the full search area at i-th character.
    // [l, r) is the run of compositions whose first j characters equal input[i .. ii).
    // The bisection below relies on the table being lexically ordered by src.
    for (size_t l = 0, r = compositionsCount, ii = i, j = 0;; ii++, j++) {
        if (ii < inputMax) {
            // Remember where the previous run started: should the run become empty on this
            // character, its first (shortest) entry is the only candidate for an exact match.
            size_t l_prev = l;
            wchar_t c = input[ii];
            while (l < r) {
                // Test the composition in the middle of the search area.
                size_t m = l + (r - l) / 2;

                // Get the j-th character of the composition.
                // All compositions that get short on characters are lexically ordered before.
                // Thus the j-th character is considered 0.
                wchar_t s = j < wcslen(compositions[m].src) ? compositions[m].src[j] : 0;

                // Do the bisection test.
                if (c < s) r = m;
                else if (s < c) l = m + 1;
                else {
                    // Character found.

                    // Narrow the search area on the left to start at the first composition in the run.
                    for (size_t rr = m; l < rr;) {
                        size_t mm = l + (rr - l) / 2;
                        wchar_t ss = j < wcslen(compositions[mm].src) ? compositions[mm].src[j] : 0;
                        if (c <= ss) rr = mm; else l = mm + 1;
                    }

                    // Narrow the search area on the right to end at the first composition not in the run.
                    for (size_t ll = m + 1; ll < r;) {
                        size_t mm = ll + (r - ll) / 2;
                        wchar_t ss = j < wcslen(compositions[mm].src) ? compositions[mm].src[j] : 0;
                        if (ss <= c) ll = mm + 1; else r = mm;
                    }

                    break;
                }
            }

            if (l >= r) {
                // The search area is empty.
                if (j && wcslen(compositions[l_prev].src) == j) {
                    // The first composition of the previous run was a match.
                    output += compositions[l_prev].dst;
                    i = ii;
                    if (j > 1 && map) {
                        // Mapping changed.
                        map->push_back(ZRCola::mapping(output.length(), i));
                    }
                } else {
                    // The exact match was not found.
                    // NOTE(review): a shorter composition that matched on an earlier character
                    // is not revisited here; only the first entry of the last run is tested.
                    output += input[i];
                    i++;
                }
                break;
            }
        } else {
            // End of input reached.
            // This branch is only entered with j >= 1 and a non-empty run [l, r): the j == 0
            // pass always has ii == i < inputMax, and an empty run breaks out above.
            // The candidate for an exact match is therefore the first entry of the current run,
            // compositions[l] - the very entry whose dst is emitted below. (Testing
            // compositions[l - 1] would inspect an entry outside the run and miss index 0.)
            if (l < r && wcslen(compositions[l].src) == j) {
                // The first composition of the run was a match.
                output += compositions[l].dst;
                i = ii;
                if (j > 1 && map) {
                    // Mapping changed.
                    map->push_back(ZRCola::mapping(output.length(), i));
                }
            } else {
                // The exact match was not found.
                output += input[i];
                i++;
            }
            break;
        }
    }
}
}
///
/// Public entry point for composition.
///
/// Forwards to the file-local ::Compose() helper, supplying the global
/// `compositions` table together with its element count `compositionsCount`.
///
/// \param[in]  input     Input string
/// \param[in]  inputMax  Upper bound on the number of characters of \p input to process
/// \param[out] output    Output string (UTF-16)
/// \param[out] map       The vector of source to destination index mappings (optional)
///
void ZRCOLA_API ZRCola::Compose(
    _In_z_count_(inputMax) const wchar_t* input,
    _In_ size_t inputMax,
    _Out_ std::wstring &output,
    _Out_opt_ std::vector<mapping>* map)
{
    ::Compose(
        compositions,
        compositionsCount,
        input,
        inputMax,
        output,
        map);
}

File diff suppressed because it is too large Load Diff

View File

@ -25,6 +25,14 @@
namespace ZRCola {
///
/// One entry of the composition table: a source character sequence and the
/// single character it composes to.
///
struct composition {
    const wchar_t* src;     ///< Source sequence; zero-terminated (its length is taken with wcslen())
    wchar_t dst;            ///< Character appended to the output when \c src is matched
};
/// Composition table handed to the composing routine.
/// Compose() bisects this table on the characters of `src`, so the entries are
/// expected to be lexically ordered by `src`.
extern const composition* compositions;
/// Number of entries in the `compositions` table.
extern const size_t compositionsCount;
struct decomposition {
wchar_t src;
const wchar_t *dst;