ZRCola composition added
This commit is contained in:
parent
0cb3acf426
commit
7db14e1d20
@ -20,6 +20,7 @@
|
|||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="..\src\compose.cpp" />
|
<ClCompile Include="..\src\compose.cpp" />
|
||||||
|
<ClCompile Include="..\src\compositions.cpp" />
|
||||||
<ClCompile Include="..\src\decompose.cpp" />
|
<ClCompile Include="..\src\decompose.cpp" />
|
||||||
<ClCompile Include="..\src\decompositions.cpp" />
|
<ClCompile Include="..\src\decompositions.cpp" />
|
||||||
<ClCompile Include="..\src\mapping.cpp" />
|
<ClCompile Include="..\src\mapping.cpp" />
|
||||||
|
@ -30,6 +30,9 @@
|
|||||||
<ClCompile Include="..\src\decompositions.cpp">
|
<ClCompile Include="..\src\decompositions.cpp">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\src\compositions.cpp">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="..\src\stdafx.h">
|
<ClInclude Include="..\src\stdafx.h">
|
||||||
|
@ -34,5 +34,5 @@ namespace ZRCola {
|
|||||||
/// \param[out] output Output string (UTF-16)
|
/// \param[out] output Output string (UTF-16)
|
||||||
/// \param[out] map The vector of source to destination index mappings (optional)
|
/// \param[out] map The vector of source to destination index mappings (optional)
|
||||||
///
|
///
|
||||||
void ZRCOLA_API Compose(_In_z_count_(inputMax) const wchar_t *input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL);
|
void ZRCOLA_API Compose(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL);
|
||||||
};
|
};
|
||||||
|
@ -20,8 +20,15 @@
|
|||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
|
|
||||||
|
|
||||||
void ZRCOLA_API ZRCola::Compose(_In_z_count_(inputMax) const wchar_t *input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map)
|
static inline void Compose(
|
||||||
|
_In_count_(compositionsCount) const ZRCola::composition* compositions,
|
||||||
|
_In_ const size_t compositionsCount,
|
||||||
|
_In_z_count_(inputMax) const wchar_t* input,
|
||||||
|
_In_ size_t inputMax,
|
||||||
|
_Out_ std::wstring &output,
|
||||||
|
_Out_opt_ std::vector<ZRCola::mapping>* map)
|
||||||
{
|
{
|
||||||
|
assert(compositions || compositionsCount == 0);
|
||||||
assert(input || inputMax == 0);
|
assert(input || inputMax == 0);
|
||||||
|
|
||||||
// Trim inputMax to actual length.
|
// Trim inputMax to actual length.
|
||||||
@ -31,6 +38,89 @@ void ZRCOLA_API ZRCola::Compose(_In_z_count_(inputMax) const wchar_t *input, _In
|
|||||||
// Since composing is usually reducing the number of chars, memory reallocation is not expected later.
|
// Since composing is usually reducing the number of chars, memory reallocation is not expected later.
|
||||||
output.clear();
|
output.clear();
|
||||||
output.reserve(inputMax);
|
output.reserve(inputMax);
|
||||||
|
if (map)
|
||||||
|
map->clear();
|
||||||
|
|
||||||
|
for (size_t i = 0; i < inputMax;) {
    // Start with the full search area at i-th character.
    // While scanning, [l, r) is the range of compositions whose first j characters
    // equal input[i .. i + j). The bisection relies on the table being ordered by src.
    //
    // NOTE(review): only the first composition of the last non-empty run is tested for
    // an exact match. If that run holds no composition of exactly j characters, a shorter
    // complete match seen earlier is not revisited and a single input character is copied
    // instead - confirm this is the intended behaviour.
    for (size_t l = 0, r = compositionsCount, ii = i, j = 0;; ii++, j++) {
        if (ii < inputMax) {
            // Remember where the current run starts: its first composition is the
            // match candidate should this character empty the search area.
            size_t l_prev = l;
            wchar_t c = input[ii];
            while (l < r) {
                // Test the composition in the middle of the search area.
                size_t m = (l + r) / 2;

                // Get the j-th character of the composition.
                // All compositions that get short on characters are lexically ordered before.
                // Thus the j-th character is considered 0.
                wchar_t s = j < wcslen(compositions[m].src) ? compositions[m].src[j] : 0;

                // Do the bisection test.
                if (c < s) r = m;
                else if (s < c) l = m + 1;
                else {
                    // Character found.

                    // Narrow the search area on the left to start at the first composition in the run.
                    for (size_t rr = m; l < rr;) {
                        size_t mm = (l + rr) / 2;
                        wchar_t ss = j < wcslen(compositions[mm].src) ? compositions[mm].src[j] : 0;
                        if (c <= ss) rr = mm; else l = mm + 1;
                    }

                    // Narrow the search area on the right to end at the first composition not in the run.
                    for (size_t ll = m + 1; ll < r;) {
                        size_t mm = (ll + r) / 2;
                        wchar_t ss = j < wcslen(compositions[mm].src) ? compositions[mm].src[j] : 0;
                        if (ss <= c) ll = mm + 1; else r = mm;
                    }

                    break;
                }
            }

            if (l >= r) {
                // The search area is empty.
                if (j && wcslen(compositions[l_prev].src) == j) {
                    // The first composition of the previous run was a match.
                    output += compositions[l_prev].dst;
                    i = ii;
                    if (j > 1 && map) {
                        // Mapping changed.
                        map->push_back(ZRCola::mapping(output.length(), i));
                    }
                } else {
                    // The exact match was not found.
                    output += input[i];
                    i++;
                }
                break;
            }
        } else {
            // End of input reached.
            // The loop only gets here after at least one character narrowed the search
            // area to a non-empty run, so compositions[l] is the first composition of
            // the current run. It is a match when it has exactly j characters.
            // (Previously compositions[l - 1] was tested while compositions[l] was
            // emitted, which tested the wrong entry and never matched a run at index 0.)
            if (l < r && wcslen(compositions[l].src) == j) {
                // The first composition of the current run was a match.
                output += compositions[l].dst;
                i = ii;
                if (j > 1 && map) {
                    // Mapping changed.
                    map->push_back(ZRCola::mapping(output.length(), i));
                }
            } else {
                // The exact match was not found.
                output += input[i];
                i++;
            }

            break;
        }
    }
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///
/// Public entry point: composes the input string using the library-wide
/// composition table (`compositions`, `compositionsCount`).
///
/// \param[in]  input     Input string
/// \param[in]  inputMax  Maximum number of characters to read from \p input
/// \param[out] output    Output string
/// \param[out] map       The vector of source to destination index mappings (optional)
///
void ZRCOLA_API ZRCola::Compose(
    _In_z_count_(inputMax) const wchar_t* input,
    _In_ size_t inputMax,
    _Out_ std::wstring &output,
    _Out_opt_ std::vector<mapping>* map)
{
    // All the work is done by the file-local helper; this wrapper only binds the global table.
    ::Compose(compositions, compositionsCount, input, inputMax, output, map);
}
|
||||||
|
5612
lib/libZRCola/src/compositions.cpp
Normal file
5612
lib/libZRCola/src/compositions.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -25,6 +25,14 @@
|
|||||||
|
|
||||||
|
|
||||||
namespace ZRCola {
|
namespace ZRCola {
|
||||||
|
///
/// Single composition record: a source character sequence and the character it composes to.
///
struct composition {
    const wchar_t* src;     ///< Zero-terminated source character sequence
    wchar_t dst;            ///< Character appended to the output when `src` is matched
};

///
/// Composition table used by Compose().
/// NOTE(review): Compose() bisects this table by `src`, so it is presumably kept in lexical `src` order - confirm against compositions.cpp.
///
extern const composition* compositions;

///
/// Number of records in the `compositions` table.
///
extern const size_t compositionsCount;
|
||||||
|
|
||||||
struct decomposition {
|
struct decomposition {
|
||||||
wchar_t src;
|
wchar_t src;
|
||||||
const wchar_t *dst;
|
const wchar_t *dst;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user