(De)composition is not static any more
This commit is contained in:
parent
ce89d26373
commit
6bb0cd7b89
@ -24,9 +24,6 @@
|
||||
#include "zrcolacomppnl.h"
|
||||
#include "zrcolafrm.h"
|
||||
|
||||
#include <zrcola/compose.h>
|
||||
#include <zrcola/decompose.h>
|
||||
|
||||
#include <wx/msgdlg.h>
|
||||
#include <wxex/common.h>
|
||||
|
||||
|
@ -44,9 +44,9 @@ wxZRColaComposerPanel::wxZRColaComposerPanel(wxWindow* parent) :
|
||||
dat >> r_rec;
|
||||
if (!dat.good()) {
|
||||
wxFAIL_MSG(wxT("Error reading translation data from ZRCola.zrcdb."));
|
||||
m_t_db. comp_index.clear();
|
||||
m_t_db.decomp_index.clear();
|
||||
m_t_db. data.clear();
|
||||
m_t_db.idxComp .clear();
|
||||
m_t_db.idxDecomp.clear();
|
||||
m_t_db.data .clear();
|
||||
}
|
||||
} else
|
||||
wxFAIL_MSG(wxT("ZRCola.zrcdb has no translation data."));
|
||||
@ -94,7 +94,7 @@ void wxZRColaComposerPanel::OnDecomposedText(wxCommandEvent& event)
|
||||
#endif
|
||||
|
||||
std::wstring dst;
|
||||
ZRCola::Compose(src.data(), src.size(), dst, &m_mapping);
|
||||
m_t_db.Compose(src.data(), src.size(), dst, &m_mapping);
|
||||
|
||||
long from, to;
|
||||
m_decomposed->GetSelection(&from, &to);
|
||||
@ -141,7 +141,7 @@ void wxZRColaComposerPanel::OnComposedText(wxCommandEvent& event)
|
||||
#endif
|
||||
|
||||
std::wstring dst;
|
||||
ZRCola::Decompose(src.data(), src.size(), dst, &m_mapping);
|
||||
m_t_db.Decompose(src.data(), src.size(), dst, &m_mapping);
|
||||
|
||||
long from, to;
|
||||
m_composed->GetSelection(&from, &to);
|
||||
|
@ -26,7 +26,7 @@ class wxZRColaComposerPanel;
|
||||
#pragma once
|
||||
|
||||
#include "zrcolagui.h"
|
||||
#include <zrcola/common.h>
|
||||
#include <zrcola/translate.h>
|
||||
#include <utility>
|
||||
|
||||
|
||||
|
@ -30,12 +30,12 @@
|
||||
///
|
||||
inline std::ostream& operator <<(std::ostream& stream, const ZRCola::translation_db &t_db)
|
||||
{
|
||||
assert(t_db.comp_index.size() == t_db.decomp_index.size());
|
||||
assert(t_db.idxComp.size() == t_db.idxDecomp.size());
|
||||
|
||||
unsigned __int32 count;
|
||||
|
||||
// Write index count.
|
||||
std::vector<ZRCola::translation_db::index>::size_type trans_count = t_db.comp_index.size();
|
||||
std::vector<ZRCola::translation_db::index>::size_type trans_count = t_db.idxComp.size();
|
||||
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
|
||||
// 4G check
|
||||
if (trans_count > 0xffffffff) {
|
||||
@ -49,11 +49,11 @@ inline std::ostream& operator <<(std::ostream& stream, const ZRCola::translation
|
||||
|
||||
// Write composition index.
|
||||
if (stream.fail()) return stream;
|
||||
stream.write((const char*)t_db.comp_index.data(), sizeof(ZRCola::translation_db::index)*count);
|
||||
stream.write((const char*)t_db.idxComp.data(), sizeof(ZRCola::translation_db::index)*count);
|
||||
|
||||
// Write decomposition index.
|
||||
if (stream.fail()) return stream;
|
||||
stream.write((const char*)t_db.decomp_index.data(), sizeof(ZRCola::translation_db::index)*count);
|
||||
stream.write((const char*)t_db.idxDecomp.data(), sizeof(ZRCola::translation_db::index)*count);
|
||||
|
||||
// Write data count.
|
||||
std::vector<wchar_t>::size_type data_count = t_db.data.size();
|
||||
@ -258,9 +258,9 @@ int _tmain(int argc, _TCHAR *argv[])
|
||||
ZRCola::translation_db t_db;
|
||||
|
||||
// Preallocate memory.
|
||||
t_db. comp_index.reserve(trans_count);
|
||||
t_db.decomp_index.reserve(trans_count);
|
||||
t_db. data.reserve(trans_count*4);
|
||||
t_db.idxComp .reserve(trans_count);
|
||||
t_db.idxDecomp.reserve(trans_count);
|
||||
t_db.data .reserve(trans_count*4);
|
||||
|
||||
// Parse translations and build index and data.
|
||||
while (!ZRCola::DBSource::IsEOF(rs)) {
|
||||
@ -273,8 +273,8 @@ int _tmain(int argc, _TCHAR *argv[])
|
||||
for (std::wstring::size_type i = 0, n = trans.str.length(); i < n; i++)
|
||||
t_db.data.push_back(trans.str[i]);
|
||||
ti.end = t_db.data.size();
|
||||
t_db.comp_index.push_back(ti);
|
||||
t_db.decomp_index.push_back(ti);
|
||||
t_db.idxComp .push_back(ti);
|
||||
t_db.idxDecomp.push_back(ti);
|
||||
} else
|
||||
has_errors = true;
|
||||
|
||||
@ -282,8 +282,8 @@ int _tmain(int argc, _TCHAR *argv[])
|
||||
}
|
||||
|
||||
// Sort indices.
|
||||
qsort_s(t_db. comp_index.data(), trans_count, sizeof(ZRCola::translation_db::index), CompareCompositionIndex , t_db.data.data());
|
||||
qsort_s(t_db.decomp_index.data(), trans_count, sizeof(ZRCola::translation_db::index), CompareDecompositionIndex, t_db.data.data());
|
||||
qsort_s(t_db.idxComp .data(), trans_count, sizeof(ZRCola::translation_db::index), CompareCompositionIndex , t_db.data.data());
|
||||
qsort_s(t_db.idxDecomp.data(), trans_count, sizeof(ZRCola::translation_db::index), CompareDecompositionIndex, t_db.data.data());
|
||||
|
||||
// Write translations to file.
|
||||
dst << ZRCola::translation_rec(t_db);
|
||||
|
@ -22,7 +22,7 @@
|
||||
#include "../include/zrcola.h"
|
||||
#include "dbsource.h"
|
||||
|
||||
#include <zrcola/compose.h>
|
||||
#include <zrcola/translate.h>
|
||||
|
||||
#include <wx/app.h>
|
||||
#include <wx/cmdline.h>
|
||||
|
@ -19,12 +19,7 @@
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\src\compose.cpp" />
|
||||
<ClCompile Include="..\src\compositions.cpp" />
|
||||
<ClCompile Include="..\src\decompose.cpp" />
|
||||
<ClCompile Include="..\src\decompositions.cpp" />
|
||||
<ClCompile Include="..\src\mapping.cpp" />
|
||||
<ClCompile Include="..\src\normalizations.cpp" />
|
||||
<ClCompile Include="..\src\normalize.cpp" />
|
||||
<ClCompile Include="..\src\stdafx.cpp">
|
||||
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
|
||||
@ -32,12 +27,12 @@
|
||||
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
|
||||
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\translate.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\include\zrcola\common.h" />
|
||||
<ClInclude Include="..\include\zrcola\compose.h" />
|
||||
<ClInclude Include="..\include\zrcola\decompose.h" />
|
||||
<ClInclude Include="..\include\zrcola\normalize.h" />
|
||||
<ClInclude Include="..\include\zrcola\translate.h" />
|
||||
<ClInclude Include="..\src\stdafx.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
@ -18,25 +18,13 @@
|
||||
<ClCompile Include="..\src\stdafx.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\decompose.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\compose.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\mapping.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\decompositions.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\normalize.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\normalizations.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\compositions.cpp">
|
||||
<ClCompile Include="..\src\translate.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
@ -47,15 +35,12 @@
|
||||
<ClInclude Include="..\include\zrcola\common.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\include\zrcola\decompose.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\include\zrcola\compose.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\include\zrcola\normalize.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\include\zrcola\translate.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ResourceCompile Include="..\res\libZRCola.rc">
|
||||
|
@ -19,8 +19,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdex/idrec.h>
|
||||
#include <istream>
|
||||
#include <vector>
|
||||
|
||||
|
||||
@ -54,31 +52,6 @@ namespace ZRCola {
|
||||
typedef unsigned __int32 recordsize_t;
|
||||
|
||||
|
||||
///
|
||||
/// Translation database
|
||||
///
|
||||
class translation_db {
|
||||
public:
|
||||
#pragma pack(push)
|
||||
#pragma pack(4)
|
||||
///
|
||||
/// Translation index
|
||||
///
|
||||
struct index {
|
||||
unsigned __int32 start; ///< Composed character offset
|
||||
unsigned __int32 end; ///< Decomposed string end offset
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
std::vector<index> comp_index; ///< Composition index
|
||||
std::vector<index> decomp_index; ///< Decomposition index
|
||||
std::vector<wchar_t> data; ///< Transformation data
|
||||
};
|
||||
|
||||
|
||||
typedef ZRCOLA_API stdex::idrec::record<translation_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> translation_rec;
|
||||
|
||||
|
||||
///
|
||||
/// Composed-decomposed index transformation mapping
|
||||
///
|
||||
@ -118,44 +91,3 @@ namespace ZRCola {
|
||||
};
|
||||
|
||||
#pragma warning(pop)
|
||||
|
||||
|
||||
const ZRCola::recordid_t stdex::idrec::record<ZRCola::translation_db, ZRCola::recordid_t, ZRCola::recordsize_t, ZRCOLA_RECORD_ALIGN>::id = ZRCOLA_DB_TRANSLATIONS_ID;
|
||||
|
||||
|
||||
///
|
||||
/// Reads translation database from a stream
|
||||
///
|
||||
/// \param[in] stream Input stream
|
||||
/// \param[out] t_db Translation database
|
||||
///
|
||||
/// \returns The stream \p stream
|
||||
///
|
||||
inline std::istream& operator >>(std::istream& stream, ZRCola::translation_db &t_db)
|
||||
{
|
||||
unsigned __int32 count;
|
||||
|
||||
// Read index count.
|
||||
stream.read((char*)&count, sizeof(count));
|
||||
if (!stream.good()) return stream;
|
||||
|
||||
// Read composition index.
|
||||
t_db.comp_index.resize(count);
|
||||
stream.read((char*)t_db.comp_index.data(), sizeof(ZRCola::translation_db::index)*count);
|
||||
if (!stream.good()) return stream;
|
||||
|
||||
// Read decomposition index.
|
||||
t_db.decomp_index.resize(count);
|
||||
stream.read((char*)t_db.decomp_index.data(), sizeof(ZRCola::translation_db::index)*count);
|
||||
if (!stream.good()) return stream;
|
||||
|
||||
// Read data count.
|
||||
stream.read((char*)&count, sizeof(count));
|
||||
if (!stream.good()) return stream;
|
||||
|
||||
// Read data.
|
||||
t_db.data.resize(count);
|
||||
stream.read((char*)t_db.data.data(), sizeof(wchar_t)*count);
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
@ -1,47 +0,0 @@
|
||||
/*
|
||||
Copyright 2015-2016 Amebis
|
||||
|
||||
This file is part of ZRCola.
|
||||
|
||||
ZRCola is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
ZRCola is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with ZRCola. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace ZRCola {
|
||||
///
|
||||
/// Composition
|
||||
///
|
||||
struct composition {
|
||||
const wchar_t *src; ///< Decomposed string
|
||||
wchar_t dst; ///< Composed character
|
||||
};
|
||||
|
||||
|
||||
///
|
||||
/// Composes string
|
||||
///
|
||||
/// \param[in] input Input string (UTF-16)
|
||||
/// \param[in] inputMax Length of the input string in characters. Can be (size_t)-1 if \p input is zero terminated.
|
||||
/// \param[out] output Output string (UTF-16)
|
||||
/// \param[out] map The vector of source to destination index mappings (optional)
|
||||
///
|
||||
void ZRCOLA_API Compose(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL);
|
||||
};
|
@ -1,47 +0,0 @@
|
||||
/*
|
||||
Copyright 2015-2016 Amebis
|
||||
|
||||
This file is part of ZRCola.
|
||||
|
||||
ZRCola is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
ZRCola is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with ZRCola. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace ZRCola {
|
||||
///
|
||||
/// Decomposition
|
||||
///
|
||||
struct decomposition {
|
||||
wchar_t src; ///< composed character
|
||||
const wchar_t *dst; ///< decomposed string
|
||||
};
|
||||
|
||||
|
||||
///
|
||||
/// Decomposes string
|
||||
///
|
||||
/// \param[in] input Input string (UTF-16)
|
||||
/// \param[in] inputMax Length of the input string in characters. Can be (size_t)-1 if \p input is zero terminated.
|
||||
/// \param[out] output Output string (UTF-16)
|
||||
/// \param[out] map The vector of source to destination index mappings (optional)
|
||||
///
|
||||
void ZRCOLA_API Decompose(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL);
|
||||
};
|
136
lib/libZRCola/include/zrcola/translate.h
Normal file
136
lib/libZRCola/include/zrcola/translate.h
Normal file
@ -0,0 +1,136 @@
|
||||
/*
|
||||
Copyright 2015-2016 Amebis
|
||||
|
||||
This file is part of ZRCola.
|
||||
|
||||
ZRCola is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
ZRCola is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with ZRCola. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#include <stdex/idrec.h>
|
||||
#include <istream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable: 4251)
|
||||
|
||||
|
||||
namespace ZRCola {
|
||||
///
|
||||
/// Translation database
|
||||
///
|
||||
class ZRCOLA_API translation_db {
|
||||
public:
|
||||
#pragma pack(push)
|
||||
#pragma pack(4)
|
||||
///
|
||||
/// Translation index
|
||||
///
|
||||
struct index {
|
||||
unsigned __int32 start; ///< Composed character offset
|
||||
unsigned __int32 end; ///< Decomposed string end offset
|
||||
|
||||
///
|
||||
/// Returns translation string start offset
|
||||
///
|
||||
inline unsigned __int32 GetChrStart() const { return start; }
|
||||
|
||||
///
|
||||
/// Returns translation string start offset
|
||||
///
|
||||
inline unsigned __int32 GetStrStart() const { return start + 1; }
|
||||
|
||||
///
|
||||
/// Returns translation string length
|
||||
///
|
||||
inline unsigned __int32 GetStrLength() const { return end - (start + 1); }
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
std::vector<index> idxComp; ///< Composition index
|
||||
std::vector<index> idxDecomp; ///< Decomposition index
|
||||
std::vector<wchar_t> data; ///< Transformation data
|
||||
|
||||
public:
|
||||
///
|
||||
/// Composes string
|
||||
///
|
||||
/// \param[in] input Input string (UTF-16)
|
||||
/// \param[in] inputMax Length of the input string in characters. Can be (size_t)-1 if \p input is zero terminated.
|
||||
/// \param[out] output Output string (UTF-16)
|
||||
/// \param[out] map The vector of source to destination index mappings (optional)
|
||||
///
|
||||
void Compose(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const;
|
||||
|
||||
///
|
||||
/// Decomposes string
|
||||
///
|
||||
/// \param[in] input Input string (UTF-16)
|
||||
/// \param[in] inputMax Length of the input string in characters. Can be (size_t)-1 if \p input is zero terminated.
|
||||
/// \param[out] output Output string (UTF-16)
|
||||
/// \param[out] map The vector of source to destination index mappings (optional)
|
||||
///
|
||||
void Decompose(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const;
|
||||
};
|
||||
|
||||
|
||||
typedef ZRCOLA_API stdex::idrec::record<translation_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> translation_rec;
|
||||
};
|
||||
|
||||
|
||||
const ZRCola::recordid_t stdex::idrec::record<ZRCola::translation_db, ZRCola::recordid_t, ZRCola::recordsize_t, ZRCOLA_RECORD_ALIGN>::id = ZRCOLA_DB_TRANSLATIONS_ID;
|
||||
|
||||
|
||||
///
|
||||
/// Reads translation database from a stream
|
||||
///
|
||||
/// \param[in] stream Input stream
|
||||
/// \param[out] t_db Translation database
|
||||
///
|
||||
/// \returns The stream \p stream
|
||||
///
|
||||
inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::translation_db &t_db)
|
||||
{
|
||||
unsigned __int32 count;
|
||||
|
||||
// Read index count.
|
||||
stream.read((char*)&count, sizeof(count));
|
||||
if (!stream.good()) return stream;
|
||||
|
||||
// Read composition index.
|
||||
t_db.idxComp.resize(count);
|
||||
stream.read((char*)t_db.idxComp.data(), sizeof(ZRCola::translation_db::index)*count);
|
||||
if (!stream.good()) return stream;
|
||||
|
||||
// Read decomposition index.
|
||||
t_db.idxDecomp.resize(count);
|
||||
stream.read((char*)t_db.idxDecomp.data(), sizeof(ZRCola::translation_db::index)*count);
|
||||
if (!stream.good()) return stream;
|
||||
|
||||
// Read data count.
|
||||
stream.read((char*)&count, sizeof(count));
|
||||
if (!stream.good()) return stream;
|
||||
|
||||
// Read data.
|
||||
t_db.data.resize(count);
|
||||
stream.read((char*)t_db.data.data(), sizeof(wchar_t)*count);
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
||||
#pragma warning(pop)
|
File diff suppressed because it is too large
Load Diff
@ -1,77 +0,0 @@
|
||||
/*
|
||||
Copyright 2015-2016 Amebis
|
||||
|
||||
This file is part of ZRCola.
|
||||
|
||||
ZRCola is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
ZRCola is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with ZRCola. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
|
||||
///
/// Decomposes string using the given table of decompositions
///
/// \param[in]  decompositions       Table of decompositions, searched by bisection on \c src
/// \param[in]  decompositionsCount  Number of entries in \p decompositions
/// \param[in]  input                Input string (UTF-16)
/// \param[in]  inputMax             Length of the input string in characters. Can be (size_t)-1 if \p input is zero terminated.
/// \param[out] output               Output string (UTF-16)
/// \param[out] map                  The vector of source to destination index mappings (optional)
///
static inline void Decompose(
    _In_count_(decompositionsCount) const ZRCola::decomposition* decompositions,
    _In_ const size_t decompositionsCount,
    _In_z_count_(inputMax) const wchar_t* input,
    _In_ size_t inputMax,
    _Out_ std::wstring &output,
    _Out_opt_ std::vector<ZRCola::mapping>* map)
{
    assert(decompositions || decompositionsCount == 0);
    assert(input || inputMax == 0);

    // Clamp the length to the characters actually present before the terminator.
    inputMax = inputMax != (size_t)-1 ? wcsnlen(input, inputMax) : wcslen(input);

    // Start from an empty result. Decomposition grows the text, so reserve
    // twice the input length up front to make later reallocation less likely.
    output.clear();
    output.reserve(inputMax * 2);
    if (map)
        map->clear();

    for (size_t pos = 0; pos < inputMax;) {
        const wchar_t chr = input[pos++];

        // Bisect the table for an entry whose source character equals chr.
        const ZRCola::decomposition *hit = NULL;
        size_t lo = 0, hi = decompositionsCount;
        while (lo < hi) {
            const size_t mid = (lo + hi) / 2;
            if (chr < decompositions[mid].src)
                hi = mid;
            else if (decompositions[mid].src < chr)
                lo = mid + 1;
            else {
                hit = decompositions + mid;
                break;
            }
        }

        if (hit) {
            // Replace the character with its decomposed string and record
            // where input and output positions correspond from here on.
            output += hit->dst;
            if (map)
                map->push_back(ZRCola::mapping(pos, output.length()));
        } else {
            // No decomposition known: copy the character through unchanged.
            output += chr;
        }
    }
}
|
||||
|
||||
|
||||
// Public entry point: forwards to the file-local implementation, supplying the
// library's decomposition table and its size.
void ZRCOLA_API ZRCola::Decompose(
    _In_z_count_(inputMax) const wchar_t* input,
    _In_ size_t inputMax,
    _Out_ std::wstring &output,
    _Out_opt_ std::vector<mapping>* map)
{
    ::Decompose(
        decompositions, decompositionsCount,
        input, inputMax,
        output, map);
}
|
File diff suppressed because it is too large
Load Diff
@ -1,20 +0,0 @@
|
||||
/*
|
||||
Copyright 2015-2016 Amebis
|
||||
|
||||
This file is part of ZRCola.
|
||||
|
||||
ZRCola is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
ZRCola is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with ZRCola. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
@ -20,17 +20,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../../include/zrcola.h"
|
||||
#include "../include/zrcola/compose.h"
|
||||
#include "../include/zrcola/decompose.h"
|
||||
#include "../include/zrcola/translate.h"
|
||||
#include "../include/zrcola/normalize.h"
|
||||
|
||||
|
||||
namespace ZRCola {
|
||||
extern const composition* compositions;
|
||||
extern const size_t compositionsCount;
|
||||
|
||||
extern const decomposition* decompositions;
|
||||
extern const size_t decompositionsCount;
|
||||
}
|
||||
|
||||
#include <assert.h>
|
||||
|
@ -20,15 +20,8 @@
|
||||
#include "stdafx.h"
|
||||
|
||||
|
||||
static inline void Compose(
|
||||
_In_count_(compositionsCount) const ZRCola::composition* compositions,
|
||||
_In_ const size_t compositionsCount,
|
||||
_In_z_count_(inputMax) const wchar_t* input,
|
||||
_In_ size_t inputMax,
|
||||
_Out_ std::wstring &output,
|
||||
_Out_opt_ std::vector<ZRCola::mapping>* map)
|
||||
void ZRCola::translation_db::Compose(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map) const
|
||||
{
|
||||
assert(compositions || compositionsCount == 0);
|
||||
assert(input || inputMax == 0);
|
||||
|
||||
// Trim inputMax to actual length.
|
||||
@ -41,6 +34,8 @@ static inline void Compose(
|
||||
if (map)
|
||||
map->clear();
|
||||
|
||||
std::vector<index>::size_type compositionsCount = idxComp.size();
|
||||
|
||||
for (size_t i = 0; i < inputMax;) {
|
||||
// Start with the full search area at i-th character.
|
||||
for (size_t l = 0, r = compositionsCount, ii = i, j = 0;; ii++, j++) {
|
||||
@ -54,7 +49,7 @@ static inline void Compose(
|
||||
// Get the j-th character of the composition.
|
||||
// All compositions that get short on characters are lexically ordered before.
|
||||
// Thus the j-th character is considered 0.
|
||||
wchar_t s = j < wcslen(compositions[m].src) ? compositions[m].src[j] : 0;
|
||||
wchar_t s = j < idxComp[m].GetStrLength() ? data[idxComp[m].GetStrStart() + j] : 0;
|
||||
|
||||
// Do the bisection test.
|
||||
if (c < s) r = m;
|
||||
@ -65,14 +60,14 @@ static inline void Compose(
|
||||
// Narrow the search area on the left to start at the first composition in the run.
|
||||
for (size_t rr = m; l < rr;) {
|
||||
size_t m = (l + rr) / 2;
|
||||
wchar_t s = j < wcslen(compositions[m].src) ? compositions[m].src[j] : 0;
|
||||
wchar_t s = j < idxComp[m].GetStrLength() ? data[idxComp[m].GetStrStart() + j] : 0;
|
||||
if (c <= s) rr = m; else l = m + 1;
|
||||
}
|
||||
|
||||
// Narrow the search area on the right to end at the first composition not in the run.
|
||||
for (size_t ll = m + 1; ll < r;) {
|
||||
size_t m = (ll + r) / 2;
|
||||
wchar_t s = j < wcslen(compositions[m].src) ? compositions[m].src[j] : 0;
|
||||
wchar_t s = j < idxComp[m].GetStrLength() ? data[idxComp[m].GetStrStart() + j] : 0;
|
||||
if (s <= c) ll = m + 1; else r = m;
|
||||
}
|
||||
|
||||
@ -82,9 +77,9 @@ static inline void Compose(
|
||||
|
||||
if (l >= r) {
|
||||
// The search area is empty.
|
||||
if (j && l_prev < compositionsCount && compositions[l_prev].src[j] == 0) {
|
||||
if (j && l_prev < compositionsCount && j == idxComp[l_prev].GetStrLength()) {
|
||||
// The first composition of the previous run was a match.
|
||||
output += compositions[l_prev].dst;
|
||||
output += data[idxComp[l_prev].GetChrStart()];
|
||||
i = ii;
|
||||
if (j > 1 && map) {
|
||||
// Mapping changed.
|
||||
@ -100,9 +95,9 @@ static inline void Compose(
|
||||
} else {
|
||||
// End of input reached.
|
||||
|
||||
if (l < compositionsCount && compositions[l].src[j] == 0) {
|
||||
if (l < compositionsCount && j == idxComp[l].GetStrLength()) {
|
||||
// The first composition of the previous run was a match.
|
||||
output += compositions[l].dst;
|
||||
output += data[idxComp[l].GetChrStart()];
|
||||
i = ii;
|
||||
if (j > 1 && map) {
|
||||
// Mapping changed.
|
||||
@ -120,7 +115,48 @@ static inline void Compose(
|
||||
}
|
||||
|
||||
|
||||
void ZRCOLA_API ZRCola::Compose(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map)
|
||||
void ZRCOLA_API ZRCola::translation_db::Decompose(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map) const
|
||||
{
|
||||
::Compose(compositions, compositionsCount, input, inputMax, output, map);
|
||||
assert(input || inputMax == 0);
|
||||
|
||||
// Trim inputMax to actual length.
|
||||
inputMax = inputMax != (size_t)-1 ? wcsnlen(input, inputMax) : wcslen(input);
|
||||
|
||||
// Clear the output string and preallocate at least 2*inputMax chars.
|
||||
// Since decomposition expands the string, let's keep our fingers crossed to avoid reallocation later.
|
||||
output.clear();
|
||||
output.reserve(inputMax * 2);
|
||||
if (map)
|
||||
map->clear();
|
||||
|
||||
std::vector<index>::size_type decompositionsCount = idxDecomp.size();
|
||||
|
||||
for (size_t i = 0; i < inputMax;) {
|
||||
// Find whether the character can be decomposed.
|
||||
wchar_t c = input[i];
|
||||
|
||||
for (size_t l = 0, r = decompositionsCount;; ) {
|
||||
if (l < r) {
|
||||
size_t m = (l + r) / 2;
|
||||
wchar_t decompSrc = data[idxDecomp[m].GetChrStart()];
|
||||
if (c < decompSrc) r = m;
|
||||
else if (decompSrc < c) l = m + 1;
|
||||
else {
|
||||
// Character found.
|
||||
output.append(&data[idxDecomp[m].GetStrStart()], idxDecomp[m].GetStrLength());
|
||||
i++;
|
||||
if (map) {
|
||||
// Mapping changed.
|
||||
map->push_back(ZRCola::mapping(i, output.length()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Character not found.
|
||||
output += c;
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1 +1 @@
|
||||
Subproject commit f5a2b04caa11c846a5414d5c10934723e53aa84b
|
||||
Subproject commit fbb20a204206e644140a653d518c3257bc872c47
|
Loading…
x
Reference in New Issue
Block a user