libZRCola: Make UTF-16 explicit
ZRCola uses UTF-16LE strings internally (a legacy of its Windows origins). However, wchar_t and std::wstring are UTF-32 on other platforms, so the UTF-16 types char16_t and std::u16string are now used explicitly.
Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
@@ -29,11 +29,16 @@ namespace ZRCola {
|
||||
///
|
||||
typedef double charrank_t;
|
||||
|
||||
inline bool ispua(_In_ wchar_t c)
|
||||
inline bool ispua(_In_ char16_t c)
|
||||
{
|
||||
return L'\ue000' <= c && c <= L'\uf8ff';
|
||||
return u'\ue000' <= c && c <= u'\uf8ff';
|
||||
}
|
||||
|
||||
#ifndef _WIN32
|
||||
size_t wcslen(_In_z_ const char16_t *str);
|
||||
size_t wcsnlen(_In_z_count_(count) const char16_t *str, _In_ size_t count);
|
||||
#endif
|
||||
|
||||
#pragma pack(push)
|
||||
#pragma pack(2)
|
||||
///
|
||||
@@ -183,7 +188,7 @@ namespace ZRCola {
|
||||
uint16_t chr_to; ///< Character end in \c data
|
||||
uint16_t desc_to; ///< Character description end in \c data
|
||||
uint16_t rel_to; ///< Related characters end in \c data
|
||||
wchar_t data[]; ///< Character, character description
|
||||
char16_t data[]; ///< Character, character description
|
||||
|
||||
private:
|
||||
inline character(_In_ const character &other);
|
||||
@@ -202,40 +207,40 @@ namespace ZRCola {
|
||||
/// \param[in] rel_len Number of UTF-16 characters in \p rel (including zero delimiters)
|
||||
///
|
||||
inline character(
|
||||
_In_opt_z_count_(chr_len) const wchar_t *chr = NULL,
|
||||
_In_opt_z_count_(chr_len) const char16_t *chr = NULL,
|
||||
_In_opt_ size_t chr_len = 0,
|
||||
_In_opt_ chrcatid_t cat = chrcatid_t::blank,
|
||||
_In_opt_z_count_(desc_len) const wchar_t *desc = NULL,
|
||||
_In_opt_z_count_(desc_len) const char16_t *desc = NULL,
|
||||
_In_opt_ size_t desc_len = 0,
|
||||
_In_opt_z_count_(rel_len) const wchar_t *rel = NULL,
|
||||
_In_opt_z_count_(rel_len) const char16_t *rel = NULL,
|
||||
_In_opt_ size_t rel_len = 0)
|
||||
{
|
||||
this->cat = cat;
|
||||
this->chr_to = static_cast<uint16_t>(chr_len);
|
||||
if (chr && chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len);
|
||||
if (chr && chr_len) memcpy(this->data, chr, sizeof(char16_t)*chr_len);
|
||||
this->desc_to = static_cast<uint16_t>(this->chr_to + desc_len);
|
||||
if (desc && desc_len) memcpy(this->data + this->chr_to, desc, sizeof(wchar_t)*desc_len);
|
||||
if (desc && desc_len) memcpy(this->data + this->chr_to, desc, sizeof(char16_t)*desc_len);
|
||||
this->rel_to = static_cast<uint16_t>(this->desc_to + rel_len);
|
||||
if (rel && rel_len) memcpy(this->data + this->desc_to, rel, sizeof(wchar_t)*rel_len);
|
||||
if (rel && rel_len) memcpy(this->data + this->desc_to, rel, sizeof(char16_t)*rel_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* chr () const { return data; };
|
||||
inline wchar_t* chr () { return data; };
|
||||
inline const wchar_t* chr_end() const { return data + chr_to; };
|
||||
inline wchar_t* chr_end() { return data + chr_to; };
|
||||
inline uint16_t chr_len() const { return chr_to; };
|
||||
inline const char16_t* chr () const { return data; };
|
||||
inline char16_t* chr () { return data; };
|
||||
inline const char16_t* chr_end() const { return data + chr_to; };
|
||||
inline char16_t* chr_end() { return data + chr_to; };
|
||||
inline uint16_t chr_len() const { return chr_to; };
|
||||
|
||||
inline const wchar_t* desc () const { return data + chr_to; };
|
||||
inline wchar_t* desc () { return data + chr_to; };
|
||||
inline const wchar_t* desc_end() const { return data + desc_to; };
|
||||
inline wchar_t* desc_end() { return data + desc_to; };
|
||||
inline uint16_t desc_len() const { return desc_to - chr_to; };
|
||||
inline const char16_t* desc () const { return data + chr_to; };
|
||||
inline char16_t* desc () { return data + chr_to; };
|
||||
inline const char16_t* desc_end() const { return data + desc_to; };
|
||||
inline char16_t* desc_end() { return data + desc_to; };
|
||||
inline uint16_t desc_len() const { return desc_to - chr_to; };
|
||||
|
||||
inline const wchar_t* rel () const { return data + desc_to; };
|
||||
inline wchar_t* rel () { return data + desc_to; };
|
||||
inline const wchar_t* rel_end() const { return data + rel_to; };
|
||||
inline wchar_t* rel_end() { return data + rel_to; };
|
||||
inline uint16_t rel_len() const { return rel_to - desc_to; };
|
||||
inline const char16_t* rel () const { return data + desc_to; };
|
||||
inline char16_t* rel () { return data + desc_to; };
|
||||
inline const char16_t* rel_end() const { return data + rel_to; };
|
||||
inline char16_t* rel_end() { return data + rel_to; };
|
||||
inline uint16_t rel_len() const { return rel_to - desc_to; };
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -272,9 +277,9 @@ namespace ZRCola {
|
||||
}
|
||||
} idxChr; ///< Character index
|
||||
|
||||
textindex<wchar_t, wchar_t, uint32_t> idxDsc; ///< Description index
|
||||
textindex<wchar_t, wchar_t, uint32_t> idxDscSub; ///< Description index (sub-terms)
|
||||
std::vector<uint16_t> data; ///< Character data
|
||||
textindex<char16_t, char16_t, uint32_t> idxDsc; ///< Description index
|
||||
textindex<char16_t, char16_t, uint32_t> idxDscSub; ///< Description index (sub-terms)
|
||||
std::vector<uint16_t> data; ///< Character data
|
||||
|
||||
public:
|
||||
///
|
||||
@@ -303,7 +308,7 @@ namespace ZRCola {
|
||||
/// \param[in ] fn_abort Pointer to function to periodically test for search cancellation
|
||||
/// \param[in ] cookie Cookie for \p fn_abort call
|
||||
///
|
||||
bool Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::wstring, charrank_t> &hits, _Inout_ std::map<std::wstring, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
|
||||
bool Search(_In_z_ const char16_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::u16string, charrank_t> &hits, _Inout_ std::map<std::u16string, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
|
||||
|
||||
///
|
||||
/// Get character category
|
||||
@@ -315,10 +320,10 @@ namespace ZRCola {
|
||||
/// - Character category if character found
|
||||
/// - `ZRCola::chrcatid_t::blank` otherwise
|
||||
///
|
||||
inline chrcatid_t GetCharCat(_In_z_count_(len) const wchar_t *chr, _In_ const size_t len) const
|
||||
inline chrcatid_t GetCharCat(_In_z_count_(len) const char16_t *chr, _In_ const size_t len) const
|
||||
{
|
||||
assert(len <= 0xffff);
|
||||
std::unique_ptr<character> c((character*)new char[sizeof(character) + sizeof(wchar_t)*len]);
|
||||
std::unique_ptr<character> c((character*)new char[sizeof(character) + sizeof(char16_t)*len]);
|
||||
new (c.get()) character(chr, len);
|
||||
indexChr::size_type start;
|
||||
return idxChr.find(*c, start) ? idxChr[start].cat : chrcatid_t::blank;
|
||||
@@ -343,7 +348,7 @@ namespace ZRCola {
|
||||
|
||||
protected:
|
||||
uint16_t name_to; ///< Character category name end in \c data
|
||||
wchar_t data[]; ///< Character category name
|
||||
char16_t data[]; ///< Character category name
|
||||
|
||||
private:
|
||||
inline chrcat(_In_ const chrcat &other);
|
||||
@@ -361,20 +366,20 @@ namespace ZRCola {
|
||||
inline chrcat(
|
||||
_In_opt_ chrcatid_t cat = chrcatid_t::blank,
|
||||
_In_opt_ uint16_t rank = 0,
|
||||
_In_opt_z_count_(name_len) const wchar_t *name = NULL,
|
||||
_In_opt_z_count_(name_len) const char16_t *name = NULL,
|
||||
_In_opt_ size_t name_len = 0)
|
||||
{
|
||||
this->cat = cat;
|
||||
this->rank = rank;
|
||||
this->name_to = static_cast<uint16_t>(name_len);
|
||||
if (name && name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len);
|
||||
if (name && name_len) memcpy(this->data, name, sizeof(char16_t)*name_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* name () const { return data; };
|
||||
inline wchar_t* name () { return data; };
|
||||
inline const wchar_t* name_end() const { return data + name_to; };
|
||||
inline wchar_t* name_end() { return data + name_to; };
|
||||
inline uint16_t name_len() const { return name_to; };
|
||||
inline const char16_t* name () const { return data; };
|
||||
inline char16_t* name () { return data; };
|
||||
inline const char16_t* name_end() const { return data + name_to; };
|
||||
inline char16_t* name_end() { return data + name_to; };
|
||||
inline uint16_t name_len() const { return name_to; };
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -459,7 +464,7 @@ namespace ZRCola {
|
||||
if (a.rank < b.rank) return -1;
|
||||
else if (a.rank > b.rank) return +1;
|
||||
|
||||
auto &coll = std::use_facet<std::collate<wchar_t>>(std::locale());
|
||||
auto &coll = std::use_facet<std::collate<char16_t>>(std::locale());
|
||||
return coll.compare(a.name(), a.name_end(), b.name(), b.name_end());
|
||||
}
|
||||
} idxRank; ///< Rank index
|
||||
|
@@ -571,7 +571,7 @@ namespace ZRCola {
|
||||
/// The function does not treat \\0 characters as terminators for performance reasons.
|
||||
/// Therefore \p count_a and \p count_b must represent exact string lengths.
|
||||
///
|
||||
int CompareString(_In_ const wchar_t* str_a, _In_ size_t count_a, _In_ const wchar_t* str_b, _In_ size_t count_b);
|
||||
int CompareString(_In_ const char16_t* str_a, _In_ size_t count_a, _In_ const char16_t* str_b, _In_ size_t count_b);
|
||||
|
||||
///
|
||||
/// Generates and returns Unicode representation of the string using hexadecimal codes.
|
||||
@@ -580,7 +580,7 @@ namespace ZRCola {
|
||||
/// \param[in] count Number of characters in string \p str
|
||||
/// \param[in] sep Separator
|
||||
///
|
||||
std::string GetUnicodeDumpA(_In_z_count_(count) const wchar_t* str, _In_ size_t count, _In_z_ const char* sep = "+");
|
||||
std::string GetUnicodeDumpA(_In_z_count_(count) const char16_t* str, _In_ size_t count, _In_z_ const char* sep = "+");
|
||||
|
||||
///
|
||||
/// Generates and returns Unicode representation of the string using hexadecimal codes.
|
||||
@@ -589,7 +589,7 @@ namespace ZRCola {
|
||||
/// \param[in] count Number of characters in string \p str
|
||||
/// \param[in] sep Separator
|
||||
///
|
||||
std::wstring GetUnicodeDumpW(_In_z_count_(count) const wchar_t* str, _In_ size_t count, _In_z_ const wchar_t* sep = L"+");
|
||||
std::wstring GetUnicodeDumpW(_In_z_count_(count) const char16_t* str, _In_ size_t count, _In_z_ const wchar_t* sep = L"+");
|
||||
|
||||
#ifdef _UNICODE
|
||||
#define GetUnicodeDump GetUnicodeDumpW
|
||||
|
@@ -45,7 +45,7 @@ namespace ZRCola {
|
||||
|
||||
protected:
|
||||
uint16_t chr_to; ///< Character end in \c data
|
||||
wchar_t data[]; ///< Character
|
||||
char16_t data[]; ///< Character
|
||||
|
||||
private:
|
||||
inline highlight(_In_ const highlight &other);
|
||||
@@ -61,21 +61,21 @@ namespace ZRCola {
|
||||
///
|
||||
inline highlight(
|
||||
_In_opt_ hlghtsetid_t set = 0,
|
||||
_In_opt_z_count_(chr_len) const wchar_t *chr = NULL,
|
||||
_In_opt_z_count_(chr_len) const char16_t *chr = NULL,
|
||||
_In_opt_ size_t chr_len = 0)
|
||||
{
|
||||
this->set = set;
|
||||
this->chr_to = static_cast<uint16_t>(chr_len);
|
||||
if (chr && chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len);
|
||||
if (chr && chr_len) memcpy(this->data, chr, sizeof(char16_t)*chr_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* chr () const { return data; };
|
||||
inline wchar_t* chr () { return data; };
|
||||
inline const wchar_t* chr_end() const { return data + chr_to; };
|
||||
inline wchar_t* chr_end() { return data + chr_to; };
|
||||
inline uint16_t chr_len() const { return chr_to; };
|
||||
inline const char16_t* chr () const { return data; };
|
||||
inline char16_t* chr () { return data; };
|
||||
inline const char16_t* chr_end() const { return data + chr_to; };
|
||||
inline char16_t* chr_end() { return data + chr_to; };
|
||||
inline uint16_t chr_len() const { return chr_to; };
|
||||
|
||||
inline wchar_t chr_at(_In_ size_t i) const
|
||||
inline char16_t chr_at(_In_ size_t i) const
|
||||
{
|
||||
return i < chr_to ? data[i] : 0;
|
||||
}
|
||||
@@ -160,7 +160,7 @@ namespace ZRCola {
|
||||
/// \param[in] inputMax Length of the input string in characters. Can be (size_t)-1 if \p input is zero terminated.
|
||||
/// \param[in] callback Function to be called on highlight switch
|
||||
///
|
||||
void Highlight(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _In_ std::function<void (hlghtsetid_t set, size_t start, size_t end)> callback) const;
|
||||
void Highlight(_In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _In_ std::function<void (hlghtsetid_t set, size_t start, size_t end)> callback) const;
|
||||
};
|
||||
};
|
||||
|
||||
|
@@ -35,7 +35,7 @@ namespace ZRCola {
|
||||
|
||||
protected:
|
||||
uint16_t chr_to; ///< Character end in \c data
|
||||
wchar_t data[]; ///< Character
|
||||
char16_t data[]; ///< Character
|
||||
|
||||
private:
|
||||
inline langchar(_In_ const langchar &other);
|
||||
@@ -51,19 +51,19 @@ namespace ZRCola {
|
||||
///
|
||||
inline langchar(
|
||||
_In_opt_ langid_t lang = langid_t::blank,
|
||||
_In_opt_z_count_(chr_len) const wchar_t *chr = NULL,
|
||||
_In_opt_z_count_(chr_len) const char16_t *chr = NULL,
|
||||
_In_opt_ size_t chr_len = 0)
|
||||
{
|
||||
this->lang = lang;
|
||||
this->chr_to = static_cast<uint16_t>(chr_len);
|
||||
if (chr && chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len);
|
||||
if (chr && chr_len) memcpy(this->data, chr, sizeof(char16_t)*chr_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* chr () const { return data; };
|
||||
inline wchar_t* chr () { return data; };
|
||||
inline const wchar_t* chr_end() const { return data + chr_to; };
|
||||
inline wchar_t* chr_end() { return data + chr_to; };
|
||||
inline uint16_t chr_len() const { return chr_to; };
|
||||
inline const char16_t* chr () const { return data; };
|
||||
inline char16_t* chr () { return data; };
|
||||
inline const char16_t* chr_end() const { return data + chr_to; };
|
||||
inline char16_t* chr_end() { return data + chr_to; };
|
||||
inline uint16_t chr_len() const { return chr_to; };
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -176,7 +176,7 @@ namespace ZRCola {
|
||||
/// \returns
|
||||
/// - \c true when character is used in language
|
||||
/// - \c false otherwise
|
||||
bool IsLocalCharacter(_In_ const wchar_t *chr, _In_ const wchar_t *chr_end, _In_ langid_t lang) const;
|
||||
bool IsLocalCharacter(_In_ const char16_t *chr, _In_ const char16_t *chr_end, _In_ langid_t lang) const;
|
||||
};
|
||||
|
||||
|
||||
@@ -196,7 +196,7 @@ namespace ZRCola {
|
||||
|
||||
protected:
|
||||
uint16_t name_to; ///< Language name end in \c data
|
||||
wchar_t data[]; ///< Language name
|
||||
char16_t data[]; ///< Language name
|
||||
|
||||
private:
|
||||
inline language(_In_ const language &other);
|
||||
@@ -212,19 +212,19 @@ namespace ZRCola {
|
||||
///
|
||||
inline language(
|
||||
_In_opt_ langid_t lang = langid_t::blank,
|
||||
_In_opt_z_count_(name_len) const wchar_t *name = NULL,
|
||||
_In_opt_z_count_(name_len) const char16_t *name = NULL,
|
||||
_In_opt_ size_t name_len = 0)
|
||||
{
|
||||
this->lang = lang;
|
||||
this->name_to = static_cast<uint16_t>(name_len);
|
||||
if (name && name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len);
|
||||
if (name && name_len) memcpy(this->data, name, sizeof(char16_t)*name_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* name () const { return data; };
|
||||
inline wchar_t* name () { return data; };
|
||||
inline const wchar_t* name_end() const { return data + name_to; };
|
||||
inline wchar_t* name_end() { return data + name_to; };
|
||||
inline uint16_t name_len() const { return name_to; };
|
||||
inline const char16_t* name () const { return data; };
|
||||
inline char16_t* name () { return data; };
|
||||
inline const char16_t* name_end() const { return data + name_to; };
|
||||
inline char16_t* name_end() { return data + name_to; };
|
||||
inline uint16_t name_len() const { return name_to; };
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
|
@@ -38,7 +38,7 @@ namespace ZRCola {
|
||||
|
||||
protected:
|
||||
uint16_t chr_to; ///< Character end in \c data
|
||||
wchar_t data[]; ///< Character
|
||||
char16_t data[]; ///< Character
|
||||
|
||||
private:
|
||||
inline chrtag(_In_ const chrtag &other);
|
||||
@@ -53,20 +53,20 @@ namespace ZRCola {
|
||||
/// \param[in] tag Tag
|
||||
///
|
||||
inline chrtag(
|
||||
_In_opt_z_count_(chr_len) const wchar_t *chr = NULL,
|
||||
_In_opt_ size_t chr_len = 0,
|
||||
_In_opt_ tagid_t tag = 0)
|
||||
_In_opt_z_count_(chr_len) const char16_t *chr = NULL,
|
||||
_In_opt_ size_t chr_len = 0,
|
||||
_In_opt_ tagid_t tag = 0)
|
||||
{
|
||||
this->tag = tag;
|
||||
this->chr_to = static_cast<uint16_t>(chr_len);
|
||||
if (chr && chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len);
|
||||
if (chr && chr_len) memcpy(this->data, chr, sizeof(char16_t)*chr_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* chr () const { return data; };
|
||||
inline wchar_t* chr () { return data; };
|
||||
inline const wchar_t* chr_end() const { return data + chr_to; };
|
||||
inline wchar_t* chr_end() { return data + chr_to; };
|
||||
inline uint16_t chr_len() const { return chr_to; };
|
||||
inline const char16_t* chr () const { return data; };
|
||||
inline char16_t* chr () { return data; };
|
||||
inline const char16_t* chr_end() const { return data + chr_to; };
|
||||
inline char16_t* chr_end() { return data + chr_to; };
|
||||
inline uint16_t chr_len() const { return chr_to; };
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -209,7 +209,7 @@ namespace ZRCola {
|
||||
/// \param[in ] fn_abort Pointer to function to periodically test for search cancellation
|
||||
/// \param[in ] cookie Cookie for \p fn_abort call
|
||||
///
|
||||
bool Search(_In_ const std::map<tagid_t, uint16_t> &tags, _In_ const character_db &ch_db, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::wstring, charrank_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
|
||||
bool Search(_In_ const std::map<tagid_t, uint16_t> &tags, _In_ const character_db &ch_db, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::u16string, charrank_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
|
||||
};
|
||||
|
||||
|
||||
@@ -230,7 +230,7 @@ namespace ZRCola {
|
||||
|
||||
protected:
|
||||
uint16_t name_to; ///< Tag name end in \c data
|
||||
wchar_t data[]; ///< Tag name
|
||||
char16_t data[]; ///< Tag name
|
||||
|
||||
private:
|
||||
inline tagname(_In_ const tagname &other);
|
||||
@@ -246,22 +246,22 @@ namespace ZRCola {
|
||||
/// \param[in] name_len Number of UTF-16 characters in \p name
|
||||
///
|
||||
inline tagname(
|
||||
_In_opt_ tagid_t tag = 0,
|
||||
_In_opt_ uint32_t locale = 0,
|
||||
_In_opt_z_count_(name_len) const wchar_t *name = NULL,
|
||||
_In_opt_ size_t name_len = 0)
|
||||
_In_opt_ tagid_t tag = 0,
|
||||
_In_opt_ uint32_t locale = 0,
|
||||
_In_opt_z_count_(name_len) const char16_t *name = NULL,
|
||||
_In_opt_ size_t name_len = 0)
|
||||
{
|
||||
this->tag = tag;
|
||||
this->locale = locale;
|
||||
this->name_to = static_cast<uint16_t>(name_len);
|
||||
if (name && name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len);
|
||||
if (name && name_len) memcpy(this->data, name, sizeof(char16_t)*name_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* name () const { return data; };
|
||||
inline wchar_t* name () { return data; };
|
||||
inline const wchar_t* name_end() const { return data + name_to; };
|
||||
inline wchar_t* name_end() { return data + name_to; };
|
||||
inline uint16_t name_len() const { return name_to; };
|
||||
inline const char16_t* name () const { return data; };
|
||||
inline char16_t* name () { return data; };
|
||||
inline const char16_t* name_end() const { return data + name_to; };
|
||||
inline char16_t* name_end() { return data + name_to; };
|
||||
inline uint16_t name_len() const { return name_to; };
|
||||
|
||||
///
|
||||
/// Compares two names
|
||||
@@ -281,7 +281,7 @@ namespace ZRCola {
|
||||
/// The function does not treat \\0 characters as terminators for performance reasons.
|
||||
/// Therefore \p count_a and \p count_b must represent exact string lengths.
|
||||
///
|
||||
static inline int CompareName(_In_ uint32_t locale, _In_z_count_(count_a) const wchar_t *str_a, _In_ uint16_t count_a, _In_z_count_(count_b) const wchar_t *str_b, _In_ uint16_t count_b)
|
||||
static inline int CompareName(_In_ uint32_t locale, _In_z_count_(count_a) const char16_t *str_a, _In_ uint16_t count_a, _In_z_count_(count_b) const char16_t *str_b, _In_ uint16_t count_b)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
switch (::CompareString(locale, SORT_STRINGSORT | NORM_IGNORECASE, str_a, count_a, str_b, count_b)) {
|
||||
@@ -291,14 +291,14 @@ namespace ZRCola {
|
||||
default : assert(0); return -1;
|
||||
}
|
||||
#else
|
||||
assert(0); // TODO: 1. Should honour locale. 2. Should use ICU for lowercase conversion.
|
||||
std::wstring
|
||||
assert(0); // TODO: 1. Should honour locale. 2. Should use ICU for lowercase conversion. 3. Should be UTF-16-aware.
|
||||
std::u16string
|
||||
a(str_a, count_a),
|
||||
b(str_b, count_b);
|
||||
auto tolower = [](wchar_t c){ return std::towlower(c); };
|
||||
auto tolower = [](char16_t c){ return std::towlower(c); };
|
||||
std::transform(a.begin(), a.end(), a.begin(), tolower);
|
||||
std::transform(b.begin(), b.end(), b.begin(), tolower);
|
||||
auto &coll = std::use_facet<std::collate<wchar_t>>(std::locale());
|
||||
auto &coll = std::use_facet<std::collate<char16_t>>(std::locale());
|
||||
return coll.compare(&*a.cbegin(), &*a.cend(), &*b.cbegin(), &*b.cend());
|
||||
#endif
|
||||
}
|
||||
@@ -431,7 +431,7 @@ namespace ZRCola {
|
||||
/// \param[in ] fn_abort Pointer to function to periodically test for search cancellation
|
||||
/// \param[in ] cookie Cookie for \p fn_abort call
|
||||
///
|
||||
bool Search(_In_z_ const wchar_t *str, _In_ uint32_t locale, _Inout_ std::map<tagid_t, uint16_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
|
||||
bool Search(_In_z_ const char16_t *str, _In_ uint32_t locale, _Inout_ std::map<tagid_t, uint16_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
|
||||
};
|
||||
};
|
||||
|
||||
|
@@ -84,7 +84,7 @@ namespace ZRCola {
|
||||
protected:
|
||||
uint16_t dst_to; ///< Destination character end in \c data
|
||||
uint16_t src_to; ///< Source string end in \c data
|
||||
wchar_t data[]; ///< Destination string and source character
|
||||
char16_t data[]; ///< Destination string and source character
|
||||
|
||||
private:
|
||||
inline translation(_In_ const translation &other);
|
||||
@@ -105,39 +105,39 @@ namespace ZRCola {
|
||||
inline translation(
|
||||
_In_opt_ transetid_t set = 0,
|
||||
_In_opt_ uint16_t dst_rank = 0,
|
||||
_In_opt_z_count_(dst_len) const wchar_t *dst = NULL,
|
||||
_In_opt_z_count_(dst_len) const char16_t *dst = NULL,
|
||||
_In_opt_ size_t dst_len = 0,
|
||||
_In_opt_ uint16_t src_rank = 0,
|
||||
_In_opt_z_count_(src_len) const wchar_t *src = NULL,
|
||||
_In_opt_z_count_(src_len) const char16_t *src = NULL,
|
||||
_In_opt_ size_t src_len = 0)
|
||||
{
|
||||
this->set = set;
|
||||
this->dst_rank = dst_rank;
|
||||
this->src_rank = src_rank;
|
||||
this->dst_to = static_cast<uint16_t>(dst_len);
|
||||
if (dst && dst_len) memcpy(this->data, dst, sizeof(wchar_t)*dst_len);
|
||||
if (dst && dst_len) memcpy(this->data, dst, sizeof(char16_t)*dst_len);
|
||||
this->src_to = static_cast<uint16_t>(this->dst_to + src_len);
|
||||
if (src && src_len) memcpy(this->data + this->dst_to, src, sizeof(wchar_t)*src_len);
|
||||
if (src && src_len) memcpy(this->data + this->dst_to, src, sizeof(char16_t)*src_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* dst () const { return data; };
|
||||
inline wchar_t* dst () { return data; };
|
||||
inline const wchar_t* dst_end() const { return data + dst_to; };
|
||||
inline wchar_t* dst_end() { return data + dst_to; };
|
||||
inline uint16_t dst_len() const { return dst_to; };
|
||||
inline const char16_t* dst () const { return data; };
|
||||
inline char16_t* dst () { return data; };
|
||||
inline const char16_t* dst_end() const { return data + dst_to; };
|
||||
inline char16_t* dst_end() { return data + dst_to; };
|
||||
inline uint16_t dst_len() const { return dst_to; };
|
||||
|
||||
inline wchar_t dst_at(_In_ size_t i) const
|
||||
inline char16_t dst_at(_In_ size_t i) const
|
||||
{
|
||||
return i < dst_to ? data[i] : 0;
|
||||
}
|
||||
|
||||
inline const wchar_t* src () const { return data + dst_to; };
|
||||
inline wchar_t* src () { return data + dst_to; };
|
||||
inline const wchar_t* src_end() const { return data + src_to; };
|
||||
inline wchar_t* src_end() { return data + src_to; };
|
||||
inline uint16_t src_len() const { return src_to - dst_to; };
|
||||
inline const char16_t* src () const { return data + dst_to; };
|
||||
inline char16_t* src () { return data + dst_to; };
|
||||
inline const char16_t* src_end() const { return data + src_to; };
|
||||
inline char16_t* src_end() { return data + src_to; };
|
||||
inline uint16_t src_len() const { return src_to - dst_to; };
|
||||
|
||||
inline wchar_t src_at(_In_ size_t i) const
|
||||
inline char16_t src_at(_In_ size_t i) const
|
||||
{
|
||||
size_t ii = i + dst_to; // absolute index
|
||||
return ii < src_to ? data[ii] : 0;
|
||||
@@ -302,7 +302,7 @@ namespace ZRCola {
|
||||
/// \param[out] output Output string (UTF-16)
|
||||
/// \param[out] map The vector of source to destination index mappings (optional)
|
||||
///
|
||||
void Translate(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const;
|
||||
void Translate(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _Out_ std::u16string &output, _Out_opt_ std::vector<mapping>* map = NULL) const;
|
||||
|
||||
///
|
||||
/// Inverse translates string
|
||||
@@ -313,7 +313,7 @@ namespace ZRCola {
|
||||
/// \param[out] output Output string (UTF-16)
|
||||
/// \param[out] map The vector of source to destination index mappings (optional)
|
||||
///
|
||||
inline void TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const
|
||||
inline void TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _Out_ std::u16string &output, _Out_opt_ std::vector<mapping>* map = NULL) const
|
||||
{
|
||||
TranslateInv(set, input, inputMax, NULL, langid_t::blank, output, map);
|
||||
}
|
||||
@@ -329,7 +329,7 @@ namespace ZRCola {
|
||||
/// \param[out] output Output string (UTF-16)
|
||||
/// \param[out] map The vector of source to destination index mappings (optional)
|
||||
///
|
||||
void TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _In_opt_ const langchar_db *lc_db, _In_opt_ langid_t lang, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const;
|
||||
void TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _In_opt_ const langchar_db *lc_db, _In_opt_ langid_t lang, _Out_ std::u16string &output, _Out_opt_ std::vector<mapping>* map = NULL) const;
|
||||
};
|
||||
|
||||
|
||||
@@ -350,7 +350,7 @@ namespace ZRCola {
|
||||
protected:
|
||||
uint16_t src_to; ///< Source name end in \c data
|
||||
uint16_t dst_to; ///< Destination name end in \c data
|
||||
wchar_t data[]; ///< Source and destination names
|
||||
char16_t data[]; ///< Source and destination names
|
||||
|
||||
private:
|
||||
inline transet(_In_ const transet &other);
|
||||
@@ -368,29 +368,29 @@ namespace ZRCola {
|
||||
///
|
||||
inline transet(
|
||||
_In_opt_ transetid_t set = 0,
|
||||
_In_opt_z_count_(src_len) const wchar_t *src = NULL,
|
||||
_In_opt_z_count_(src_len) const char16_t *src = NULL,
|
||||
_In_opt_ size_t src_len = 0,
|
||||
_In_opt_z_count_(dst_len) const wchar_t *dst = NULL,
|
||||
_In_opt_z_count_(dst_len) const char16_t *dst = NULL,
|
||||
_In_opt_ size_t dst_len = 0)
|
||||
{
|
||||
this->set = set;
|
||||
this->src_to = static_cast<uint16_t>(src_len);
|
||||
if (src && src_len) memcpy(this->data, src, sizeof(wchar_t)*src_len);
|
||||
if (src && src_len) memcpy(this->data, src, sizeof(char16_t)*src_len);
|
||||
this->dst_to = static_cast<uint16_t>(this->src_to + dst_len);
|
||||
if (dst && dst_len) memcpy(this->data + this->src_to, dst, sizeof(wchar_t)*dst_len);
|
||||
if (dst && dst_len) memcpy(this->data + this->src_to, dst, sizeof(char16_t)*dst_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* src () const { return data; };
|
||||
inline wchar_t* src () { return data; };
|
||||
inline const wchar_t* src_end() const { return data + src_to; };
|
||||
inline wchar_t* src_end() { return data + src_to; };
|
||||
inline uint16_t src_len() const { return src_to; };
|
||||
inline const char16_t* src () const { return data; };
|
||||
inline char16_t* src () { return data; };
|
||||
inline const char16_t* src_end() const { return data + src_to; };
|
||||
inline char16_t* src_end() { return data + src_to; };
|
||||
inline uint16_t src_len() const { return src_to; };
|
||||
|
||||
inline const wchar_t* dst () const { return data + src_to; };
|
||||
inline wchar_t* dst () { return data + src_to; };
|
||||
inline const wchar_t* dst_end() const { return data + dst_to; };
|
||||
inline wchar_t* dst_end() { return data + dst_to; };
|
||||
inline uint16_t dst_len() const { return dst_to - src_to; };
|
||||
inline const char16_t* dst () const { return data + src_to; };
|
||||
inline char16_t* dst () { return data + src_to; };
|
||||
inline const char16_t* dst_end() const { return data + dst_to; };
|
||||
inline char16_t* dst_end() { return data + dst_to; };
|
||||
inline uint16_t dst_len() const { return dst_to - src_to; };
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -464,7 +464,7 @@ namespace ZRCola {
|
||||
protected:
|
||||
uint16_t name_to; ///< Translation sequence name end in \c data
|
||||
uint16_t sets_to; ///< Translation sequence sets end in \c data
|
||||
wchar_t data[]; ///< Translation sequence name and sets
|
||||
char16_t data[]; ///< Translation sequence name and sets
|
||||
|
||||
private:
|
||||
inline transeq(_In_ const transeq &other);
|
||||
@@ -484,7 +484,7 @@ namespace ZRCola {
|
||||
inline transeq(
|
||||
_In_opt_ transeqid_t seq = 0,
|
||||
_In_opt_ uint16_t rank = 0,
|
||||
_In_opt_z_count_(name_len) const wchar_t *name = NULL,
|
||||
_In_opt_z_count_(name_len) const char16_t *name = NULL,
|
||||
_In_opt_ size_t name_len = 0,
|
||||
_In_opt_count_ (sets_len) const transetid_t *sets = NULL,
|
||||
_In_opt_ size_t sets_len = 0)
|
||||
@@ -492,16 +492,16 @@ namespace ZRCola {
|
||||
this->seq = seq;
|
||||
this->rank = rank;
|
||||
this->name_to = static_cast<uint16_t>(name_len);
|
||||
if (name && name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len);
|
||||
if (name && name_len) memcpy(this->data, name, sizeof(char16_t)*name_len);
|
||||
this->sets_to = static_cast<uint16_t>(this->name_to + sets_len);
|
||||
if (sets && sets_len) memcpy(this->data + this->name_to, sets, sizeof(transetid_t)*sets_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* name () const { return data; };
|
||||
inline wchar_t* name () { return data; };
|
||||
inline const wchar_t* name_end() const { return data + name_to; };
|
||||
inline wchar_t* name_end() { return data + name_to; };
|
||||
inline uint16_t name_len() const { return name_to; };
|
||||
inline const char16_t* name () const { return data; };
|
||||
inline char16_t* name () { return data; };
|
||||
inline const char16_t* name_end() const { return data + name_to; };
|
||||
inline char16_t* name_end() { return data + name_to; };
|
||||
inline uint16_t name_len() const { return name_to; };
|
||||
|
||||
inline const transetid_t* sets () const { return reinterpret_cast<const transetid_t*>(data + name_to); };
|
||||
inline transetid_t* sets () { return reinterpret_cast< transetid_t*>(data + name_to); };
|
||||
@@ -592,7 +592,7 @@ namespace ZRCola {
|
||||
if (a.rank < b.rank) return -1;
|
||||
else if (a.rank > b.rank) return +1;
|
||||
|
||||
auto &coll = std::use_facet<std::collate<wchar_t>>(std::locale());
|
||||
auto &coll = std::use_facet<std::collate<char16_t>>(std::locale());
|
||||
return coll.compare(a.name(), a.name_end(), b.name(), b.name_end());
|
||||
}
|
||||
} idxRank; ///< Rank index
|
||||
|
Reference in New Issue
Block a user