libZRCola: Make UTF-16 explicit

ZRCola is using UTF-16LE strings internally (thanks to Windows).
However, wchar_t and std::wstring are UTF-32 on other platforms.

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2022-09-15 13:02:33 +02:00
parent f35e49dc8b
commit 9709cc2845
12 changed files with 204 additions and 177 deletions

View File

@ -29,11 +29,16 @@ namespace ZRCola {
/// ///
typedef double charrank_t; typedef double charrank_t;
inline bool ispua(_In_ wchar_t c) inline bool ispua(_In_ char16_t c)
{ {
return L'\ue000' <= c && c <= L'\uf8ff'; return u'\ue000' <= c && c <= u'\uf8ff';
} }
#ifndef _WIN32
size_t wcslen(_In_z_ const char16_t *str);
size_t wcsnlen(_In_z_count_(count) const char16_t *str, _In_ size_t count);
#endif
#pragma pack(push) #pragma pack(push)
#pragma pack(2) #pragma pack(2)
/// ///
@ -183,7 +188,7 @@ namespace ZRCola {
uint16_t chr_to; ///< Character end in \c data uint16_t chr_to; ///< Character end in \c data
uint16_t desc_to; ///< Character description end in \c data uint16_t desc_to; ///< Character description end in \c data
uint16_t rel_to; ///< Related characters end in \c data uint16_t rel_to; ///< Related characters end in \c data
wchar_t data[]; ///< Character, character description char16_t data[]; ///< Character, character description
private: private:
inline character(_In_ const character &other); inline character(_In_ const character &other);
@ -202,40 +207,40 @@ namespace ZRCola {
/// \param[in] rel_len Number of UTF-16 characters in \p rel (including zero delimiters) /// \param[in] rel_len Number of UTF-16 characters in \p rel (including zero delimiters)
/// ///
inline character( inline character(
_In_opt_z_count_(chr_len) const wchar_t *chr = NULL, _In_opt_z_count_(chr_len) const char16_t *chr = NULL,
_In_opt_ size_t chr_len = 0, _In_opt_ size_t chr_len = 0,
_In_opt_ chrcatid_t cat = chrcatid_t::blank, _In_opt_ chrcatid_t cat = chrcatid_t::blank,
_In_opt_z_count_(desc_len) const wchar_t *desc = NULL, _In_opt_z_count_(desc_len) const char16_t *desc = NULL,
_In_opt_ size_t desc_len = 0, _In_opt_ size_t desc_len = 0,
_In_opt_z_count_(rel_len) const wchar_t *rel = NULL, _In_opt_z_count_(rel_len) const char16_t *rel = NULL,
_In_opt_ size_t rel_len = 0) _In_opt_ size_t rel_len = 0)
{ {
this->cat = cat; this->cat = cat;
this->chr_to = static_cast<uint16_t>(chr_len); this->chr_to = static_cast<uint16_t>(chr_len);
if (chr && chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); if (chr && chr_len) memcpy(this->data, chr, sizeof(char16_t)*chr_len);
this->desc_to = static_cast<uint16_t>(this->chr_to + desc_len); this->desc_to = static_cast<uint16_t>(this->chr_to + desc_len);
if (desc && desc_len) memcpy(this->data + this->chr_to, desc, sizeof(wchar_t)*desc_len); if (desc && desc_len) memcpy(this->data + this->chr_to, desc, sizeof(char16_t)*desc_len);
this->rel_to = static_cast<uint16_t>(this->desc_to + rel_len); this->rel_to = static_cast<uint16_t>(this->desc_to + rel_len);
if (rel && rel_len) memcpy(this->data + this->desc_to, rel, sizeof(wchar_t)*rel_len); if (rel && rel_len) memcpy(this->data + this->desc_to, rel, sizeof(char16_t)*rel_len);
} }
inline const wchar_t* chr () const { return data; }; inline const char16_t* chr () const { return data; };
inline wchar_t* chr () { return data; }; inline char16_t* chr () { return data; };
inline const wchar_t* chr_end() const { return data + chr_to; }; inline const char16_t* chr_end() const { return data + chr_to; };
inline wchar_t* chr_end() { return data + chr_to; }; inline char16_t* chr_end() { return data + chr_to; };
inline uint16_t chr_len() const { return chr_to; }; inline uint16_t chr_len() const { return chr_to; };
inline const wchar_t* desc () const { return data + chr_to; }; inline const char16_t* desc () const { return data + chr_to; };
inline wchar_t* desc () { return data + chr_to; }; inline char16_t* desc () { return data + chr_to; };
inline const wchar_t* desc_end() const { return data + desc_to; }; inline const char16_t* desc_end() const { return data + desc_to; };
inline wchar_t* desc_end() { return data + desc_to; }; inline char16_t* desc_end() { return data + desc_to; };
inline uint16_t desc_len() const { return desc_to - chr_to; }; inline uint16_t desc_len() const { return desc_to - chr_to; };
inline const wchar_t* rel () const { return data + desc_to; }; inline const char16_t* rel () const { return data + desc_to; };
inline wchar_t* rel () { return data + desc_to; }; inline char16_t* rel () { return data + desc_to; };
inline const wchar_t* rel_end() const { return data + rel_to; }; inline const char16_t* rel_end() const { return data + rel_to; };
inline wchar_t* rel_end() { return data + rel_to; }; inline char16_t* rel_end() { return data + rel_to; };
inline uint16_t rel_len() const { return rel_to - desc_to; }; inline uint16_t rel_len() const { return rel_to - desc_to; };
}; };
#pragma pack(pop) #pragma pack(pop)
@ -272,9 +277,9 @@ namespace ZRCola {
} }
} idxChr; ///< Character index } idxChr; ///< Character index
textindex<wchar_t, wchar_t, uint32_t> idxDsc; ///< Description index textindex<char16_t, char16_t, uint32_t> idxDsc; ///< Description index
textindex<wchar_t, wchar_t, uint32_t> idxDscSub; ///< Description index (sub-terms) textindex<char16_t, char16_t, uint32_t> idxDscSub; ///< Description index (sub-terms)
std::vector<uint16_t> data; ///< Character data std::vector<uint16_t> data; ///< Character data
public: public:
/// ///
@ -303,7 +308,7 @@ namespace ZRCola {
/// \param[in ] fn_abort Pointer to function to periodically test for search cancellation /// \param[in ] fn_abort Pointer to function to periodically test for search cancellation
/// \param[in ] cookie Cookie for \p fn_abort call /// \param[in ] cookie Cookie for \p fn_abort call
/// ///
bool Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::wstring, charrank_t> &hits, _Inout_ std::map<std::wstring, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; bool Search(_In_z_ const char16_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::u16string, charrank_t> &hits, _Inout_ std::map<std::u16string, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
/// ///
/// Get character category /// Get character category
@ -315,10 +320,10 @@ namespace ZRCola {
/// - Character category if character found /// - Character category if character found
/// - `ZRCola::chrcatid_t::blank` otherwise /// - `ZRCola::chrcatid_t::blank` otherwise
/// ///
inline chrcatid_t GetCharCat(_In_z_count_(len) const wchar_t *chr, _In_ const size_t len) const inline chrcatid_t GetCharCat(_In_z_count_(len) const char16_t *chr, _In_ const size_t len) const
{ {
assert(len <= 0xffff); assert(len <= 0xffff);
std::unique_ptr<character> c((character*)new char[sizeof(character) + sizeof(wchar_t)*len]); std::unique_ptr<character> c((character*)new char[sizeof(character) + sizeof(char16_t)*len]);
new (c.get()) character(chr, len); new (c.get()) character(chr, len);
indexChr::size_type start; indexChr::size_type start;
return idxChr.find(*c, start) ? idxChr[start].cat : chrcatid_t::blank; return idxChr.find(*c, start) ? idxChr[start].cat : chrcatid_t::blank;
@ -343,7 +348,7 @@ namespace ZRCola {
protected: protected:
uint16_t name_to; ///< Character category name end in \c data uint16_t name_to; ///< Character category name end in \c data
wchar_t data[]; ///< Character category name char16_t data[]; ///< Character category name
private: private:
inline chrcat(_In_ const chrcat &other); inline chrcat(_In_ const chrcat &other);
@ -361,20 +366,20 @@ namespace ZRCola {
inline chrcat( inline chrcat(
_In_opt_ chrcatid_t cat = chrcatid_t::blank, _In_opt_ chrcatid_t cat = chrcatid_t::blank,
_In_opt_ uint16_t rank = 0, _In_opt_ uint16_t rank = 0,
_In_opt_z_count_(name_len) const wchar_t *name = NULL, _In_opt_z_count_(name_len) const char16_t *name = NULL,
_In_opt_ size_t name_len = 0) _In_opt_ size_t name_len = 0)
{ {
this->cat = cat; this->cat = cat;
this->rank = rank; this->rank = rank;
this->name_to = static_cast<uint16_t>(name_len); this->name_to = static_cast<uint16_t>(name_len);
if (name && name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len); if (name && name_len) memcpy(this->data, name, sizeof(char16_t)*name_len);
} }
inline const wchar_t* name () const { return data; }; inline const char16_t* name () const { return data; };
inline wchar_t* name () { return data; }; inline char16_t* name () { return data; };
inline const wchar_t* name_end() const { return data + name_to; }; inline const char16_t* name_end() const { return data + name_to; };
inline wchar_t* name_end() { return data + name_to; }; inline char16_t* name_end() { return data + name_to; };
inline uint16_t name_len() const { return name_to; }; inline uint16_t name_len() const { return name_to; };
}; };
#pragma pack(pop) #pragma pack(pop)
@ -459,7 +464,7 @@ namespace ZRCola {
if (a.rank < b.rank) return -1; if (a.rank < b.rank) return -1;
else if (a.rank > b.rank) return +1; else if (a.rank > b.rank) return +1;
auto &coll = std::use_facet<std::collate<wchar_t>>(std::locale()); auto &coll = std::use_facet<std::collate<char16_t>>(std::locale());
return coll.compare(a.name(), a.name_end(), b.name(), b.name_end()); return coll.compare(a.name(), a.name_end(), b.name(), b.name_end());
} }
} idxRank; ///< Rank index } idxRank; ///< Rank index

View File

@ -571,7 +571,7 @@ namespace ZRCola {
/// The function does not treat \\0 characters as terminators for performance reasons. /// The function does not treat \\0 characters as terminators for performance reasons.
/// Therefore \p count_a and \p count_b must represent exact string lengths. /// Therefore \p count_a and \p count_b must represent exact string lengths.
/// ///
int CompareString(_In_ const wchar_t* str_a, _In_ size_t count_a, _In_ const wchar_t* str_b, _In_ size_t count_b); int CompareString(_In_ const char16_t* str_a, _In_ size_t count_a, _In_ const char16_t* str_b, _In_ size_t count_b);
/// ///
/// Generates and returns Unicode representation of the string using hexadecimal codes. /// Generates and returns Unicode representation of the string using hexadecimal codes.
@ -580,7 +580,7 @@ namespace ZRCola {
/// \param[in] count Number of characters in string \p str /// \param[in] count Number of characters in string \p str
/// \param[in] sep Separator /// \param[in] sep Separator
/// ///
std::string GetUnicodeDumpA(_In_z_count_(count) const wchar_t* str, _In_ size_t count, _In_z_ const char* sep = "+"); std::string GetUnicodeDumpA(_In_z_count_(count) const char16_t* str, _In_ size_t count, _In_z_ const char* sep = "+");
/// ///
/// Generates and returns Unicode representation of the string using hexadecimal codes. /// Generates and returns Unicode representation of the string using hexadecimal codes.
@ -589,7 +589,7 @@ namespace ZRCola {
/// \param[in] count Number of characters in string \p str /// \param[in] count Number of characters in string \p str
/// \param[in] sep Separator /// \param[in] sep Separator
/// ///
std::wstring GetUnicodeDumpW(_In_z_count_(count) const wchar_t* str, _In_ size_t count, _In_z_ const wchar_t* sep = L"+"); std::wstring GetUnicodeDumpW(_In_z_count_(count) const char16_t* str, _In_ size_t count, _In_z_ const wchar_t* sep = L"+");
#ifdef _UNICODE #ifdef _UNICODE
#define GetUnicodeDump GetUnicodeDumpW #define GetUnicodeDump GetUnicodeDumpW

View File

@ -45,7 +45,7 @@ namespace ZRCola {
protected: protected:
uint16_t chr_to; ///< Character end in \c data uint16_t chr_to; ///< Character end in \c data
wchar_t data[]; ///< Character char16_t data[]; ///< Character
private: private:
inline highlight(_In_ const highlight &other); inline highlight(_In_ const highlight &other);
@ -61,21 +61,21 @@ namespace ZRCola {
/// ///
inline highlight( inline highlight(
_In_opt_ hlghtsetid_t set = 0, _In_opt_ hlghtsetid_t set = 0,
_In_opt_z_count_(chr_len) const wchar_t *chr = NULL, _In_opt_z_count_(chr_len) const char16_t *chr = NULL,
_In_opt_ size_t chr_len = 0) _In_opt_ size_t chr_len = 0)
{ {
this->set = set; this->set = set;
this->chr_to = static_cast<uint16_t>(chr_len); this->chr_to = static_cast<uint16_t>(chr_len);
if (chr && chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); if (chr && chr_len) memcpy(this->data, chr, sizeof(char16_t)*chr_len);
} }
inline const wchar_t* chr () const { return data; }; inline const char16_t* chr () const { return data; };
inline wchar_t* chr () { return data; }; inline char16_t* chr () { return data; };
inline const wchar_t* chr_end() const { return data + chr_to; }; inline const char16_t* chr_end() const { return data + chr_to; };
inline wchar_t* chr_end() { return data + chr_to; }; inline char16_t* chr_end() { return data + chr_to; };
inline uint16_t chr_len() const { return chr_to; }; inline uint16_t chr_len() const { return chr_to; };
inline wchar_t chr_at(_In_ size_t i) const inline char16_t chr_at(_In_ size_t i) const
{ {
return i < chr_to ? data[i] : 0; return i < chr_to ? data[i] : 0;
} }
@ -160,7 +160,7 @@ namespace ZRCola {
/// \param[in] inputMax Length of the input string in characters. Can be (size_t)-1 if \p input is zero terminated. /// \param[in] inputMax Length of the input string in characters. Can be (size_t)-1 if \p input is zero terminated.
/// \param[in] callback Function to be called on highlight switch /// \param[in] callback Function to be called on highlight switch
/// ///
void Highlight(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _In_ std::function<void (hlghtsetid_t set, size_t start, size_t end)> callback) const; void Highlight(_In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _In_ std::function<void (hlghtsetid_t set, size_t start, size_t end)> callback) const;
}; };
}; };

View File

@ -35,7 +35,7 @@ namespace ZRCola {
protected: protected:
uint16_t chr_to; ///< Character end in \c data uint16_t chr_to; ///< Character end in \c data
wchar_t data[]; ///< Character char16_t data[]; ///< Character
private: private:
inline langchar(_In_ const langchar &other); inline langchar(_In_ const langchar &other);
@ -51,19 +51,19 @@ namespace ZRCola {
/// ///
inline langchar( inline langchar(
_In_opt_ langid_t lang = langid_t::blank, _In_opt_ langid_t lang = langid_t::blank,
_In_opt_z_count_(chr_len) const wchar_t *chr = NULL, _In_opt_z_count_(chr_len) const char16_t *chr = NULL,
_In_opt_ size_t chr_len = 0) _In_opt_ size_t chr_len = 0)
{ {
this->lang = lang; this->lang = lang;
this->chr_to = static_cast<uint16_t>(chr_len); this->chr_to = static_cast<uint16_t>(chr_len);
if (chr && chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); if (chr && chr_len) memcpy(this->data, chr, sizeof(char16_t)*chr_len);
} }
inline const wchar_t* chr () const { return data; }; inline const char16_t* chr () const { return data; };
inline wchar_t* chr () { return data; }; inline char16_t* chr () { return data; };
inline const wchar_t* chr_end() const { return data + chr_to; }; inline const char16_t* chr_end() const { return data + chr_to; };
inline wchar_t* chr_end() { return data + chr_to; }; inline char16_t* chr_end() { return data + chr_to; };
inline uint16_t chr_len() const { return chr_to; }; inline uint16_t chr_len() const { return chr_to; };
}; };
#pragma pack(pop) #pragma pack(pop)
@ -176,7 +176,7 @@ namespace ZRCola {
/// \returns /// \returns
/// - \c true when character is used in language /// - \c true when character is used in language
/// - \c false otherwise /// - \c false otherwise
bool IsLocalCharacter(_In_ const wchar_t *chr, _In_ const wchar_t *chr_end, _In_ langid_t lang) const; bool IsLocalCharacter(_In_ const char16_t *chr, _In_ const char16_t *chr_end, _In_ langid_t lang) const;
}; };
@ -196,7 +196,7 @@ namespace ZRCola {
protected: protected:
uint16_t name_to; ///< Language name end in \c data uint16_t name_to; ///< Language name end in \c data
wchar_t data[]; ///< Language name char16_t data[]; ///< Language name
private: private:
inline language(_In_ const language &other); inline language(_In_ const language &other);
@ -212,19 +212,19 @@ namespace ZRCola {
/// ///
inline language( inline language(
_In_opt_ langid_t lang = langid_t::blank, _In_opt_ langid_t lang = langid_t::blank,
_In_opt_z_count_(name_len) const wchar_t *name = NULL, _In_opt_z_count_(name_len) const char16_t *name = NULL,
_In_opt_ size_t name_len = 0) _In_opt_ size_t name_len = 0)
{ {
this->lang = lang; this->lang = lang;
this->name_to = static_cast<uint16_t>(name_len); this->name_to = static_cast<uint16_t>(name_len);
if (name && name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len); if (name && name_len) memcpy(this->data, name, sizeof(char16_t)*name_len);
} }
inline const wchar_t* name () const { return data; }; inline const char16_t* name () const { return data; };
inline wchar_t* name () { return data; }; inline char16_t* name () { return data; };
inline const wchar_t* name_end() const { return data + name_to; }; inline const char16_t* name_end() const { return data + name_to; };
inline wchar_t* name_end() { return data + name_to; }; inline char16_t* name_end() { return data + name_to; };
inline uint16_t name_len() const { return name_to; }; inline uint16_t name_len() const { return name_to; };
}; };
#pragma pack(pop) #pragma pack(pop)

View File

@ -38,7 +38,7 @@ namespace ZRCola {
protected: protected:
uint16_t chr_to; ///< Character end in \c data uint16_t chr_to; ///< Character end in \c data
wchar_t data[]; ///< Character char16_t data[]; ///< Character
private: private:
inline chrtag(_In_ const chrtag &other); inline chrtag(_In_ const chrtag &other);
@ -53,20 +53,20 @@ namespace ZRCola {
/// \param[in] tag Tag /// \param[in] tag Tag
/// ///
inline chrtag( inline chrtag(
_In_opt_z_count_(chr_len) const wchar_t *chr = NULL, _In_opt_z_count_(chr_len) const char16_t *chr = NULL,
_In_opt_ size_t chr_len = 0, _In_opt_ size_t chr_len = 0,
_In_opt_ tagid_t tag = 0) _In_opt_ tagid_t tag = 0)
{ {
this->tag = tag; this->tag = tag;
this->chr_to = static_cast<uint16_t>(chr_len); this->chr_to = static_cast<uint16_t>(chr_len);
if (chr && chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); if (chr && chr_len) memcpy(this->data, chr, sizeof(char16_t)*chr_len);
} }
inline const wchar_t* chr () const { return data; }; inline const char16_t* chr () const { return data; };
inline wchar_t* chr () { return data; }; inline char16_t* chr () { return data; };
inline const wchar_t* chr_end() const { return data + chr_to; }; inline const char16_t* chr_end() const { return data + chr_to; };
inline wchar_t* chr_end() { return data + chr_to; }; inline char16_t* chr_end() { return data + chr_to; };
inline uint16_t chr_len() const { return chr_to; }; inline uint16_t chr_len() const { return chr_to; };
}; };
#pragma pack(pop) #pragma pack(pop)
@ -209,7 +209,7 @@ namespace ZRCola {
/// \param[in ] fn_abort Pointer to function to periodically test for search cancellation /// \param[in ] fn_abort Pointer to function to periodically test for search cancellation
/// \param[in ] cookie Cookie for \p fn_abort call /// \param[in ] cookie Cookie for \p fn_abort call
/// ///
bool Search(_In_ const std::map<tagid_t, uint16_t> &tags, _In_ const character_db &ch_db, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::wstring, charrank_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; bool Search(_In_ const std::map<tagid_t, uint16_t> &tags, _In_ const character_db &ch_db, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::u16string, charrank_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
}; };
@ -230,7 +230,7 @@ namespace ZRCola {
protected: protected:
uint16_t name_to; ///< Tag name end in \c data uint16_t name_to; ///< Tag name end in \c data
wchar_t data[]; ///< Tag name char16_t data[]; ///< Tag name
private: private:
inline tagname(_In_ const tagname &other); inline tagname(_In_ const tagname &other);
@ -246,22 +246,22 @@ namespace ZRCola {
/// \param[in] name_len Number of UTF-16 characters in \p name /// \param[in] name_len Number of UTF-16 characters in \p name
/// ///
inline tagname( inline tagname(
_In_opt_ tagid_t tag = 0, _In_opt_ tagid_t tag = 0,
_In_opt_ uint32_t locale = 0, _In_opt_ uint32_t locale = 0,
_In_opt_z_count_(name_len) const wchar_t *name = NULL, _In_opt_z_count_(name_len) const char16_t *name = NULL,
_In_opt_ size_t name_len = 0) _In_opt_ size_t name_len = 0)
{ {
this->tag = tag; this->tag = tag;
this->locale = locale; this->locale = locale;
this->name_to = static_cast<uint16_t>(name_len); this->name_to = static_cast<uint16_t>(name_len);
if (name && name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len); if (name && name_len) memcpy(this->data, name, sizeof(char16_t)*name_len);
} }
inline const wchar_t* name () const { return data; }; inline const char16_t* name () const { return data; };
inline wchar_t* name () { return data; }; inline char16_t* name () { return data; };
inline const wchar_t* name_end() const { return data + name_to; }; inline const char16_t* name_end() const { return data + name_to; };
inline wchar_t* name_end() { return data + name_to; }; inline char16_t* name_end() { return data + name_to; };
inline uint16_t name_len() const { return name_to; }; inline uint16_t name_len() const { return name_to; };
/// ///
/// Compares two names /// Compares two names
@ -281,7 +281,7 @@ namespace ZRCola {
/// The function does not treat \\0 characters as terminators for performance reasons. /// The function does not treat \\0 characters as terminators for performance reasons.
/// Therefore \p count_a and \p count_b must represent exact string lengths. /// Therefore \p count_a and \p count_b must represent exact string lengths.
/// ///
static inline int CompareName(_In_ uint32_t locale, _In_z_count_(count_a) const wchar_t *str_a, _In_ uint16_t count_a, _In_z_count_(count_b) const wchar_t *str_b, _In_ uint16_t count_b) static inline int CompareName(_In_ uint32_t locale, _In_z_count_(count_a) const char16_t *str_a, _In_ uint16_t count_a, _In_z_count_(count_b) const char16_t *str_b, _In_ uint16_t count_b)
{ {
#ifdef _WIN32 #ifdef _WIN32
switch (::CompareString(locale, SORT_STRINGSORT | NORM_IGNORECASE, str_a, count_a, str_b, count_b)) { switch (::CompareString(locale, SORT_STRINGSORT | NORM_IGNORECASE, str_a, count_a, str_b, count_b)) {
@ -291,14 +291,14 @@ namespace ZRCola {
default : assert(0); return -1; default : assert(0); return -1;
} }
#else #else
assert(0); // TODO: 1. Should honour locale. 2. Should use ICU for lowercase conversion. assert(0); // TODO: 1. Should honour locale. 2. Should use ICU for lowercase conversion. 3. Should be UTF-16-aware.
std::wstring std::u16string
a(str_a, count_a), a(str_a, count_a),
b(str_b, count_b); b(str_b, count_b);
auto tolower = [](wchar_t c){ return std::towlower(c); }; auto tolower = [](char16_t c){ return std::towlower(c); };
std::transform(a.begin(), a.end(), a.begin(), tolower); std::transform(a.begin(), a.end(), a.begin(), tolower);
std::transform(b.begin(), b.end(), b.begin(), tolower); std::transform(b.begin(), b.end(), b.begin(), tolower);
auto &coll = std::use_facet<std::collate<wchar_t>>(std::locale()); auto &coll = std::use_facet<std::collate<char16_t>>(std::locale());
return coll.compare(&*a.cbegin(), &*a.cend(), &*b.cbegin(), &*b.cend()); return coll.compare(&*a.cbegin(), &*a.cend(), &*b.cbegin(), &*b.cend());
#endif #endif
} }
@ -431,7 +431,7 @@ namespace ZRCola {
/// \param[in ] fn_abort Pointer to function to periodically test for search cancellation /// \param[in ] fn_abort Pointer to function to periodically test for search cancellation
/// \param[in ] cookie Cookie for \p fn_abort call /// \param[in ] cookie Cookie for \p fn_abort call
/// ///
bool Search(_In_z_ const wchar_t *str, _In_ uint32_t locale, _Inout_ std::map<tagid_t, uint16_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; bool Search(_In_z_ const char16_t *str, _In_ uint32_t locale, _Inout_ std::map<tagid_t, uint16_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
}; };
}; };

View File

@ -84,7 +84,7 @@ namespace ZRCola {
protected: protected:
uint16_t dst_to; ///< Destination character end in \c data uint16_t dst_to; ///< Destination character end in \c data
uint16_t src_to; ///< Source string end in \c data uint16_t src_to; ///< Source string end in \c data
wchar_t data[]; ///< Destination string and source character char16_t data[]; ///< Destination string and source character
private: private:
inline translation(_In_ const translation &other); inline translation(_In_ const translation &other);
@ -105,39 +105,39 @@ namespace ZRCola {
inline translation( inline translation(
_In_opt_ transetid_t set = 0, _In_opt_ transetid_t set = 0,
_In_opt_ uint16_t dst_rank = 0, _In_opt_ uint16_t dst_rank = 0,
_In_opt_z_count_(dst_len) const wchar_t *dst = NULL, _In_opt_z_count_(dst_len) const char16_t *dst = NULL,
_In_opt_ size_t dst_len = 0, _In_opt_ size_t dst_len = 0,
_In_opt_ uint16_t src_rank = 0, _In_opt_ uint16_t src_rank = 0,
_In_opt_z_count_(src_len) const wchar_t *src = NULL, _In_opt_z_count_(src_len) const char16_t *src = NULL,
_In_opt_ size_t src_len = 0) _In_opt_ size_t src_len = 0)
{ {
this->set = set; this->set = set;
this->dst_rank = dst_rank; this->dst_rank = dst_rank;
this->src_rank = src_rank; this->src_rank = src_rank;
this->dst_to = static_cast<uint16_t>(dst_len); this->dst_to = static_cast<uint16_t>(dst_len);
if (dst && dst_len) memcpy(this->data, dst, sizeof(wchar_t)*dst_len); if (dst && dst_len) memcpy(this->data, dst, sizeof(char16_t)*dst_len);
this->src_to = static_cast<uint16_t>(this->dst_to + src_len); this->src_to = static_cast<uint16_t>(this->dst_to + src_len);
if (src && src_len) memcpy(this->data + this->dst_to, src, sizeof(wchar_t)*src_len); if (src && src_len) memcpy(this->data + this->dst_to, src, sizeof(char16_t)*src_len);
} }
inline const wchar_t* dst () const { return data; }; inline const char16_t* dst () const { return data; };
inline wchar_t* dst () { return data; }; inline char16_t* dst () { return data; };
inline const wchar_t* dst_end() const { return data + dst_to; }; inline const char16_t* dst_end() const { return data + dst_to; };
inline wchar_t* dst_end() { return data + dst_to; }; inline char16_t* dst_end() { return data + dst_to; };
inline uint16_t dst_len() const { return dst_to; }; inline uint16_t dst_len() const { return dst_to; };
inline wchar_t dst_at(_In_ size_t i) const inline char16_t dst_at(_In_ size_t i) const
{ {
return i < dst_to ? data[i] : 0; return i < dst_to ? data[i] : 0;
} }
inline const wchar_t* src () const { return data + dst_to; }; inline const char16_t* src () const { return data + dst_to; };
inline wchar_t* src () { return data + dst_to; }; inline char16_t* src () { return data + dst_to; };
inline const wchar_t* src_end() const { return data + src_to; }; inline const char16_t* src_end() const { return data + src_to; };
inline wchar_t* src_end() { return data + src_to; }; inline char16_t* src_end() { return data + src_to; };
inline uint16_t src_len() const { return src_to - dst_to; }; inline uint16_t src_len() const { return src_to - dst_to; };
inline wchar_t src_at(_In_ size_t i) const inline char16_t src_at(_In_ size_t i) const
{ {
size_t ii = i + dst_to; // absolute index size_t ii = i + dst_to; // absolute index
return ii < src_to ? data[ii] : 0; return ii < src_to ? data[ii] : 0;
@ -302,7 +302,7 @@ namespace ZRCola {
/// \param[out] output Output string (UTF-16) /// \param[out] output Output string (UTF-16)
/// \param[out] map The vector of source to destination index mappings (optional) /// \param[out] map The vector of source to destination index mappings (optional)
/// ///
void Translate(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const; void Translate(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _Out_ std::u16string &output, _Out_opt_ std::vector<mapping>* map = NULL) const;
/// ///
/// Inverse translates string /// Inverse translates string
@ -313,7 +313,7 @@ namespace ZRCola {
/// \param[out] output Output string (UTF-16) /// \param[out] output Output string (UTF-16)
/// \param[out] map The vector of source to destination index mappings (optional) /// \param[out] map The vector of source to destination index mappings (optional)
/// ///
inline void TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const inline void TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _Out_ std::u16string &output, _Out_opt_ std::vector<mapping>* map = NULL) const
{ {
TranslateInv(set, input, inputMax, NULL, langid_t::blank, output, map); TranslateInv(set, input, inputMax, NULL, langid_t::blank, output, map);
} }
@ -329,7 +329,7 @@ namespace ZRCola {
/// \param[out] output Output string (UTF-16) /// \param[out] output Output string (UTF-16)
/// \param[out] map The vector of source to destination index mappings (optional) /// \param[out] map The vector of source to destination index mappings (optional)
/// ///
void TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _In_opt_ const langchar_db *lc_db, _In_opt_ langid_t lang, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map = NULL) const; void TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _In_opt_ const langchar_db *lc_db, _In_opt_ langid_t lang, _Out_ std::u16string &output, _Out_opt_ std::vector<mapping>* map = NULL) const;
}; };
@ -350,7 +350,7 @@ namespace ZRCola {
protected: protected:
uint16_t src_to; ///< Source name end in \c data uint16_t src_to; ///< Source name end in \c data
uint16_t dst_to; ///< Destination name end in \c data uint16_t dst_to; ///< Destination name end in \c data
wchar_t data[]; ///< Source and destination names char16_t data[]; ///< Source and destination names
private: private:
inline transet(_In_ const transet &other); inline transet(_In_ const transet &other);
@ -368,29 +368,29 @@ namespace ZRCola {
/// ///
inline transet( inline transet(
_In_opt_ transetid_t set = 0, _In_opt_ transetid_t set = 0,
_In_opt_z_count_(src_len) const wchar_t *src = NULL, _In_opt_z_count_(src_len) const char16_t *src = NULL,
_In_opt_ size_t src_len = 0, _In_opt_ size_t src_len = 0,
_In_opt_z_count_(dst_len) const wchar_t *dst = NULL, _In_opt_z_count_(dst_len) const char16_t *dst = NULL,
_In_opt_ size_t dst_len = 0) _In_opt_ size_t dst_len = 0)
{ {
this->set = set; this->set = set;
this->src_to = static_cast<uint16_t>(src_len); this->src_to = static_cast<uint16_t>(src_len);
if (src && src_len) memcpy(this->data, src, sizeof(wchar_t)*src_len); if (src && src_len) memcpy(this->data, src, sizeof(char16_t)*src_len);
this->dst_to = static_cast<uint16_t>(this->src_to + dst_len); this->dst_to = static_cast<uint16_t>(this->src_to + dst_len);
if (dst && dst_len) memcpy(this->data + this->src_to, dst, sizeof(wchar_t)*dst_len); if (dst && dst_len) memcpy(this->data + this->src_to, dst, sizeof(char16_t)*dst_len);
} }
inline const wchar_t* src () const { return data; }; inline const char16_t* src () const { return data; };
inline wchar_t* src () { return data; }; inline char16_t* src () { return data; };
inline const wchar_t* src_end() const { return data + src_to; }; inline const char16_t* src_end() const { return data + src_to; };
inline wchar_t* src_end() { return data + src_to; }; inline char16_t* src_end() { return data + src_to; };
inline uint16_t src_len() const { return src_to; }; inline uint16_t src_len() const { return src_to; };
inline const wchar_t* dst () const { return data + src_to; }; inline const char16_t* dst () const { return data + src_to; };
inline wchar_t* dst () { return data + src_to; }; inline char16_t* dst () { return data + src_to; };
inline const wchar_t* dst_end() const { return data + dst_to; }; inline const char16_t* dst_end() const { return data + dst_to; };
inline wchar_t* dst_end() { return data + dst_to; }; inline char16_t* dst_end() { return data + dst_to; };
inline uint16_t dst_len() const { return dst_to - src_to; }; inline uint16_t dst_len() const { return dst_to - src_to; };
}; };
#pragma pack(pop) #pragma pack(pop)
@ -464,7 +464,7 @@ namespace ZRCola {
protected: protected:
uint16_t name_to; ///< Translation sequence name end in \c data uint16_t name_to; ///< Translation sequence name end in \c data
uint16_t sets_to; ///< Translation sequence sets end in \c data uint16_t sets_to; ///< Translation sequence sets end in \c data
wchar_t data[]; ///< Translation sequence name and sets char16_t data[]; ///< Translation sequence name and sets
private: private:
inline transeq(_In_ const transeq &other); inline transeq(_In_ const transeq &other);
@ -484,7 +484,7 @@ namespace ZRCola {
inline transeq( inline transeq(
_In_opt_ transeqid_t seq = 0, _In_opt_ transeqid_t seq = 0,
_In_opt_ uint16_t rank = 0, _In_opt_ uint16_t rank = 0,
_In_opt_z_count_(name_len) const wchar_t *name = NULL, _In_opt_z_count_(name_len) const char16_t *name = NULL,
_In_opt_ size_t name_len = 0, _In_opt_ size_t name_len = 0,
_In_opt_count_ (sets_len) const transetid_t *sets = NULL, _In_opt_count_ (sets_len) const transetid_t *sets = NULL,
_In_opt_ size_t sets_len = 0) _In_opt_ size_t sets_len = 0)
@ -492,16 +492,16 @@ namespace ZRCola {
this->seq = seq; this->seq = seq;
this->rank = rank; this->rank = rank;
this->name_to = static_cast<uint16_t>(name_len); this->name_to = static_cast<uint16_t>(name_len);
if (name && name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len); if (name && name_len) memcpy(this->data, name, sizeof(char16_t)*name_len);
this->sets_to = static_cast<uint16_t>(this->name_to + sets_len); this->sets_to = static_cast<uint16_t>(this->name_to + sets_len);
if (sets && sets_len) memcpy(this->data + this->name_to, sets, sizeof(transetid_t)*sets_len); if (sets && sets_len) memcpy(this->data + this->name_to, sets, sizeof(transetid_t)*sets_len);
} }
inline const wchar_t* name () const { return data; }; inline const char16_t* name () const { return data; };
inline wchar_t* name () { return data; }; inline char16_t* name () { return data; };
inline const wchar_t* name_end() const { return data + name_to; }; inline const char16_t* name_end() const { return data + name_to; };
inline wchar_t* name_end() { return data + name_to; }; inline char16_t* name_end() { return data + name_to; };
inline uint16_t name_len() const { return name_to; }; inline uint16_t name_len() const { return name_to; };
inline const transetid_t* sets () const { return reinterpret_cast<const transetid_t*>(data + name_to); }; inline const transetid_t* sets () const { return reinterpret_cast<const transetid_t*>(data + name_to); };
inline transetid_t* sets () { return reinterpret_cast< transetid_t*>(data + name_to); }; inline transetid_t* sets () { return reinterpret_cast< transetid_t*>(data + name_to); };
@ -592,7 +592,7 @@ namespace ZRCola {
if (a.rank < b.rank) return -1; if (a.rank < b.rank) return -1;
else if (a.rank > b.rank) return +1; else if (a.rank > b.rank) return +1;
auto &coll = std::use_facet<std::collate<wchar_t>>(std::locale()); auto &coll = std::use_facet<std::collate<char16_t>>(std::locale());
return coll.compare(a.name(), a.name_end(), b.name(), b.name_end()); return coll.compare(a.name(), a.name_end(), b.name(), b.name_end());
} }
} idxRank; ///< Rank index } idxRank; ///< Rank index

View File

@ -9,7 +9,29 @@
const ZRCola::chrcatid_t ZRCola::chrcatid_t::blank = {}; const ZRCola::chrcatid_t ZRCola::chrcatid_t::blank = {};
bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::wstring, charrank_t> &hits, _Inout_ std::map<std::wstring, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const #ifndef _WIN32
///
/// Counts the UTF-16 code units in a zero-terminated string.
///
/// \param[in] str  Zero-terminated UTF-16 string; dereferenced unconditionally
///
/// \returns Number of code units preceding the first zero code unit
///
_Use_decl_annotations_
size_t ZRCola::wcslen(const char16_t *str)
{
    // Walk a cursor up to the terminator; the distance travelled is the length.
    const char16_t *cursor = str;
    while (*cursor)
        ++cursor;
    return static_cast<size_t>(cursor - str);
}
///
/// Counts the UTF-16 code units in a string, examining at most \p count of them.
///
/// \param[in] str    UTF-16 string; read only while the index is below \p count
/// \param[in] count  Maximum number of code units to examine
///
/// \returns Index of the first zero code unit, or \p count when none is found within the limit
///
_Use_decl_annotations_
size_t ZRCola::wcsnlen(const char16_t *str, size_t count)
{
    size_t len = 0;
    // The bound is tested before the read, so str[count] is never accessed.
    while (len < count && str[len])
        ++len;
    return len;
}
#endif
_Use_decl_annotations_
bool ZRCola::character_db::Search(const char16_t *str, const std::set<chrcatid_t> &cats, std::map<std::u16string, charrank_t> &hits, std::map<std::u16string, charrank_t> &hits_sub, bool (__cdecl *fn_abort)(void *cookie), void *cookie) const
{ {
assert(str); assert(str);
@ -27,14 +49,14 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set
} }
// Get term. // Get term.
std::wstring term; std::u16string term;
if (*str == L'"') { if (*str == u'"') {
const wchar_t *str_end = ++str; const char16_t *str_end = ++str;
for (;;) { for (;;) {
if (*str_end == 0) { if (*str_end == 0) {
term.assign(str, str_end); term.assign(str, str_end);
break; break;
} else if (*str_end == L'"') { } else if (*str_end == u'"') {
term.assign(str, str_end); term.assign(str, str_end);
str_end++; str_end++;
break; break;
@ -43,7 +65,7 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set
} }
str = str_end; str = str_end;
} else { } else {
const wchar_t *str_end = str + 1; const char16_t *str_end = str + 1;
for (; *str_end && !iswspace(*str_end); str_end++); for (; *str_end && !iswspace(*str_end); str_end++);
term.assign(str, str_end); term.assign(str, str_end);
str = str_end; str = str_end;
@ -57,7 +79,7 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set
if (fn_abort && fn_abort(cookie)) return false; if (fn_abort && fn_abort(cookie)) return false;
const wchar_t *val; const char16_t *val;
size_t val_len; size_t val_len;
if (idxDsc.find(term.c_str(), term.size(), &val, &val_len)) { if (idxDsc.find(term.c_str(), term.size(), &val, &val_len)) {
@ -66,7 +88,7 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set
if (fn_abort && fn_abort(cookie)) return false; if (fn_abort && fn_abort(cookie)) return false;
j = wcsnlen(val + i, val_len - i); j = wcsnlen(val + i, val_len - i);
if (cats.find(GetCharCat(val + i, j)) != cats.end()) { if (cats.find(GetCharCat(val + i, j)) != cats.end()) {
std::wstring c(val + i, j); std::u16string c(val + i, j);
auto idx = hits.find(c); auto idx = hits.find(c);
if (idx == hits.end()) { if (idx == hits.end()) {
// New character. // New character.
@ -85,7 +107,7 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set
if (fn_abort && fn_abort(cookie)) return false; if (fn_abort && fn_abort(cookie)) return false;
j = wcsnlen(val + i, val_len - i); j = wcsnlen(val + i, val_len - i);
if (cats.find(GetCharCat(val + i, j)) != cats.end()) { if (cats.find(GetCharCat(val + i, j)) != cats.end()) {
std::wstring c(val + i, j); std::u16string c(val + i, j);
auto idx = hits_sub.find(c); auto idx = hits_sub.find(c);
if (idx == hits_sub.end()) { if (idx == hits_sub.end()) {
// New character. // New character.

View File

@ -10,7 +10,7 @@ const ZRCola::langid_t ZRCola::langid_t::blank = {};
_Use_decl_annotations_ _Use_decl_annotations_
int ZRCola::CompareString(const wchar_t* str_a, size_t count_a, const wchar_t* str_b, size_t count_b) int ZRCola::CompareString(const char16_t* str_a, size_t count_a, const char16_t* str_b, size_t count_b)
{ {
for (size_t i = 0; ; i++) { for (size_t i = 0; ; i++) {
if (i >= count_a && i >= count_b) return 0; if (i >= count_a && i >= count_b) return 0;
@ -23,11 +23,11 @@ int ZRCola::CompareString(const wchar_t* str_a, size_t count_a, const wchar_t* s
_Use_decl_annotations_ _Use_decl_annotations_
inline std::string ZRCola::GetUnicodeDumpA(const wchar_t* str, size_t count, const char* sep) inline std::string ZRCola::GetUnicodeDumpA(const char16_t* str, size_t count, const char* sep)
{ {
std::string out; std::string out;
size_t sep_len = strlen(sep); size_t sep_len = strlen(sep);
size_t dump_len_max = sep_len + 8 + 1; size_t dump_len_max = sep_len + 4 + 1;
char* dump; char* dump;
std::unique_ptr<char[]> dump_obj(dump = new char[dump_len_max]); std::unique_ptr<char[]> dump_obj(dump = new char[dump_len_max]);
if (count && str[0]) { if (count && str[0]) {
@ -54,11 +54,11 @@ inline std::string ZRCola::GetUnicodeDumpA(const wchar_t* str, size_t count, con
_Use_decl_annotations_ _Use_decl_annotations_
std::wstring ZRCola::GetUnicodeDumpW(const wchar_t* str, size_t count, const wchar_t* sep) std::wstring ZRCola::GetUnicodeDumpW(const char16_t* str, size_t count, const wchar_t* sep)
{ {
std::wstring out; std::wstring out;
size_t sep_len = wcslen(sep); size_t sep_len = ::wcslen(sep);
size_t dump_len_max = sep_len + 8 + 1; size_t dump_len_max = sep_len + 4 + 1;
wchar_t* dump; wchar_t* dump;
std::unique_ptr<wchar_t[]> dump_obj(dump = new wchar_t[dump_len_max]); std::unique_ptr<wchar_t[]> dump_obj(dump = new wchar_t[dump_len_max]);
if (count && str[0]) { if (count && str[0]) {

View File

@ -6,7 +6,7 @@
#include "pch.h" #include "pch.h"
_Use_decl_annotations_ _Use_decl_annotations_
void ZRCola::highlight_db::Highlight(const wchar_t* input, size_t inputMax, std::function<void (hlghtsetid_t set, size_t start, size_t end)> callback) const void ZRCola::highlight_db::Highlight(const char16_t* input, size_t inputMax, std::function<void (hlghtsetid_t set, size_t start, size_t end)> callback) const
{ {
size_t start = 0; size_t start = 0;
hlghtsetid_t set = ZRCOLA_HLGHTSETID_DEFAULT; hlghtsetid_t set = ZRCOLA_HLGHTSETID_DEFAULT;
@ -15,7 +15,7 @@ void ZRCola::highlight_db::Highlight(const wchar_t* input, size_t inputMax, std:
// Find the longest matching highlight at i-th character. // Find the longest matching highlight at i-th character.
size_t l_match = (size_t)-1; size_t l_match = (size_t)-1;
for (size_t l = 0, r = idxChr.size(), ii = i, j = 0; ii < inputMax && l < r; ii++, j++) { for (size_t l = 0, r = idxChr.size(), ii = i, j = 0; ii < inputMax && l < r; ii++, j++) {
wchar_t c = input[ii]; char16_t c = input[ii];
while (l < r) { while (l < r) {
// Test the highlight in the middle of the search area. // Test the highlight in the middle of the search area.
size_t m = (l + r) / 2; size_t m = (l + r) / 2;
@ -23,7 +23,7 @@ void ZRCola::highlight_db::Highlight(const wchar_t* input, size_t inputMax, std:
// Get the j-th character of the highlight. // Get the j-th character of the highlight.
// All highlights that get short on characters are lexically ordered before. // All highlights that get short on characters are lexically ordered before.
// Thus the j-th character is considered 0. // Thus the j-th character is considered 0.
wchar_t s = idxChr[m].chr_at(j); char16_t s = idxChr[m].chr_at(j);
// Do the bisection test. // Do the bisection test.
if (c < s) r = m; if (c < s) r = m;

View File

@ -57,11 +57,11 @@ void ZRCola::LangConvert(_In_ LANGID lang_win, _Inout_ ZRCola::langid_t &lang)
#endif #endif
bool ZRCola::langchar_db::IsLocalCharacter(_In_ const wchar_t *chr, _In_ const wchar_t *chr_end, _In_ ZRCola::langid_t lang) const bool ZRCola::langchar_db::IsLocalCharacter(_In_ const char16_t *chr, _In_ const char16_t *chr_end, _In_ ZRCola::langid_t lang) const
{ {
size_t n = chr_end - chr; size_t n = chr_end - chr;
assert(n <= 0xffff); assert(n <= 0xffff);
std::unique_ptr<langchar> lc((langchar*)new char[sizeof(langchar) + sizeof(wchar_t)*n]); std::unique_ptr<langchar> lc((langchar*)new char[sizeof(langchar) + sizeof(char16_t)*n]);
new (lc.get()) langchar(lang, chr, n); new (lc.get()) langchar(lang, chr, n);
indexChr::size_type start; indexChr::size_type start;
return idxChr.find(*lc, start); return idxChr.find(*lc, start);

View File

@ -6,7 +6,7 @@
#include "pch.h" #include "pch.h"
bool ZRCola::chrtag_db::Search(_In_ const std::map<tagid_t, uint16_t> &tags, _In_ const character_db &ch_db, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::wstring, charrank_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const bool ZRCola::chrtag_db::Search(_In_ const std::map<tagid_t, uint16_t> &tags, _In_ const character_db &ch_db, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::u16string, charrank_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const
{ {
for (auto tag = tags.cbegin(), tag_end = tags.cend(); tag != tag_end; ++tag) { for (auto tag = tags.cbegin(), tag_end = tags.cend(); tag != tag_end; ++tag) {
if (fn_abort && fn_abort(cookie)) return false; if (fn_abort && fn_abort(cookie)) return false;
@ -19,7 +19,7 @@ bool ZRCola::chrtag_db::Search(_In_ const std::map<tagid_t, uint16_t> &tags, _In
const chrtag &ct = idxTag[i]; const chrtag &ct = idxTag[i];
uint16_t len = ct.chr_len(); uint16_t len = ct.chr_len();
if (cats.find(ch_db.GetCharCat(ct.chr(), len)) != cats.end()) { if (cats.find(ch_db.GetCharCat(ct.chr(), len)) != cats.end()) {
std::wstring chr(ct.chr(), len); std::u16string chr(ct.chr(), len);
auto idx = hits.find(chr); auto idx = hits.find(chr);
if (idx == hits.end()) { if (idx == hits.end()) {
// New character. // New character.
@ -37,7 +37,7 @@ bool ZRCola::chrtag_db::Search(_In_ const std::map<tagid_t, uint16_t> &tags, _In
} }
bool ZRCola::tagname_db::Search(_In_z_ const wchar_t *str, _In_ uint32_t locale, _Inout_ std::map<tagid_t, uint16_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const bool ZRCola::tagname_db::Search(_In_z_ const char16_t *str, _In_ uint32_t locale, _Inout_ std::map<tagid_t, uint16_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const
{ {
assert(str); assert(str);
@ -55,14 +55,14 @@ bool ZRCola::tagname_db::Search(_In_z_ const wchar_t *str, _In_ uint32_t locale,
} }
// Get name. // Get name.
std::wstring name; std::u16string name;
if (*str == L'"') { if (*str == u'"') {
const wchar_t *str_end = ++str; const char16_t *str_end = ++str;
for (;;) { for (;;) {
if (*str_end == 0) { if (*str_end == 0) {
name.assign(str, str_end); name.assign(str, str_end);
break; break;
} else if (*str_end == L'"') { } else if (*str_end == u'"') {
name.assign(str, str_end); name.assign(str, str_end);
str_end++; str_end++;
break; break;
@ -71,7 +71,7 @@ bool ZRCola::tagname_db::Search(_In_z_ const wchar_t *str, _In_ uint32_t locale,
} }
str = str_end; str = str_end;
} else { } else {
const wchar_t *str_end = str + 1; const char16_t *str_end = str + 1;
for (; *str_end && !iswspace(*str_end); str_end++); for (; *str_end && !iswspace(*str_end); str_end++);
name.assign(str, str_end); name.assign(str, str_end);
str = str_end; str = str_end;
@ -81,7 +81,7 @@ bool ZRCola::tagname_db::Search(_In_z_ const wchar_t *str, _In_ uint32_t locale,
if (fn_abort && fn_abort(cookie)) return false; if (fn_abort && fn_abort(cookie)) return false;
// Find the name. // Find the name.
std::unique_ptr<tagname> tn(reinterpret_cast<tagname*>(new char[sizeof(tagname) + sizeof(wchar_t)*name.length()])); std::unique_ptr<tagname> tn(reinterpret_cast<tagname*>(new char[sizeof(tagname) + sizeof(char16_t)*name.length()]));
new (tn.get()) tagname(0, locale, name.data(), name.length()); new (tn.get()) tagname(0, locale, name.data(), name.length());
size_t start, end; size_t start, end;
if (idxName.find(*tn, start, end)) { if (idxName.find(*tn, start, end)) {

View File

@ -6,7 +6,7 @@
#include "pch.h" #include "pch.h"
void ZRCola::translation_db::Translate(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map) const void ZRCola::translation_db::Translate(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _Out_ std::u16string &output, _Out_opt_ std::vector<mapping>* map) const
{ {
assert(input || inputMax == 0); assert(input || inputMax == 0);
@ -28,7 +28,7 @@ void ZRCola::translation_db::Translate(_In_ transetid_t set, _In_z_count_(inputM
// Find the longest matching translation at i-th character. // Find the longest matching translation at i-th character.
size_t l_match = (size_t)-1; size_t l_match = (size_t)-1;
for (size_t l = l_set, r = r_set, ii = i, j = 0; ii < inputMax && l < r; ii++, j++) { for (size_t l = l_set, r = r_set, ii = i, j = 0; ii < inputMax && l < r; ii++, j++) {
wchar_t c = input[ii]; char16_t c = input[ii];
while (l < r) { while (l < r) {
// Test the translation in the middle of the search area. // Test the translation in the middle of the search area.
size_t m = (l + r) / 2; size_t m = (l + r) / 2;
@ -36,7 +36,7 @@ void ZRCola::translation_db::Translate(_In_ transetid_t set, _In_z_count_(inputM
// Get the j-th character of the translation. // Get the j-th character of the translation.
// All translations that get short on characters are lexically ordered before. // All translations that get short on characters are lexically ordered before.
// Thus the j-th character is considered 0. // Thus the j-th character is considered 0.
wchar_t s = idxSrc[m].src_at(j); char16_t s = idxSrc[m].src_at(j);
// Do the bisection test. // Do the bisection test.
if (c < s) r = m; if (c < s) r = m;
@ -84,7 +84,7 @@ void ZRCola::translation_db::Translate(_In_ transetid_t set, _In_z_count_(inputM
} }
void ZRCola::translation_db::TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _In_opt_ const langchar_db *lc_db, _In_opt_ langid_t lang, _Out_ std::wstring &output, _Out_opt_ std::vector<mapping>* map) const void ZRCola::translation_db::TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _In_opt_ const langchar_db *lc_db, _In_opt_ langid_t lang, _Out_ std::u16string &output, _Out_opt_ std::vector<mapping>* map) const
{ {
assert(input || inputMax == 0); assert(input || inputMax == 0);
@ -106,7 +106,7 @@ void ZRCola::translation_db::TranslateInv(_In_ transetid_t set, _In_z_count_(inp
// Find the longest matching inverse translation at i-th character. // Find the longest matching inverse translation at i-th character.
size_t l_match = (size_t)-1; size_t l_match = (size_t)-1;
for (size_t l = l_set, r = r_set, ii = i, j = 0; ii < inputMax && l < r; ii++, j++) { for (size_t l = l_set, r = r_set, ii = i, j = 0; ii < inputMax && l < r; ii++, j++) {
wchar_t c = input[ii]; char16_t c = input[ii];
while (l < r) { while (l < r) {
// Test the inverse translation in the middle of the search area. // Test the inverse translation in the middle of the search area.
size_t m = (l + r) / 2; size_t m = (l + r) / 2;
@ -114,7 +114,7 @@ void ZRCola::translation_db::TranslateInv(_In_ transetid_t set, _In_z_count_(inp
// Get the j-th character of the inverse translation. // Get the j-th character of the inverse translation.
// All inverse translations that get short on characters are lexically ordered before. // All inverse translations that get short on characters are lexically ordered before.
// Thus the j-th character is considered 0. // Thus the j-th character is considered 0.
wchar_t s = idxDst[m].dst_at(j); char16_t s = idxDst[m].dst_at(j);
// Do the bisection test. // Do the bisection test.
if (c < s) r = m; if (c < s) r = m;
@ -147,7 +147,7 @@ void ZRCola::translation_db::TranslateInv(_In_ transetid_t set, _In_z_count_(inp
if (l_match < r_set) { if (l_match < r_set) {
// The saved inverse translation was an exact match. // The saved inverse translation was an exact match.
const translation &trans = idxDst[l_match]; const translation &trans = idxDst[l_match];
if (trans.src_len() && trans.src()[0] != L'#' && (!lc_db || !lc_db->IsLocalCharacter(trans.dst(), trans.dst_end(), lang))) { if (trans.src_len() && trans.src()[0] != u'#' && (!lc_db || !lc_db->IsLocalCharacter(trans.dst(), trans.dst_end(), lang))) {
// Append source sequence. // Append source sequence.
output.append(trans.src(), trans.src_end()); output.append(trans.src(), trans.src_end());
i += trans.dst_len(); i += trans.dst_len();