diff --git a/lib/libZRCola/include/zrcola/character.h b/lib/libZRCola/include/zrcola/character.h index 53d755d..5be913f 100644 --- a/lib/libZRCola/include/zrcola/character.h +++ b/lib/libZRCola/include/zrcola/character.h @@ -29,11 +29,16 @@ namespace ZRCola { /// typedef double charrank_t; - inline bool ispua(_In_ wchar_t c) + inline bool ispua(_In_ char16_t c) { - return L'\ue000' <= c && c <= L'\uf8ff'; + return u'\ue000' <= c && c <= u'\uf8ff'; } +#ifndef _WIN32 + size_t wcslen(_In_z_ const char16_t *str); + size_t wcsnlen(_In_z_count_(count) const char16_t *str, _In_ size_t count); +#endif + #pragma pack(push) #pragma pack(2) /// @@ -183,7 +188,7 @@ namespace ZRCola { uint16_t chr_to; ///< Character end in \c data uint16_t desc_to; ///< Character description end in \c data uint16_t rel_to; ///< Related characters end in \c data - wchar_t data[]; ///< Character, character description + char16_t data[]; ///< Character, character description private: inline character(_In_ const character &other); @@ -202,40 +207,40 @@ namespace ZRCola { /// \param[in] rel_len Number of UTF-16 characters in \p rel (including zero delimiters) /// inline character( - _In_opt_z_count_(chr_len) const wchar_t *chr = NULL, + _In_opt_z_count_(chr_len) const char16_t *chr = NULL, _In_opt_ size_t chr_len = 0, _In_opt_ chrcatid_t cat = chrcatid_t::blank, - _In_opt_z_count_(desc_len) const wchar_t *desc = NULL, + _In_opt_z_count_(desc_len) const char16_t *desc = NULL, _In_opt_ size_t desc_len = 0, - _In_opt_z_count_(rel_len) const wchar_t *rel = NULL, + _In_opt_z_count_(rel_len) const char16_t *rel = NULL, _In_opt_ size_t rel_len = 0) { this->cat = cat; this->chr_to = static_cast(chr_len); - if (chr && chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); + if (chr && chr_len) memcpy(this->data, chr, sizeof(char16_t)*chr_len); this->desc_to = static_cast(this->chr_to + desc_len); - if (desc && desc_len) memcpy(this->data + this->chr_to, desc, sizeof(wchar_t)*desc_len); + if (desc && desc_len) memcpy(this->data + this->chr_to, desc, sizeof(char16_t)*desc_len); this->rel_to = static_cast(this->desc_to + rel_len); - if (rel && rel_len) memcpy(this->data + this->desc_to, rel, sizeof(wchar_t)*rel_len); + if (rel && rel_len) memcpy(this->data + this->desc_to, rel, sizeof(char16_t)*rel_len); } - inline const wchar_t* chr () const { return data; }; - inline wchar_t* chr () { return data; }; - inline const wchar_t* chr_end() const { return data + chr_to; }; - inline wchar_t* chr_end() { return data + chr_to; }; - inline uint16_t chr_len() const { return chr_to; }; + inline const char16_t* chr () const { return data; }; + inline char16_t* chr () { return data; }; + inline const char16_t* chr_end() const { return data + chr_to; }; + inline char16_t* chr_end() { return data + chr_to; }; + inline uint16_t chr_len() const { return chr_to; }; - inline const wchar_t* desc () const { return data + chr_to; }; - inline wchar_t* desc () { return data + chr_to; }; - inline const wchar_t* desc_end() const { return data + desc_to; }; - inline wchar_t* desc_end() { return data + desc_to; }; - inline uint16_t desc_len() const { return desc_to - chr_to; }; + inline const char16_t* desc () const { return data + chr_to; }; + inline char16_t* desc () { return data + chr_to; }; + inline const char16_t* desc_end() const { return data + desc_to; }; + inline char16_t* desc_end() { return data + desc_to; }; + inline uint16_t desc_len() const { return desc_to - chr_to; }; - inline const wchar_t* rel () const { return data + desc_to; }; - inline wchar_t* rel () { return data + desc_to; }; - inline const wchar_t* rel_end() const { return data + rel_to; }; - inline wchar_t* rel_end() { return data + rel_to; }; - inline uint16_t rel_len() const { return rel_to - desc_to; }; + inline const char16_t* rel () const { return data + desc_to; }; + inline char16_t* rel () { return data + desc_to; }; + inline const char16_t* rel_end() const { return data + rel_to; }; + inline char16_t* rel_end() { return data + rel_to; }; + inline uint16_t rel_len() const { return rel_to - desc_to; }; }; #pragma pack(pop) @@ -272,9 +277,9 @@ namespace ZRCola { } } idxChr; ///< Character index - textindex idxDsc; ///< Description index - textindex idxDscSub; ///< Description index (sub-terms) - std::vector data; ///< Character data + textindex idxDsc; ///< Description index + textindex idxDscSub; ///< Description index (sub-terms) + std::vector data; ///< Character data public: /// @@ -303,7 +308,7 @@ namespace ZRCola { /// \param[in ] fn_abort Pointer to function to periodically test for search cancellation /// \param[in ] cookie Cookie for \p fn_abort call /// - bool Search(_In_z_ const wchar_t *str, _In_ const std::set &cats, _Inout_ std::map &hits, _Inout_ std::map &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; + bool Search(_In_z_ const char16_t *str, _In_ const std::set &cats, _Inout_ std::map &hits, _Inout_ std::map &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; /// /// Get character category @@ -315,10 +320,10 @@ namespace ZRCola { /// - Character category if character found /// - `ZRCola::chrcatid_t::blank` otherwise /// - inline chrcatid_t GetCharCat(_In_z_count_(len) const wchar_t *chr, _In_ const size_t len) const + inline chrcatid_t GetCharCat(_In_z_count_(len) const char16_t *chr, _In_ const size_t len) const { assert(len <= 0xffff); - std::unique_ptr c((character*)new char[sizeof(character) + sizeof(wchar_t)*len]); + std::unique_ptr c((character*)new char[sizeof(character) + sizeof(char16_t)*len]); new (c.get()) character(chr, len); indexChr::size_type start; return idxChr.find(*c, start) ? idxChr[start].cat : chrcatid_t::blank; @@ -343,7 +348,7 @@ namespace ZRCola { protected: uint16_t name_to; ///< Character category name end in \c data - wchar_t data[]; ///< Character category name + char16_t data[]; ///< Character category name private: inline chrcat(_In_ const chrcat &other); @@ -361,20 +366,20 @@ namespace ZRCola { inline chrcat( _In_opt_ chrcatid_t cat = chrcatid_t::blank, _In_opt_ uint16_t rank = 0, - _In_opt_z_count_(name_len) const wchar_t *name = NULL, + _In_opt_z_count_(name_len) const char16_t *name = NULL, _In_opt_ size_t name_len = 0) { this->cat = cat; this->rank = rank; this->name_to = static_cast(name_len); - if (name && name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len); + if (name && name_len) memcpy(this->data, name, sizeof(char16_t)*name_len); } - inline const wchar_t* name () const { return data; }; - inline wchar_t* name () { return data; }; - inline const wchar_t* name_end() const { return data + name_to; }; - inline wchar_t* name_end() { return data + name_to; }; - inline uint16_t name_len() const { return name_to; }; + inline const char16_t* name () const { return data; }; + inline char16_t* name () { return data; }; + inline const char16_t* name_end() const { return data + name_to; }; + inline char16_t* name_end() { return data + name_to; }; + inline uint16_t name_len() const { return name_to; }; }; #pragma pack(pop) @@ -459,7 +464,7 @@ namespace ZRCola { if (a.rank < b.rank) return -1; else if (a.rank > b.rank) return +1; - auto &coll = std::use_facet>(std::locale()); + auto &coll = std::use_facet>(std::locale()); return coll.compare(a.name(), a.name_end(), b.name(), b.name_end()); } } idxRank; ///< Rank index diff --git a/lib/libZRCola/include/zrcola/common.h b/lib/libZRCola/include/zrcola/common.h index 8587f42..6dbfb1e 100644 --- a/lib/libZRCola/include/zrcola/common.h +++ b/lib/libZRCola/include/zrcola/common.h @@ -571,7 +571,7 @@ namespace ZRCola { /// The function does not treat \\0 characters as terminators for performance reasons. /// Therefore \p count_a and \p count_b must represent exact string lengths. /// - int CompareString(_In_ const wchar_t* str_a, _In_ size_t count_a, _In_ const wchar_t* str_b, _In_ size_t count_b); + int CompareString(_In_ const char16_t* str_a, _In_ size_t count_a, _In_ const char16_t* str_b, _In_ size_t count_b); /// /// Generates and returns Unicode representation of the string using hexadecimal codes. @@ -580,7 +580,7 @@ namespace ZRCola { /// \param[in] count Number of characters in string \p str /// \param[in] sep Separator /// - std::string GetUnicodeDumpA(_In_z_count_(count) const wchar_t* str, _In_ size_t count, _In_z_ const char* sep = "+"); + std::string GetUnicodeDumpA(_In_z_count_(count) const char16_t* str, _In_ size_t count, _In_z_ const char* sep = "+"); /// /// Generates and returns Unicode representation of the string using hexadecimal codes. @@ -589,7 +589,7 @@ namespace ZRCola { /// \param[in] count Number of characters in string \p str /// \param[in] sep Separator /// - std::wstring GetUnicodeDumpW(_In_z_count_(count) const wchar_t* str, _In_ size_t count, _In_z_ const wchar_t* sep = L"+"); + std::wstring GetUnicodeDumpW(_In_z_count_(count) const char16_t* str, _In_ size_t count, _In_z_ const wchar_t* sep = L"+"); #ifdef _UNICODE #define GetUnicodeDump GetUnicodeDumpW diff --git a/lib/libZRCola/include/zrcola/highlight.h b/lib/libZRCola/include/zrcola/highlight.h index f3668a3..d67ea3e 100644 --- a/lib/libZRCola/include/zrcola/highlight.h +++ b/lib/libZRCola/include/zrcola/highlight.h @@ -45,7 +45,7 @@ namespace ZRCola { protected: uint16_t chr_to; ///< Character end in \c data - wchar_t data[]; ///< Character + char16_t data[]; ///< Character private: inline highlight(_In_ const highlight &other); @@ -61,21 +61,21 @@ namespace ZRCola { /// inline highlight( _In_opt_ hlghtsetid_t set = 0, - _In_opt_z_count_(chr_len) const wchar_t *chr = NULL, + _In_opt_z_count_(chr_len) const char16_t *chr = NULL, _In_opt_ size_t chr_len = 0) { this->set = set; this->chr_to = static_cast(chr_len); - if (chr && chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); + if (chr && chr_len) memcpy(this->data, chr, sizeof(char16_t)*chr_len); } - inline const wchar_t* chr () const { return data; }; - inline wchar_t* chr () { return data; }; - inline const wchar_t* chr_end() const { return data + chr_to; }; - inline wchar_t* chr_end() { return data + chr_to; }; - inline uint16_t chr_len() const { return chr_to; }; + inline const char16_t* chr () const { return data; }; + inline char16_t* chr () { return data; }; + inline const char16_t* chr_end() const { return data + chr_to; }; + inline char16_t* chr_end() { return data + chr_to; }; + inline uint16_t chr_len() const { return chr_to; }; - inline wchar_t chr_at(_In_ size_t i) const + inline char16_t chr_at(_In_ size_t i) const { return i < chr_to ? data[i] : 0; } @@ -160,7 +160,7 @@ namespace ZRCola { /// \param[in] inputMax Length of the input string in characters. Can be (size_t)-1 if \p input is zero terminated. /// \param[in] callback Function to be called on highlight switch /// - void Highlight(_In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _In_ std::function callback) const; + void Highlight(_In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _In_ std::function callback) const; }; }; diff --git a/lib/libZRCola/include/zrcola/language.h b/lib/libZRCola/include/zrcola/language.h index d86d1a1..6cd60fe 100644 --- a/lib/libZRCola/include/zrcola/language.h +++ b/lib/libZRCola/include/zrcola/language.h @@ -35,7 +35,7 @@ namespace ZRCola { protected: uint16_t chr_to; ///< Character end in \c data - wchar_t data[]; ///< Character + char16_t data[]; ///< Character private: inline langchar(_In_ const langchar &other); @@ -51,19 +51,19 @@ namespace ZRCola { /// inline langchar( _In_opt_ langid_t lang = langid_t::blank, - _In_opt_z_count_(chr_len) const wchar_t *chr = NULL, + _In_opt_z_count_(chr_len) const char16_t *chr = NULL, _In_opt_ size_t chr_len = 0) { this->lang = lang; this->chr_to = static_cast(chr_len); - if (chr && chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); + if (chr && chr_len) memcpy(this->data, chr, sizeof(char16_t)*chr_len); } - inline const wchar_t* chr () const { return data; }; - inline wchar_t* chr () { return data; }; - inline const wchar_t* chr_end() const { return data + chr_to; }; - inline wchar_t* chr_end() { return data + chr_to; }; - inline uint16_t chr_len() const { return chr_to; }; + inline const char16_t* chr () const { return data; }; + inline char16_t* chr () { return data; }; + inline const char16_t* chr_end() const { return data + chr_to; }; + inline char16_t* chr_end() { return data + chr_to; }; + inline uint16_t chr_len() const { return chr_to; }; }; #pragma pack(pop) @@ -176,7 +176,7 @@ namespace ZRCola { /// \returns /// - \c true when character is used in language /// - \c false otherwise - bool IsLocalCharacter(_In_ const wchar_t *chr, _In_ const wchar_t *chr_end, _In_ langid_t lang) const; + bool IsLocalCharacter(_In_ const char16_t *chr, _In_ const char16_t *chr_end, _In_ langid_t lang) const; }; @@ -196,7 +196,7 @@ namespace ZRCola { protected: uint16_t name_to; ///< Language name end in \c data - wchar_t data[]; ///< Language name + char16_t data[]; ///< Language name private: inline language(_In_ const language &other); @@ -212,19 +212,19 @@ namespace ZRCola { /// inline language( _In_opt_ langid_t lang = langid_t::blank, - _In_opt_z_count_(name_len) const wchar_t *name = NULL, + _In_opt_z_count_(name_len) const char16_t *name = NULL, _In_opt_ size_t name_len = 0) { this->lang = lang; this->name_to = static_cast(name_len); - if (name && name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len); + if (name && name_len) memcpy(this->data, name, sizeof(char16_t)*name_len); } - inline const wchar_t* name () const { return data; }; - inline wchar_t* name () { return data; }; - inline const wchar_t* name_end() const { return data + name_to; }; - inline wchar_t* name_end() { return data + name_to; }; - inline uint16_t name_len() const { return name_to; }; + inline const char16_t* name () const { return data; }; + inline char16_t* name () { return data; }; + inline const char16_t* name_end() const { return data + name_to; }; + inline char16_t* name_end() { return data + name_to; }; + inline uint16_t name_len() const { return name_to; }; }; #pragma pack(pop) diff --git a/lib/libZRCola/include/zrcola/tag.h b/lib/libZRCola/include/zrcola/tag.h index 63bb970..e6d450d 100644 --- a/lib/libZRCola/include/zrcola/tag.h +++ b/lib/libZRCola/include/zrcola/tag.h @@ -38,7 +38,7 @@ namespace ZRCola { protected: uint16_t chr_to; ///< Character end in \c data - wchar_t data[]; ///< Character + char16_t data[]; ///< Character private: inline chrtag(_In_ const chrtag &other); @@ -53,20 +53,20 @@ namespace ZRCola { /// \param[in] tag Tag /// inline chrtag( - _In_opt_z_count_(chr_len) const wchar_t *chr = NULL, - _In_opt_ size_t chr_len = 0, - _In_opt_ tagid_t tag = 0) + _In_opt_z_count_(chr_len) const char16_t *chr = NULL, + _In_opt_ size_t chr_len = 0, + _In_opt_ tagid_t tag = 0) { this->tag = tag; this->chr_to = static_cast(chr_len); - if (chr && chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len); + if (chr && chr_len) memcpy(this->data, chr, sizeof(char16_t)*chr_len); } - inline const wchar_t* chr () const { return data; }; - inline wchar_t* chr () { return data; }; - inline const wchar_t* chr_end() const { return data + chr_to; }; - inline wchar_t* chr_end() { return data + chr_to; }; - inline uint16_t chr_len() const { return chr_to; }; + inline const char16_t* chr () const { return data; }; + inline char16_t* chr () { return data; }; + inline const char16_t* chr_end() const { return data + chr_to; }; + inline char16_t* chr_end() { return data + chr_to; }; + inline uint16_t chr_len() const { return chr_to; }; }; #pragma pack(pop) @@ -209,7 +209,7 @@ namespace ZRCola { /// \param[in ] fn_abort Pointer to function to periodically test for search cancellation /// \param[in ] cookie Cookie for \p fn_abort call /// - bool Search(_In_ const std::map &tags, _In_ const character_db &ch_db, _In_ const std::set &cats, _Inout_ std::map &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; + bool Search(_In_ const std::map &tags, _In_ const character_db &ch_db, _In_ const std::set &cats, _Inout_ std::map &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; }; @@ -230,7 +230,7 @@ namespace ZRCola { protected: uint16_t name_to; ///< Tag name end in \c data - wchar_t data[]; ///< Tag name + char16_t data[]; ///< Tag name private: inline tagname(_In_ const tagname &other); @@ -246,22 +246,22 @@ namespace ZRCola { /// \param[in] name_len Number of UTF-16 characters in \p name /// inline tagname( - _In_opt_ tagid_t tag = 0, - _In_opt_ uint32_t locale = 0, - _In_opt_z_count_(name_len) const wchar_t *name = NULL, - _In_opt_ size_t name_len = 0) + _In_opt_ tagid_t tag = 0, + _In_opt_ uint32_t locale = 0, + _In_opt_z_count_(name_len) const char16_t *name = NULL, + _In_opt_ size_t name_len = 0) { this->tag = tag; this->locale = locale; this->name_to = static_cast(name_len); - if (name && name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len); + if (name && name_len) memcpy(this->data, name, sizeof(char16_t)*name_len); } - inline const wchar_t* name () const { return data; }; - inline wchar_t* name () { return data; }; - inline const wchar_t* name_end() const { return data + name_to; }; - inline wchar_t* name_end() { return data + name_to; }; - inline uint16_t name_len() const { return name_to; }; + inline const char16_t* name () const { return data; }; + inline char16_t* name () { return data; }; + inline const char16_t* name_end() const { return data + name_to; }; + inline char16_t* name_end() { return data + name_to; }; + inline uint16_t name_len() const { return name_to; }; /// /// Compares two names @@ -281,7 +281,7 @@ namespace ZRCola { /// The function does not treat \\0 characters as terminators for performance reasons. /// Therefore \p count_a and \p count_b must represent exact string lengths. /// - static inline int CompareName(_In_ uint32_t locale, _In_z_count_(count_a) const wchar_t *str_a, _In_ uint16_t count_a, _In_z_count_(count_b) const wchar_t *str_b, _In_ uint16_t count_b) + static inline int CompareName(_In_ uint32_t locale, _In_z_count_(count_a) const char16_t *str_a, _In_ uint16_t count_a, _In_z_count_(count_b) const char16_t *str_b, _In_ uint16_t count_b) { #ifdef _WIN32 switch (::CompareString(locale, SORT_STRINGSORT | NORM_IGNORECASE, str_a, count_a, str_b, count_b)) { @@ -291,14 +291,14 @@ namespace ZRCola { default : assert(0); return -1; } #else - assert(0); // TODO: 1. Should honour locale. 2. Should use ICU for lowercase conversion. - std::wstring + assert(0); // TODO: 1. Should honour locale. 2. Should use ICU for lowercase conversion. 3. Should be UTF-16-aware. + std::u16string a(str_a, count_a), b(str_b, count_b); - auto tolower = [](wchar_t c){ return std::towlower(c); }; + auto tolower = [](char16_t c){ return std::towlower(c); }; std::transform(a.begin(), a.end(), a.begin(), tolower); std::transform(b.begin(), b.end(), b.begin(), tolower); - auto &coll = std::use_facet>(std::locale()); + auto &coll = std::use_facet>(std::locale()); return coll.compare(&*a.cbegin(), &*a.cend(), &*b.cbegin(), &*b.cend()); #endif } @@ -431,7 +431,7 @@ namespace ZRCola { /// \param[in ] fn_abort Pointer to function to periodically test for search cancellation /// \param[in ] cookie Cookie for \p fn_abort call /// - bool Search(_In_z_ const wchar_t *str, _In_ uint32_t locale, _Inout_ std::map &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; + bool Search(_In_z_ const char16_t *str, _In_ uint32_t locale, _Inout_ std::map &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const; }; }; diff --git a/lib/libZRCola/include/zrcola/translate.h b/lib/libZRCola/include/zrcola/translate.h index 7e9ba3b..dce035a 100644 --- a/lib/libZRCola/include/zrcola/translate.h +++ b/lib/libZRCola/include/zrcola/translate.h @@ -84,7 +84,7 @@ namespace ZRCola { protected: uint16_t dst_to; ///< Destination character end in \c data uint16_t src_to; ///< Source string end in \c data - wchar_t data[]; ///< Destination string and source character + char16_t data[]; ///< Destination string and source character private: inline translation(_In_ const translation &other); @@ -105,39 +105,39 @@ namespace ZRCola { inline translation( _In_opt_ transetid_t set = 0, _In_opt_ uint16_t dst_rank = 0, - _In_opt_z_count_(dst_len) const wchar_t *dst = NULL, + _In_opt_z_count_(dst_len) const char16_t *dst = NULL, _In_opt_ size_t dst_len = 0, _In_opt_ uint16_t src_rank = 0, - _In_opt_z_count_(src_len) const wchar_t *src = NULL, + _In_opt_z_count_(src_len) const char16_t *src = NULL, _In_opt_ size_t src_len = 0) { this->set = set; this->dst_rank = dst_rank; this->src_rank = src_rank; this->dst_to = static_cast(dst_len); - if (dst && dst_len) memcpy(this->data, dst, sizeof(wchar_t)*dst_len); + if (dst && dst_len) memcpy(this->data, dst, sizeof(char16_t)*dst_len); this->src_to = static_cast(this->dst_to + src_len); - if (src && src_len) memcpy(this->data + this->dst_to, src, sizeof(wchar_t)*src_len); + if (src && src_len) memcpy(this->data + this->dst_to, src, sizeof(char16_t)*src_len); } - inline const wchar_t* dst () const { return data; }; - inline wchar_t* dst () { return data; }; - inline const wchar_t* dst_end() const { return data + dst_to; }; - inline wchar_t* dst_end() { return data + dst_to; }; - inline uint16_t dst_len() const { return dst_to; }; + inline const char16_t* dst () const { return data; }; + inline char16_t* dst () { return data; }; + inline const char16_t* dst_end() const { return data + dst_to; }; + inline char16_t* dst_end() { return data + dst_to; }; + inline uint16_t dst_len() const { return dst_to; }; - inline wchar_t dst_at(_In_ size_t i) const + inline char16_t dst_at(_In_ size_t i) const { return i < dst_to ? data[i] : 0; } - inline const wchar_t* src () const { return data + dst_to; }; - inline wchar_t* src () { return data + dst_to; }; - inline const wchar_t* src_end() const { return data + src_to; }; - inline wchar_t* src_end() { return data + src_to; }; - inline uint16_t src_len() const { return src_to - dst_to; }; + inline const char16_t* src () const { return data + dst_to; }; + inline char16_t* src () { return data + dst_to; }; + inline const char16_t* src_end() const { return data + src_to; }; + inline char16_t* src_end() { return data + src_to; }; + inline uint16_t src_len() const { return src_to - dst_to; }; - inline wchar_t src_at(_In_ size_t i) const + inline char16_t src_at(_In_ size_t i) const { size_t ii = i + dst_to; // absolute index return ii < src_to ? data[ii] : 0; @@ -302,7 +302,7 @@ namespace ZRCola { /// \param[out] output Output string (UTF-16) /// \param[out] map The vector of source to destination index mappings (optional) /// - void Translate(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector* map = NULL) const; + void Translate(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _Out_ std::u16string &output, _Out_opt_ std::vector* map = NULL) const; /// /// Inverse translates string @@ -313,7 +313,7 @@ namespace ZRCola { /// \param[out] output Output string (UTF-16) /// \param[out] map The vector of source to destination index mappings (optional) /// - inline void TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector* map = NULL) const + inline void TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _Out_ std::u16string &output, _Out_opt_ std::vector* map = NULL) const { TranslateInv(set, input, inputMax, NULL, langid_t::blank, output, map); } @@ -329,7 +329,7 @@ namespace ZRCola { /// \param[out] output Output string (UTF-16) /// \param[out] map The vector of source to destination index mappings (optional) /// - void TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _In_opt_ const langchar_db *lc_db, _In_opt_ langid_t lang, _Out_ std::wstring &output, _Out_opt_ std::vector* map = NULL) const; + void TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _In_opt_ const langchar_db *lc_db, _In_opt_ langid_t lang, _Out_ std::u16string &output, _Out_opt_ std::vector* map = NULL) const; }; @@ -350,7 +350,7 @@ namespace ZRCola { protected: uint16_t src_to; ///< Source name end in \c data uint16_t dst_to; ///< Sestination name end in \c data - wchar_t data[]; ///< Source and destination names + char16_t data[]; ///< Source and destination names private: inline transet(_In_ const transet &other); @@ -368,29 +368,29 @@ namespace ZRCola { /// inline transet( _In_opt_ transetid_t set = 0, - _In_opt_z_count_(src_len) const wchar_t *src = NULL, + _In_opt_z_count_(src_len) const char16_t *src = NULL, _In_opt_ size_t src_len = 0, - _In_opt_z_count_(dst_len) const wchar_t *dst = NULL, + _In_opt_z_count_(dst_len) const char16_t *dst = NULL, _In_opt_ size_t dst_len = 0) { this->set = set; this->src_to = static_cast(src_len); - if (src && src_len) memcpy(this->data, src, sizeof(wchar_t)*src_len); + if (src && src_len) memcpy(this->data, src, sizeof(char16_t)*src_len); this->dst_to = static_cast(this->src_to + dst_len); - if (dst && dst_len) memcpy(this->data + this->src_to, dst, sizeof(wchar_t)*dst_len); + if (dst && dst_len) memcpy(this->data + this->src_to, dst, sizeof(char16_t)*dst_len); } - inline const wchar_t* src () const { return data; }; - inline wchar_t* src () { return data; }; - inline const wchar_t* src_end() const { return data + src_to; }; - inline wchar_t* src_end() { return data + src_to; }; - inline uint16_t src_len() const { return src_to; }; + inline const char16_t* src () const { return data; }; + inline char16_t* src () { return data; }; + inline const char16_t* src_end() const { return data + src_to; }; + inline char16_t* src_end() { return data + src_to; }; + inline uint16_t src_len() const { return src_to; }; - inline const wchar_t* dst () const { return data + src_to; }; - inline wchar_t* dst () { return data + src_to; }; - inline const wchar_t* dst_end() const { return data + dst_to; }; - inline wchar_t* dst_end() { return data + dst_to; }; - inline uint16_t dst_len() const { return dst_to - src_to; }; + inline const char16_t* dst () const { return data + src_to; }; + inline char16_t* dst () { return data + src_to; }; + inline const char16_t* dst_end() const { return data + dst_to; }; + inline char16_t* dst_end() { return data + dst_to; }; + inline uint16_t dst_len() const { return dst_to - src_to; }; }; #pragma pack(pop) @@ -464,7 +464,7 @@ namespace ZRCola { protected: uint16_t name_to; ///< Translation sequence name end in \c data uint16_t sets_to; ///< Translation sequence sets end in \c data - wchar_t data[]; ///< Translation sequence name and sets + char16_t data[]; ///< Translation sequence name and sets private: inline transeq(_In_ const transeq &other); @@ -484,7 +484,7 @@ namespace ZRCola { inline transeq( _In_opt_ transeqid_t seq = 0, _In_opt_ uint16_t rank = 0, - _In_opt_z_count_(name_len) const wchar_t *name = NULL, + _In_opt_z_count_(name_len) const char16_t *name = NULL, _In_opt_ size_t name_len = 0, _In_opt_count_ (sets_len) const transetid_t *sets = NULL, _In_opt_ size_t sets_len = 0) @@ -492,16 +492,16 @@ namespace ZRCola { this->seq = seq; this->rank = rank; this->name_to = static_cast(name_len); - if (name && name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len); + if (name && name_len) memcpy(this->data, name, sizeof(char16_t)*name_len); this->sets_to = static_cast(this->name_to + sets_len); if (sets && sets_len) memcpy(this->data + this->name_to, sets, sizeof(transetid_t)*sets_len); } - inline const wchar_t* name () const { return data; }; - inline wchar_t* name () { return data; }; - inline const wchar_t* name_end() const { return data + name_to; }; - inline wchar_t* name_end() { return data + name_to; }; - inline uint16_t name_len() const { return name_to; }; + inline const char16_t* name () const { return data; }; + inline char16_t* name () { return data; }; + inline const char16_t* name_end() const { return data + name_to; }; + inline char16_t* name_end() { return data + name_to; }; + inline uint16_t name_len() const { return name_to; }; inline const transetid_t* sets () const { return reinterpret_cast(data + name_to); }; inline transetid_t* sets () { return reinterpret_cast< transetid_t*>(data + name_to); }; @@ -592,7 +592,7 @@ namespace ZRCola { if (a.rank < b.rank) return -1; else if (a.rank > b.rank) return +1; - auto &coll = std::use_facet>(std::locale()); + auto &coll = std::use_facet>(std::locale()); return coll.compare(a.name(), a.name_end(), b.name(), b.name_end()); } } idxRank; ///< Rank index diff --git a/lib/libZRCola/src/character.cpp b/lib/libZRCola/src/character.cpp index 4df524b..01dfbb7 100644 --- a/lib/libZRCola/src/character.cpp +++ b/lib/libZRCola/src/character.cpp @@ -9,7 +9,29 @@ const ZRCola::chrcatid_t ZRCola::chrcatid_t::blank = {}; -bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set &cats, _Inout_ std::map &hits, _Inout_ std::map &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const +#ifndef _WIN32 + +_Use_decl_annotations_ +size_t ZRCola::wcslen(const char16_t *str) +{ + for (size_t i = 0; ; ++i) + if (!str[i]) + return i; +} + +_Use_decl_annotations_ +size_t ZRCola::wcsnlen(const char16_t *str, size_t count) +{ + for (size_t i = 0; ; ++i) + if (i >= count || !str[i]) + return i; +} + +#endif + + +_Use_decl_annotations_ +bool ZRCola::character_db::Search(const char16_t *str, const std::set &cats, std::map &hits, std::map &hits_sub, bool (__cdecl *fn_abort)(void *cookie), void *cookie) const { assert(str); @@ -27,14 +49,14 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set } // Get term. - std::wstring term; - if (*str == L'"') { - const wchar_t *str_end = ++str; + std::u16string term; + if (*str == u'"') { + const char16_t *str_end = ++str; for (;;) { if (*str_end == 0) { term.assign(str, str_end); break; - } else if (*str_end == L'"') { + } else if (*str_end == u'"') { term.assign(str, str_end); str_end++; break; @@ -43,7 +65,7 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set } str = str_end; } else { - const wchar_t *str_end = str + 1; + const char16_t *str_end = str + 1; for (; *str_end && !iswspace(*str_end); str_end++); term.assign(str, str_end); str = str_end; @@ -57,7 +79,7 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set if (fn_abort && fn_abort(cookie)) return false; - const wchar_t *val; + const char16_t *val; size_t val_len; if (idxDsc.find(term.c_str(), term.size(), &val, &val_len)) { @@ -66,7 +88,7 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set if (fn_abort && fn_abort(cookie)) return false; j = wcsnlen(val + i, val_len - i); if (cats.find(GetCharCat(val + i, j)) != cats.end()) { - std::wstring c(val + i, j); + std::u16string c(val + i, j); auto idx = hits.find(c); if (idx == hits.end()) { // New character. @@ -85,7 +107,7 @@ bool ZRCola::character_db::Search(_In_z_ const wchar_t *str, _In_ const std::set if (fn_abort && fn_abort(cookie)) return false; j = wcsnlen(val + i, val_len - i); if (cats.find(GetCharCat(val + i, j)) != cats.end()) { - std::wstring c(val + i, j); + std::u16string c(val + i, j); auto idx = hits_sub.find(c); if (idx == hits_sub.end()) { // New character. diff --git a/lib/libZRCola/src/common.cpp b/lib/libZRCola/src/common.cpp index 1a3dff1..92620d0 100644 --- a/lib/libZRCola/src/common.cpp +++ b/lib/libZRCola/src/common.cpp @@ -10,7 +10,7 @@ const ZRCola::langid_t ZRCola::langid_t::blank = {}; _Use_decl_annotations_ -int ZRCola::CompareString(const wchar_t* str_a, size_t count_a, const wchar_t* str_b, size_t count_b) +int ZRCola::CompareString(const char16_t* str_a, size_t count_a, const char16_t* str_b, size_t count_b) { for (size_t i = 0; ; i++) { if (i >= count_a && i >= count_b) return 0; @@ -23,11 +23,11 @@ int ZRCola::CompareString(const wchar_t* str_a, size_t count_a, const wchar_t* s _Use_decl_annotations_ -inline std::string ZRCola::GetUnicodeDumpA(const wchar_t* str, size_t count, const char* sep) +inline std::string ZRCola::GetUnicodeDumpA(const char16_t* str, size_t count, const char* sep) { std::string out; size_t sep_len = strlen(sep); - size_t dump_len_max = sep_len + 8 + 1; + size_t dump_len_max = sep_len + 4 + 1; char* dump; std::unique_ptr dump_obj(dump = new char[dump_len_max]); if (count && str[0]) { @@ -54,11 +54,11 @@ inline std::string ZRCola::GetUnicodeDumpA(const wchar_t* str, size_t count, con _Use_decl_annotations_ -std::wstring ZRCola::GetUnicodeDumpW(const wchar_t* str, size_t count, const wchar_t* sep) +std::wstring ZRCola::GetUnicodeDumpW(const char16_t* str, size_t count, const wchar_t* sep) { std::wstring out; - size_t sep_len = wcslen(sep); - size_t dump_len_max = sep_len + 8 + 1; + size_t sep_len = ::wcslen(sep); + size_t dump_len_max = sep_len + 4 + 1; wchar_t* dump; std::unique_ptr dump_obj(dump = new wchar_t[dump_len_max]); if (count && str[0]) { diff --git a/lib/libZRCola/src/highlight.cpp b/lib/libZRCola/src/highlight.cpp index 8ec772d..7685ccb 100644 --- a/lib/libZRCola/src/highlight.cpp +++ b/lib/libZRCola/src/highlight.cpp @@ -6,7 +6,7 @@ #include "pch.h" _Use_decl_annotations_ -void ZRCola::highlight_db::Highlight(const wchar_t* input, size_t inputMax, std::function callback) const +void ZRCola::highlight_db::Highlight(const char16_t* input, size_t inputMax, std::function callback) const { size_t start = 0; hlghtsetid_t set = ZRCOLA_HLGHTSETID_DEFAULT; @@ -15,7 +15,7 @@ void ZRCola::highlight_db::Highlight(const wchar_t* input, size_t inputMax, std: // Find the longest matching highlight at i-th character. size_t l_match = (size_t)-1; for (size_t l = 0, r = idxChr.size(), ii = i, j = 0; ii < inputMax && l < r; ii++, j++) { - wchar_t c = input[ii]; + char16_t c = input[ii]; while (l < r) { // Test the highlight in the middle of the search area. size_t m = (l + r) / 2; @@ -23,7 +23,7 @@ void ZRCola::highlight_db::Highlight(const wchar_t* input, size_t inputMax, std: // Get the j-th character of the highlight. // All highlights that get short on characters are lexically ordered before. // Thus the j-th character is considered 0. - wchar_t s = idxChr[m].chr_at(j); + char16_t s = idxChr[m].chr_at(j); // Do the bisection test. if (c < s) r = m; diff --git a/lib/libZRCola/src/language.cpp b/lib/libZRCola/src/language.cpp index 17c4a78..eb13409 100644 --- a/lib/libZRCola/src/language.cpp +++ b/lib/libZRCola/src/language.cpp @@ -57,11 +57,11 @@ void ZRCola::LangConvert(_In_ LANGID lang_win, _Inout_ ZRCola::langid_t &lang) #endif -bool ZRCola::langchar_db::IsLocalCharacter(_In_ const wchar_t *chr, _In_ const wchar_t *chr_end, _In_ ZRCola::langid_t lang) const +bool ZRCola::langchar_db::IsLocalCharacter(_In_ const char16_t *chr, _In_ const char16_t *chr_end, _In_ ZRCola::langid_t lang) const { size_t n = chr_end - chr; assert(n <= 0xffff); - std::unique_ptr lc((langchar*)new char[sizeof(langchar) + sizeof(wchar_t)*n]); + std::unique_ptr lc((langchar*)new char[sizeof(langchar) + sizeof(char16_t)*n]); new (lc.get()) langchar(lang, chr, n); indexChr::size_type start; return idxChr.find(*lc, start); diff --git a/lib/libZRCola/src/tag.cpp b/lib/libZRCola/src/tag.cpp index e7b3923..8584335 100644 --- a/lib/libZRCola/src/tag.cpp +++ b/lib/libZRCola/src/tag.cpp @@ -6,7 +6,7 @@ #include "pch.h" -bool ZRCola::chrtag_db::Search(_In_ const std::map &tags, _In_ const character_db &ch_db, _In_ const std::set &cats, _Inout_ std::map &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const +bool ZRCola::chrtag_db::Search(_In_ const std::map &tags, _In_ const character_db &ch_db, _In_ const std::set &cats, _Inout_ std::map &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const { for (auto tag = tags.cbegin(), tag_end = tags.cend(); tag != tag_end; ++tag) { if (fn_abort && fn_abort(cookie)) return false; @@ -19,7 +19,7 @@ bool ZRCola::chrtag_db::Search(_In_ const std::map &tags, _In const chrtag &ct = idxTag[i]; uint16_t len = ct.chr_len(); if (cats.find(ch_db.GetCharCat(ct.chr(), len)) != cats.end()) { - std::wstring chr(ct.chr(), len); + std::u16string chr(ct.chr(), len); auto idx = hits.find(chr); if (idx == hits.end()) { // New character. @@ -37,7 +37,7 @@ bool ZRCola::chrtag_db::Search(_In_ const std::map &tags, _In } -bool ZRCola::tagname_db::Search(_In_z_ const wchar_t *str, _In_ uint32_t locale, _Inout_ std::map &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const +bool ZRCola::tagname_db::Search(_In_z_ const char16_t *str, _In_ uint32_t locale, _Inout_ std::map &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie), _In_opt_ void *cookie) const { assert(str); @@ -55,14 +55,14 @@ bool ZRCola::tagname_db::Search(_In_z_ const wchar_t *str, _In_ uint32_t locale, } // Get name. - std::wstring name; - if (*str == L'"') { - const wchar_t *str_end = ++str; + std::u16string name; + if (*str == u'"') { + const char16_t *str_end = ++str; for (;;) { if (*str_end == 0) { name.assign(str, str_end); break; - } else if (*str_end == L'"') { + } else if (*str_end == u'"') { name.assign(str, str_end); str_end++; break; @@ -71,7 +71,7 @@ bool ZRCola::tagname_db::Search(_In_z_ const wchar_t *str, _In_ uint32_t locale, } str = str_end; } else { - const wchar_t *str_end = str + 1; + const char16_t *str_end = str + 1; for (; *str_end && !iswspace(*str_end); str_end++); name.assign(str, str_end); str = str_end; @@ -81,7 +81,7 @@ bool ZRCola::tagname_db::Search(_In_z_ const wchar_t *str, _In_ uint32_t locale, if (fn_abort && fn_abort(cookie)) return false; // Find the name. - std::unique_ptr tn(reinterpret_cast(new char[sizeof(tagname) + sizeof(wchar_t)*name.length()])); + std::unique_ptr tn(reinterpret_cast(new char[sizeof(tagname) + sizeof(char16_t)*name.length()])); new (tn.get()) tagname(0, locale, name.data(), name.length()); size_t start, end; if (idxName.find(*tn, start, end)) { diff --git a/lib/libZRCola/src/translate.cpp b/lib/libZRCola/src/translate.cpp index 60f26fb..f6abee4 100644 --- a/lib/libZRCola/src/translate.cpp +++ b/lib/libZRCola/src/translate.cpp @@ -6,7 +6,7 @@ #include "pch.h" -void ZRCola::translation_db::Translate(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _Out_ std::wstring &output, _Out_opt_ std::vector* map) const +void ZRCola::translation_db::Translate(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _Out_ std::u16string &output, _Out_opt_ std::vector* map) const { assert(input || inputMax == 0); @@ -28,7 +28,7 @@ void ZRCola::translation_db::Translate(_In_ transetid_t set, _In_z_count_(inputM // Find the longest matching translation at i-th character. size_t l_match = (size_t)-1; for (size_t l = l_set, r = r_set, ii = i, j = 0; ii < inputMax && l < r; ii++, j++) { - wchar_t c = input[ii]; + char16_t c = input[ii]; while (l < r) { // Test the translation in the middle of the search area. size_t m = (l + r) / 2; @@ -36,7 +36,7 @@ void ZRCola::translation_db::Translate(_In_ transetid_t set, _In_z_count_(inputM // Get the j-th character of the translation. // All translations that get short on characters are lexically ordered before. // Thus the j-th character is considered 0. - wchar_t s = idxSrc[m].src_at(j); + char16_t s = idxSrc[m].src_at(j); // Do the bisection test. if (c < s) r = m; @@ -84,7 +84,7 @@ void ZRCola::translation_db::Translate(_In_ transetid_t set, _In_z_count_(inputM } -void ZRCola::translation_db::TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const wchar_t* input, _In_ size_t inputMax, _In_opt_ const langchar_db *lc_db, _In_opt_ langid_t lang, _Out_ std::wstring &output, _Out_opt_ std::vector* map) const +void ZRCola::translation_db::TranslateInv(_In_ transetid_t set, _In_z_count_(inputMax) const char16_t* input, _In_ size_t inputMax, _In_opt_ const langchar_db *lc_db, _In_opt_ langid_t lang, _Out_ std::u16string &output, _Out_opt_ std::vector* map) const { assert(input || inputMax == 0); @@ -106,7 +106,7 @@ void ZRCola::translation_db::TranslateInv(_In_ transetid_t set, _In_z_count_(inp // Find the longest matching inverse translation at i-th character. size_t l_match = (size_t)-1; for (size_t l = l_set, r = r_set, ii = i, j = 0; ii < inputMax && l < r; ii++, j++) { - wchar_t c = input[ii]; + char16_t c = input[ii]; while (l < r) { // Test the inverse translation in the middle of the search area. size_t m = (l + r) / 2; @@ -114,7 +114,7 @@ void ZRCola::translation_db::TranslateInv(_In_ transetid_t set, _In_z_count_(inp // Get the j-th character of the inverse translation. // All inverse translations that get short on characters are lexically ordered before. // Thus the j-th character is considered 0. - wchar_t s = idxDst[m].dst_at(j); + char16_t s = idxDst[m].dst_at(j); // Do the bisection test. if (c < s) r = m; @@ -147,7 +147,7 @@ void ZRCola::translation_db::TranslateInv(_In_ transetid_t set, _In_z_count_(inp if (l_match < r_set) { // The saved inverse translation was an exact match. const translation &trans = idxDst[l_match]; - if (trans.src_len() && trans.src()[0] != L'#' && (!lc_db || !lc_db->IsLocalCharacter(trans.dst(), trans.dst_end(), lang))) { + if (trans.src_len() && trans.src()[0] != u'#' && (!lc_db || !lc_db->IsLocalCharacter(trans.dst(), trans.dst_end(), lang))) { // Append source sequence. output.append(trans.src(), trans.src_end()); i += trans.dst_len();