Characters are represented as UTF-16 sequences instead of a single UTF-16 character
This commit is contained in:
@@ -41,6 +41,8 @@ namespace ZRCola {
|
||||
///
|
||||
typedef double charrank_t;
|
||||
|
||||
#pragma pack(push)
|
||||
#pragma pack(2)
|
||||
///
|
||||
/// Character category ID type
|
||||
/// Two letter abbreviation, non-terminated
|
||||
@@ -66,6 +68,7 @@ namespace ZRCola {
|
||||
///
|
||||
static const chrcatid_t blank;
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
|
||||
///
|
||||
@@ -180,11 +183,66 @@ namespace ZRCola {
|
||||
/// Character data
|
||||
///
|
||||
struct character {
|
||||
wchar_t chr; ///> Character
|
||||
public:
|
||||
chrcatid_t cat; ///> Category ID
|
||||
unsigned __int16 desc_len; ///< Character description length in \c data
|
||||
unsigned __int16 rel_len; ///< Related character count in \c data
|
||||
wchar_t data[]; ///< Character description and list of related characters
|
||||
|
||||
protected:
|
||||
unsigned __int16 chr_to; ///< Character end in \c data
|
||||
unsigned __int16 desc_to; ///< Character description end in \c data
|
||||
unsigned __int16 rel_to; ///< Related characters end in \c data
|
||||
wchar_t data[]; ///< Character, character description
|
||||
|
||||
private:
|
||||
inline character(_In_ const character &other);
|
||||
inline character& operator=(_In_ const character &other);
|
||||
|
||||
public:
|
||||
///
|
||||
/// Constructs the character
|
||||
///
|
||||
/// \param[in] chr Character
|
||||
/// \param[in] chr_len Number of UTF-16 characters in \p chr
|
||||
/// \param[in] cat Category
|
||||
/// \param[in] desc Description
|
||||
/// \param[in] desc_len Number of UTF-16 characters in \p desc
|
||||
/// \param[in] rel Related characters list (zero delimited)
|
||||
/// \param[in] rel_len Number of UTF-16 characters in \p rel (including zero delimiters)
|
||||
///
|
||||
inline character(
|
||||
_In_opt_z_count_(chr_len) const wchar_t *chr = NULL,
|
||||
_In_opt_ size_t chr_len = 0,
|
||||
_In_opt_ chrcatid_t cat = chrcatid_t::blank,
|
||||
_In_opt_z_count_(desc_len) const wchar_t *desc = NULL,
|
||||
_In_opt_ size_t desc_len = 0,
|
||||
_In_opt_z_count_(rel_len) const wchar_t *rel = NULL,
|
||||
_In_opt_ size_t rel_len = 0)
|
||||
{
|
||||
this->cat = cat;
|
||||
this->chr_to = static_cast<unsigned __int16>(chr_len);
|
||||
if (chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len);
|
||||
this->desc_to = static_cast<unsigned __int16>(this->chr_to + desc_len);
|
||||
if (desc_len) memcpy(this->data + this->chr_to, desc, sizeof(wchar_t)*desc_len);
|
||||
this->rel_to = static_cast<unsigned __int16>(this->desc_to + rel_len);
|
||||
if (rel_len) memcpy(this->data + this->desc_to, rel, sizeof(wchar_t)*rel_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* chr () const { return data; };
|
||||
inline wchar_t* chr () { return data; };
|
||||
inline const wchar_t* chr_end() const { return data + chr_to; };
|
||||
inline wchar_t* chr_end() { return data + chr_to; };
|
||||
inline unsigned __int16 chr_len() const { return chr_to; };
|
||||
|
||||
inline const wchar_t* desc () const { return data + chr_to; };
|
||||
inline wchar_t* desc () { return data + chr_to; };
|
||||
inline const wchar_t* desc_end() const { return data + desc_to; };
|
||||
inline wchar_t* desc_end() { return data + desc_to; };
|
||||
inline unsigned __int16 desc_len() const { return desc_to - chr_to; };
|
||||
|
||||
inline const wchar_t* rel () const { return data + desc_to; };
|
||||
inline wchar_t* rel () { return data + desc_to; };
|
||||
inline const wchar_t* rel_end() const { return data + rel_to; };
|
||||
inline wchar_t* rel_end() { return data + rel_to; };
|
||||
inline unsigned __int16 rel_len() const { return rel_to - desc_to; };
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -214,8 +272,8 @@ namespace ZRCola {
|
||||
///
|
||||
virtual int compare(_In_ const character &a, _In_ const character &b) const
|
||||
{
|
||||
if (a.chr < b.chr) return -1;
|
||||
else if (a.chr > b.chr) return 1;
|
||||
int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -252,23 +310,25 @@ namespace ZRCola {
|
||||
/// \param[in ] fn_abort Pointer to function to periodically test for search cancellation
|
||||
/// \param[in ] cookie Cookie for \p fn_abort call
|
||||
///
|
||||
bool Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<wchar_t, charrank_t> &hits, _Inout_ std::map<wchar_t, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
|
||||
bool Search(_In_z_ const wchar_t *str, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::wstring, charrank_t> &hits, _Inout_ std::map<std::wstring, charrank_t> &hits_sub, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
|
||||
|
||||
///
|
||||
/// Get character category
|
||||
///
|
||||
/// \param[in] c Character
|
||||
/// \param[in] chr Character
|
||||
/// \param[in] len Number of UTF-16 characters in \p chr
|
||||
///
|
||||
/// \returns
|
||||
/// - Character category if character found
|
||||
/// - `ZRCola::chrcatid_t::blank` otherwise
|
||||
///
|
||||
inline chrcatid_t GetCharCat(wchar_t c) const
|
||||
inline chrcatid_t GetCharCat(_In_z_count_(len) const wchar_t *chr, _In_ const size_t len) const
|
||||
{
|
||||
char _chr[sizeof(character)];
|
||||
((character *)_chr)->chr = c;
|
||||
assert(len <= 0xffff);
|
||||
std::unique_ptr<character> c((character*)new char[sizeof(character) + sizeof(wchar_t)*len]);
|
||||
c->character::character(chr, len);
|
||||
indexChar::size_type start;
|
||||
return idxChr.find(*((character *)_chr), start) ? idxChr[start].cat : chrcatid_t::blank;
|
||||
return idxChr.find(*c, start) ? idxChr[start].cat : chrcatid_t::blank;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -287,10 +347,44 @@ namespace ZRCola {
|
||||
/// Character category data
|
||||
///
|
||||
struct chrcat {
|
||||
chrcatid_t id; ///< Character category ID
|
||||
unsigned __int16 rank; ///< Character category rank
|
||||
unsigned __int16 name_len; ///< \c name length (in characters)
|
||||
wchar_t name[]; ///< Character category name
|
||||
public:
|
||||
chrcatid_t id; ///< Character category ID
|
||||
unsigned __int16 rank; ///< Character category rank
|
||||
|
||||
protected:
|
||||
unsigned __int16 name_to; ///< Character category name end in \c data
|
||||
wchar_t data[]; ///< Character category name
|
||||
|
||||
private:
|
||||
inline chrcat(_In_ const chrcat &other);
|
||||
inline chrcat& operator=(_In_ const chrcat &other);
|
||||
|
||||
public:
|
||||
///
|
||||
/// Constructs the character category
|
||||
///
|
||||
/// \param[in] id Character category ID
|
||||
/// \param[in] rank Character category rank
|
||||
/// \param[in] name Character category name
|
||||
/// \param[in] name_len Number of UTF-16 characters in \p name
|
||||
///
|
||||
inline chrcat(
|
||||
_In_opt_ chrcatid_t id = chrcatid_t::blank,
|
||||
_In_opt_ unsigned __int16 rank = 0,
|
||||
_In_opt_z_count_(name_len) const wchar_t *name = NULL,
|
||||
_In_opt_ size_t name_len = 0)
|
||||
{
|
||||
this->id = id;
|
||||
this->rank = rank;
|
||||
this->name_to = static_cast<unsigned __int16>(name_len);
|
||||
if (name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* name () const { return data; };
|
||||
inline wchar_t* name () { return data; };
|
||||
inline const wchar_t* name_end() const { return data + name_to; };
|
||||
inline wchar_t* name_end() { return data + name_to; };
|
||||
inline unsigned __int16 name_len() const { return name_to; };
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -374,10 +468,13 @@ namespace ZRCola {
|
||||
if (a.rank < b.rank) return -1;
|
||||
else if (a.rank > b.rank) return +1;
|
||||
|
||||
int r = _wcsncoll(a.name, b.name, std::min<unsigned __int16>(a.name_len, b.name_len));
|
||||
unsigned __int16
|
||||
a_name_len = a.name_len(),
|
||||
b_name_len = b.name_len();
|
||||
int r = _wcsncoll(a.name(), b.name(), std::min<unsigned __int16>(a_name_len, b_name_len));
|
||||
if (r != 0) return r;
|
||||
if (a.name_len < b.name_len) return -1;
|
||||
else if (a.name_len > b.name_len) return +1;
|
||||
if (a_name_len < b_name_len) return -1;
|
||||
else if (a_name_len > b_name_len) return +1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@@ -19,6 +19,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <sal.h>
|
||||
#include <istream>
|
||||
#include <ostream>
|
||||
#include <utility>
|
||||
@@ -58,11 +59,11 @@ namespace ZRCola {
|
||||
typedef unsigned __int32 recordsize_t;
|
||||
|
||||
|
||||
#pragma pack(push)
|
||||
#pragma pack(2)
|
||||
///
|
||||
/// Key-value index pair for mappings
|
||||
///
|
||||
#pragma pack(push)
|
||||
#pragma pack(2)
|
||||
template <class T>
|
||||
struct mappair_t
|
||||
{
|
||||
@@ -72,6 +73,8 @@ namespace ZRCola {
|
||||
#pragma pack(pop)
|
||||
|
||||
|
||||
#pragma pack(push)
|
||||
#pragma pack(2)
|
||||
///
|
||||
/// Language ID type
|
||||
/// Three letter abbreviation, zero terminated
|
||||
@@ -105,6 +108,7 @@ namespace ZRCola {
|
||||
///
|
||||
static const langid_t blank;
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
|
||||
///
|
||||
@@ -228,11 +232,11 @@ namespace ZRCola {
|
||||
///
|
||||
/// Memory index
|
||||
///
|
||||
template <class T, class T_idx = unsigned __int32, class T_data = T>
|
||||
template <class T_data, class T_idx = unsigned __int32, class T_el = T_data>
|
||||
class index : public std::vector<T_idx>
|
||||
{
|
||||
protected:
|
||||
std::vector<T> &host; ///< Reference to host data
|
||||
std::vector<T_data> &host; ///< Reference to host data
|
||||
|
||||
public:
|
||||
///
|
||||
@@ -240,7 +244,7 @@ namespace ZRCola {
|
||||
///
|
||||
/// \param[in] h Reference to vector holding the data
|
||||
///
|
||||
index(_In_ std::vector<T> &h) : host(h) {}
|
||||
index(_In_ std::vector<T_data> &h) : host(h) {}
|
||||
|
||||
|
||||
///
|
||||
@@ -250,9 +254,9 @@ namespace ZRCola {
|
||||
///
|
||||
/// \returns Data reference
|
||||
///
|
||||
inline const T_data& at(size_type pos) const
|
||||
inline const T_el& at(size_type pos) const
|
||||
{
|
||||
return (const T_data&)host.at(std::vector<T_idx>::at(pos));
|
||||
return *reinterpret_cast<const T_el*>(&host[std::vector<T_idx>::at(pos)]);
|
||||
}
|
||||
|
||||
|
||||
@@ -263,9 +267,9 @@ namespace ZRCola {
|
||||
///
|
||||
/// \returns Data reference
|
||||
///
|
||||
inline T_data& at(size_type pos)
|
||||
inline T_el& at(size_type pos)
|
||||
{
|
||||
return (T_data&)host.at(std::vector<T_idx>::at(pos));
|
||||
return *reinterpret_cast<T_el*>(&host[std::vector<T_idx>::at(pos)]);
|
||||
}
|
||||
|
||||
|
||||
@@ -276,9 +280,9 @@ namespace ZRCola {
|
||||
///
|
||||
/// \returns Data reference
|
||||
///
|
||||
inline const T_data& operator[](size_type pos) const
|
||||
inline const T_el& operator[](size_type pos) const
|
||||
{
|
||||
return (const T_data&)host[std::vector<T_idx>::at(pos)];
|
||||
return *reinterpret_cast<const T_el*>(&host[std::vector<T_idx>::operator[](pos)]);
|
||||
}
|
||||
|
||||
|
||||
@@ -289,9 +293,9 @@ namespace ZRCola {
|
||||
///
|
||||
/// \returns Data reference
|
||||
///
|
||||
inline T_data& operator[](size_type pos)
|
||||
inline T_el& operator[](size_type pos)
|
||||
{
|
||||
return (T_data&)host[std::vector<T_idx>::at(pos)];
|
||||
return *reinterpret_cast<T_el*>(&host[std::vector<T_idx>::operator[](pos)]);
|
||||
}
|
||||
|
||||
|
||||
@@ -315,7 +319,7 @@ namespace ZRCola {
|
||||
/// - =0 when a == b
|
||||
/// - >0 when a > b
|
||||
///
|
||||
virtual int compare(_In_ const T_data &a, _In_ const T_data &b) const = 0;
|
||||
virtual int compare(_In_ const T_el &a, _In_ const T_el &b) const = 0;
|
||||
|
||||
|
||||
///
|
||||
@@ -329,7 +333,7 @@ namespace ZRCola {
|
||||
/// - =0 when a == b
|
||||
/// - >0 when a > b
|
||||
///
|
||||
virtual int compare_sort(_In_ const T_data &a, _In_ const T_data &b) const
|
||||
virtual int compare_sort(_In_ const T_el &a, _In_ const T_el &b) const
|
||||
{
|
||||
// Revert to `compare()` by default.
|
||||
return compare(a, b);
|
||||
@@ -348,7 +352,7 @@ namespace ZRCola {
|
||||
/// - \c true if found
|
||||
/// - \c false otherwise
|
||||
///
|
||||
bool find(_In_ const T_data &el, _Out_ size_type &start, _Out_ size_type &end) const
|
||||
bool find(_In_ const T_el &el, _Out_ size_type &start, _Out_ size_type &end) const
|
||||
{
|
||||
// Start with the full search area.
|
||||
for (start = 0, end = size(); start < end; ) {
|
||||
@@ -388,7 +392,7 @@ namespace ZRCola {
|
||||
/// - \c true if found
|
||||
/// - \c false otherwise
|
||||
///
|
||||
bool find(_In_ const T_data &el, _Out_ size_type &start) const
|
||||
bool find(_In_ const T_el &el, _Out_ size_type &start) const
|
||||
{
|
||||
// Start with the full search area.
|
||||
size_t end;
|
||||
@@ -415,8 +419,11 @@ namespace ZRCola {
|
||||
private:
|
||||
static int __cdecl compare_s(void *p, const void *a, const void *b)
|
||||
{
|
||||
const index<T, T_idx, T_data> *t = (const index<T, T_idx, T_data>*)p;
|
||||
return t->compare_sort((const T_data&)t->host[*(const T_idx*)a], (const T_data&)t->host[*(const T_idx*)b]);
|
||||
const index<T_data, T_idx, T_el> *_this = reinterpret_cast<const index<T_data, T_idx, T_el>*>(p);
|
||||
const T_data *data = _this->host.data();
|
||||
return _this->compare_sort(
|
||||
*reinterpret_cast<const T_el*>(data + *reinterpret_cast<const T_idx*>(a)),
|
||||
*reinterpret_cast<const T_el*>(data + *reinterpret_cast<const T_idx*>(b)));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -556,10 +563,10 @@ namespace ZRCola {
|
||||
///
|
||||
/// Binary compares two strings
|
||||
///
|
||||
/// \param[in] str_a First string
|
||||
/// \param[in] str_a_end First string end
|
||||
/// \param[in] str_b Second string
|
||||
/// \param[in] str_b_end Second string end
|
||||
/// \param[in] str_a First string
|
||||
/// \param[in] count_a Number of characters in string \p str_a
|
||||
/// \param[in] str_b Second string
|
||||
/// \param[in] count_b Number of characters in string \p str_b
|
||||
///
|
||||
/// \returns
|
||||
/// - <0 when str_a < str_b
|
||||
@@ -568,18 +575,70 @@ namespace ZRCola {
|
||||
///
|
||||
/// \note
|
||||
/// The function does not treat \\0 characters as terminators for performance reasons.
|
||||
/// Therefore \p str_a_end and \p str_b_end must represent exact string ends.
|
||||
/// Therefore \p count_a and \p count_b must represent exact string lengths.
|
||||
///
|
||||
inline int CompareString(const wchar_t *str_a, const wchar_t *str_a_end, const wchar_t *str_b, const wchar_t *str_b_end)
|
||||
inline int CompareString(_In_ const wchar_t *str_a, _In_ size_t count_a, _In_ const wchar_t *str_b, _In_ size_t count_b)
|
||||
{
|
||||
for (; ; str_a++, str_b++) {
|
||||
if (str_a >= str_a_end && str_b >= str_b_end) return 0;
|
||||
else if (str_a >= str_a_end && str_b < str_b_end) return -1;
|
||||
else if (str_a < str_a_end && str_b >= str_b_end) return +1;
|
||||
else if (*str_a < *str_b) return -1;
|
||||
else if (*str_a > *str_b) return +1;
|
||||
for (size_t i = 0; ; i++) {
|
||||
if (i >= count_a && i >= count_b) return 0;
|
||||
else if (i >= count_a && i < count_b) return -1;
|
||||
else if (i < count_a && i >= count_b) return +1;
|
||||
else if (str_a[i] < str_b[i]) return -1;
|
||||
else if (str_a[i] > str_b[i]) return +1;
|
||||
}
|
||||
}
|
||||
|
||||
///
/// Generates and returns Unicode representation of the string using hexadecimal codes.
///
/// Every UTF-16 character is written as at least four upper-case hexadecimal digits;
/// \p sep is inserted between consecutive characters. The dump stops after \p count
/// characters or at the first zero character, whichever comes first.
///
/// \param[in] str    Unicode string
/// \param[in] count  Number of characters in string \p str
/// \param[in] sep    Separator (\c NULL is treated as an empty separator)
///
/// \returns Hexadecimal dump of \p str; empty string when there is nothing to dump
///
inline std::string GetUnicodeDumpA(_In_ const wchar_t *str, _In_ size_t count, _In_opt_z_ const char *sep = "+")
{
    std::string out;

    // The separator is appended directly, so the scratch buffer only has to hold the
    // hexadecimal digits of a single code unit (at most 8 for a 32-bit value) and the
    // terminator. A fixed stack buffer replaces the former heap array, which was
    // released with a mismatching scalar delete.
    char dump[2*sizeof(unsigned int) + 1];

    for (size_t i = 0; i < count && str[i]; i++) {
        if (i && sep)
            out += sep;

        const int len = _snprintf(dump, sizeof(dump)/sizeof(dump[0]), "%04X", static_cast<unsigned int>(str[i]));
        if (len > 0) // A negative result signals a formatting failure: append nothing.
            out.append(dump, static_cast<size_t>(len));
    }

    return out;
}
|
||||
|
||||
///
/// Generates and returns Unicode representation of the string using hexadecimal codes.
///
/// Every UTF-16 character is written as at least four upper-case hexadecimal digits;
/// \p sep is inserted between consecutive characters. The dump stops after \p count
/// characters or at the first zero character, whichever comes first.
///
/// \param[in] str    Unicode string
/// \param[in] count  Number of characters in string \p str
/// \param[in] sep    Separator (\c NULL is treated as an empty separator)
///
/// \returns Hexadecimal dump of \p str; empty string when there is nothing to dump
///
inline std::wstring GetUnicodeDumpW(_In_ const wchar_t *str, _In_ size_t count, _In_opt_z_ const wchar_t *sep = L"+")
{
    std::wstring out;

    // The separator is appended directly, so the scratch buffer only has to hold the
    // hexadecimal digits of a single code unit (at most 8 for a 32-bit value) and the
    // terminator. A fixed stack buffer replaces the former heap array, which was
    // released with a mismatching scalar delete.
    wchar_t dump[2*sizeof(unsigned int) + 1];

    for (size_t i = 0; i < count && str[i]; i++) {
        if (i && sep)
            out += sep;

        const int len = _snwprintf(dump, sizeof(dump)/sizeof(dump[0]), L"%04X", static_cast<unsigned int>(str[i]));
        if (len > 0) // A negative result signals a formatting failure: append nothing.
            out.append(dump, static_cast<size_t>(len));
    }

    return out;
}
|
||||
|
||||
#ifdef _UNICODE
|
||||
#define GetUnicodeDump GetUnicodeDumpW
|
||||
#else
|
||||
#define GetUnicodeDump GetUnicodeDumpA
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
@@ -591,8 +650,8 @@ namespace ZRCola {
|
||||
///
|
||||
/// \returns The stream \p stream
|
||||
///
|
||||
template <class T, class T_idx, class T_data>
|
||||
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::index<T, T_idx, T_data> &idx)
|
||||
template <class T_data, class T_idx, class T_el>
|
||||
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::index<T_data, T_idx, T_el> &idx)
|
||||
{
|
||||
// Write index count.
|
||||
auto idx_count = idx.size();
|
||||
@@ -623,8 +682,8 @@ inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::i
|
||||
///
|
||||
/// \returns The stream \p stream
|
||||
///
|
||||
template <class T, class T_idx, class T_data>
|
||||
inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::index<T, T_idx, T_data> &idx)
|
||||
template <class T_data, class T_idx, class T_el>
|
||||
inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::index<T_data, T_idx, T_el> &idx)
|
||||
{
|
||||
unsigned __int32 count;
|
||||
|
||||
|
@@ -45,9 +45,40 @@ namespace ZRCola {
|
||||
/// Character data
|
||||
///
|
||||
struct langchar {
|
||||
public:
|
||||
langid_t lang; ///< Language ID
|
||||
unsigned __int16 chr_len; ///< \c chr length (in UTF-16 characters)
|
||||
wchar_t chr[]; ///< Character
|
||||
|
||||
protected:
|
||||
unsigned __int16 chr_to; ///< Character end in \c data
|
||||
wchar_t data[]; ///< Character
|
||||
|
||||
private:
|
||||
inline langchar(_In_ const langchar &other);
|
||||
inline langchar& operator=(_In_ const langchar &other);
|
||||
|
||||
public:
|
||||
///
|
||||
/// Constructs the language character
|
||||
///
|
||||
/// \param[in] lang Character language
|
||||
/// \param[in] chr Character
|
||||
/// \param[in] chr_len Number of UTF-16 characters in \p chr
|
||||
///
|
||||
inline langchar(
|
||||
_In_opt_ langid_t lang = langid_t::blank,
|
||||
_In_opt_z_count_(chr_len) const wchar_t *chr = NULL,
|
||||
_In_opt_ size_t chr_len = 0)
|
||||
{
|
||||
this->lang = lang;
|
||||
this->chr_to = static_cast<unsigned __int16>(chr_len);
|
||||
if (chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* chr () const { return data; };
|
||||
inline wchar_t* chr () { return data; };
|
||||
inline const wchar_t* chr_end() const { return data + chr_to; };
|
||||
inline wchar_t* chr_end() { return data + chr_to; };
|
||||
inline unsigned __int16 chr_len() const { return chr_to; };
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -77,7 +108,7 @@ namespace ZRCola {
|
||||
///
|
||||
virtual int compare(_In_ const langchar &a, _In_ const langchar &b) const
|
||||
{
|
||||
int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len);
|
||||
int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
if (a.lang < b.lang) return -1;
|
||||
@@ -118,7 +149,7 @@ namespace ZRCola {
|
||||
if (a.lang < b.lang) return -1;
|
||||
else if (a.lang > b.lang) return 1;
|
||||
|
||||
int r = ZRCola::CompareString(a.chr, a.chr + a.chr_len, b.chr, b.chr + b.chr_len);
|
||||
// `langchar` exposes its character through the chr() accessor, so it has to be
// called here exactly like in the other three arguments.
int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
return 0;
|
||||
@@ -178,9 +209,40 @@ namespace ZRCola {
|
||||
/// Language data
|
||||
///
|
||||
struct language {
|
||||
public:
|
||||
langid_t id; ///< Language ID
|
||||
unsigned __int16 name_len; ///< \c name length (in UTF-16 characters)
|
||||
wchar_t name[]; ///< Language name
|
||||
|
||||
protected:
|
||||
unsigned __int16 name_to; ///< Language name end in \c data
|
||||
wchar_t data[]; ///< Language name
|
||||
|
||||
private:
|
||||
inline language(_In_ const language &other);
|
||||
inline language& operator=(_In_ const language &other);
|
||||
|
||||
public:
|
||||
///
|
||||
/// Constructs the language
|
||||
///
|
||||
/// \param[in] id Language ID
|
||||
/// \param[in] name Language name
|
||||
/// \param[in] name_len Number of UTF-16 characters in \p name
|
||||
///
|
||||
inline language(
|
||||
_In_opt_ langid_t id = langid_t::blank,
|
||||
_In_opt_z_count_(name_len) const wchar_t *name = NULL,
|
||||
_In_opt_ size_t name_len = 0)
|
||||
{
|
||||
this->id = id;
|
||||
this->name_to = static_cast<unsigned __int16>(name_len);
|
||||
if (name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* name () const { return data; };
|
||||
inline wchar_t* name () { return data; };
|
||||
inline const wchar_t* name_end() const { return data + name_to; };
|
||||
inline wchar_t* name_end() { return data + name_to; };
|
||||
inline unsigned __int16 name_len() const { return name_to; };
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
|
@@ -47,8 +47,40 @@ namespace ZRCola {
|
||||
/// Character tag data
|
||||
///
|
||||
struct chrtag {
|
||||
wchar_t chr; ///> Character
|
||||
public:
|
||||
tagid_t tag; ///< Tag ID
|
||||
|
||||
protected:
|
||||
unsigned __int16 chr_to; ///< Character end in \c data
|
||||
wchar_t data[]; ///< Character
|
||||
|
||||
private:
|
||||
inline chrtag(_In_ const chrtag &other);
|
||||
inline chrtag& operator=(_In_ const chrtag &other);
|
||||
|
||||
public:
|
||||
///
|
||||
/// Constructs the character tag
|
||||
///
|
||||
/// \param[in] chr Character
|
||||
/// \param[in] chr_len Number of UTF-16 characters in \p chr
|
||||
/// \param[in] tag Tag
|
||||
///
|
||||
inline chrtag(
|
||||
_In_opt_z_count_(chr_len) const wchar_t *chr = NULL,
|
||||
_In_opt_ size_t chr_len = 0,
|
||||
_In_opt_ tagid_t tag = 0)
|
||||
{
|
||||
this->tag = tag;
|
||||
this->chr_to = static_cast<unsigned __int16>(chr_len);
|
||||
if (chr_len) memcpy(this->data, chr, sizeof(wchar_t)*chr_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* chr () const { return data; };
|
||||
inline wchar_t* chr () { return data; };
|
||||
inline const wchar_t* chr_end() const { return data + chr_to; };
|
||||
inline wchar_t* chr_end() { return data + chr_to; };
|
||||
inline unsigned __int16 chr_len() const { return chr_to; };
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -78,8 +110,8 @@ namespace ZRCola {
|
||||
///
|
||||
virtual int compare(_In_ const chrtag &a, _In_ const chrtag &b) const
|
||||
{
|
||||
if (a.chr < b.chr) return -1;
|
||||
else if (a.chr > b.chr) return 1;
|
||||
int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -97,8 +129,8 @@ namespace ZRCola {
|
||||
///
|
||||
virtual int compare_sort(_In_ const chrtag &a, _In_ const chrtag &b) const
|
||||
{
|
||||
if (a.chr < b.chr) return -1;
|
||||
else if (a.chr > b.chr) return 1;
|
||||
int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
if (a.tag < b.tag) return -1;
|
||||
else if (a.tag > b.tag) return 1;
|
||||
@@ -156,8 +188,8 @@ namespace ZRCola {
|
||||
if (a.tag < b.tag) return -1;
|
||||
else if (a.tag > b.tag) return 1;
|
||||
|
||||
if (a.chr < b.chr) return -1;
|
||||
else if (a.chr > b.chr) return 1;
|
||||
int r = ZRCola::CompareString(a.chr(), a.chr_len(), b.chr(), b.chr_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -191,7 +223,7 @@ namespace ZRCola {
|
||||
/// \param[in ] fn_abort Pointer to function to periodically test for search cancellation
|
||||
/// \param[in ] cookie Cookie for \p fn_abort call
|
||||
///
|
||||
bool Search(_In_ const std::map<tagid_t, unsigned __int16> &tags, _In_ const character_db &ch_db, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<wchar_t, charrank_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
|
||||
bool Search(_In_ const std::map<tagid_t, unsigned __int16> &tags, _In_ const character_db &ch_db, _In_ const std::set<chrcatid_t> &cats, _Inout_ std::map<std::wstring, charrank_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
|
||||
};
|
||||
|
||||
|
||||
@@ -209,19 +241,53 @@ namespace ZRCola {
|
||||
/// Tag name data
|
||||
///
|
||||
struct tagname {
|
||||
public:
|
||||
tagid_t tag; ///< Tag ID
|
||||
LCID locale; ///< Locale ID
|
||||
unsigned __int16 name_len; ///< \c name length (in characters)
|
||||
wchar_t name[]; ///< Tag localized name
|
||||
|
||||
protected:
|
||||
unsigned __int16 name_to; ///< Tag name end in \c data
|
||||
wchar_t data[]; ///< Tag name
|
||||
|
||||
private:
|
||||
inline tagname(_In_ const tagname &other);
|
||||
inline tagname& operator=(_In_ const tagname &other);
|
||||
|
||||
public:
|
||||
///
|
||||
/// Constructs the localized tag name
|
||||
///
|
||||
/// \param[in] tag Tag
|
||||
/// \param[in] locale Locale
|
||||
/// \param[in] name Tag name
|
||||
/// \param[in] name_len Number of UTF-16 characters in \p name
|
||||
///
|
||||
inline tagname(
|
||||
_In_opt_ tagid_t tag = 0,
|
||||
_In_opt_ LCID locale = MAKELCID(MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL), SORT_DEFAULT),
|
||||
_In_opt_z_count_(name_len) const wchar_t *name = NULL,
|
||||
_In_opt_ size_t name_len = 0)
|
||||
{
|
||||
this->tag = tag;
|
||||
this->locale = locale;
|
||||
this->name_to = static_cast<unsigned __int16>(name_len);
|
||||
if (name_len) memcpy(this->data, name, sizeof(wchar_t)*name_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* name () const { return data; };
|
||||
inline wchar_t* name () { return data; };
|
||||
inline const wchar_t* name_end() const { return data + name_to; };
|
||||
inline wchar_t* name_end() { return data + name_to; };
|
||||
inline unsigned __int16 name_len() const { return name_to; };
|
||||
|
||||
///
|
||||
/// Compares two names
|
||||
///
|
||||
/// \param[in] locale Locale ID to use for compare
|
||||
/// \param[in] str_a First name
|
||||
/// \param[in] count_a Number of characters in string \p str_a
|
||||
/// \param[in] count_a Number of UTF-16 characters in \p str_a
|
||||
/// \param[in] str_b Second name
|
||||
/// \param[in] count_b Number of characters in string \p str_b
|
||||
/// \param[in] count_b Number of UTF-16 characters in \p str_b
|
||||
///
|
||||
/// \returns
|
||||
/// - <0 when str_a < str_b
|
||||
@@ -259,7 +325,7 @@ namespace ZRCola {
|
||||
indexName(_In_ std::vector<unsigned __int16> &h) : index<unsigned __int16, unsigned __int32, tagname>(h) {}
|
||||
|
||||
///
|
||||
/// Compares two tag names by name (for searching)
|
||||
/// Compares two tag names by locale and name (for searching)
|
||||
///
|
||||
/// \param[in] a Pointer to first element
|
||||
/// \param[in] b Pointer to second element
|
||||
@@ -274,14 +340,14 @@ namespace ZRCola {
|
||||
if (a.locale < b.locale) return -1;
|
||||
else if (a.locale > b.locale) return 1;
|
||||
|
||||
int r = tagname::CompareName(a.locale, a.name, a.name_len, b.name, b.name_len);
|
||||
int r = tagname::CompareName(a.locale, a.name(), a.name_len(), b.name(), b.name_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
///
|
||||
/// Compares two tag names by name (for sorting)
|
||||
/// Compares two tag names by locale and name (for sorting)
|
||||
///
|
||||
/// \param[in] a Pointer to first element
|
||||
/// \param[in] b Pointer to second element
|
||||
@@ -296,7 +362,7 @@ namespace ZRCola {
|
||||
if (a.locale < b.locale) return -1;
|
||||
else if (a.locale > b.locale) return 1;
|
||||
|
||||
int r = tagname::CompareName(a.locale, a.name, a.name_len, b.name, b.name_len);
|
||||
int r = tagname::CompareName(a.locale, a.name(), a.name_len(), b.name(), b.name_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
if (a.tag < b.tag) return -1;
|
||||
|
@@ -54,22 +54,49 @@ namespace ZRCola {
|
||||
unsigned __int16 dec_to; ///< Decomposed string end in \c data
|
||||
wchar_t data[]; ///< Decomposed string and composed character
|
||||
|
||||
private:
|
||||
inline translation(_In_ const translation &other);
|
||||
inline translation& operator=(_In_ const translation &other);
|
||||
|
||||
public:
|
||||
inline const wchar_t* com () const { return data; };
|
||||
inline wchar_t* com () { return data; };
|
||||
///
|
||||
/// Constructs the translation
|
||||
///
|
||||
/// \param[in] rank Translation rank
|
||||
/// \param[in] com Composed character
|
||||
/// \param[in] com_len Number of UTF-16 characters in \p com
|
||||
/// \param[in] dec Decomposed character
|
||||
/// \param[in] dec_len Number of UTF-16 characters in \p dec
|
||||
///
|
||||
inline translation(
|
||||
_In_opt_ unsigned __int16 rank = 0,
|
||||
_In_opt_z_count_(com_len) const wchar_t *com = NULL,
|
||||
_In_opt_ size_t com_len = 0,
|
||||
_In_opt_z_count_(dec_len) const wchar_t *dec = NULL,
|
||||
_In_opt_ size_t dec_len = 0)
|
||||
{
|
||||
this->rank = rank;
|
||||
this->com_to = static_cast<unsigned __int16>(com_len);
|
||||
if (com_len) memcpy(this->data, com, sizeof(wchar_t)*com_len);
|
||||
this->dec_to = static_cast<unsigned __int16>(this->com_to + dec_len);
|
||||
if (dec_len) memcpy(this->data + this->com_to, dec, sizeof(wchar_t)*dec_len);
|
||||
}
|
||||
|
||||
inline const wchar_t* com () const { return data; };
|
||||
inline wchar_t* com () { return data; };
|
||||
inline const wchar_t* com_end() const { return data + com_to; };
|
||||
inline wchar_t* com_end() { return data + com_to; };
|
||||
inline unsigned __int16 com_len() const { return com_to; };
|
||||
inline unsigned __int16 com_len() const { return com_to; };
|
||||
|
||||
inline wchar_t com_at(_In_ size_t i) const
|
||||
{
|
||||
return i < com_to ? data[i] : 0;
|
||||
}
|
||||
|
||||
inline const wchar_t* dec () const { return data + com_to; };
|
||||
inline wchar_t* dec () { return data + com_to; };
|
||||
inline const wchar_t* dec_end() const { return data + dec_to; };
|
||||
inline wchar_t* dec_end() { return data + dec_to; };
|
||||
inline const wchar_t* dec () const { return data + com_to; };
|
||||
inline wchar_t* dec () { return data + com_to; };
|
||||
inline const wchar_t* dec_end() const { return data + dec_to; };
|
||||
inline wchar_t* dec_end() { return data + dec_to; };
|
||||
inline unsigned __int16 dec_len() const { return dec_to - com_to; };
|
||||
|
||||
inline wchar_t dec_at(_In_ size_t i) const
|
||||
@@ -106,7 +133,7 @@ namespace ZRCola {
|
||||
///
|
||||
virtual int compare(_In_ const translation &a, _In_ const translation &b) const
|
||||
{
|
||||
int r = ZRCola::CompareString(a.dec(), a.dec_end(), b.dec(), b.dec_end());
|
||||
int r = ZRCola::CompareString(a.dec(), a.dec_len(), b.dec(), b.dec_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
return 0;
|
||||
@@ -125,10 +152,10 @@ namespace ZRCola {
|
||||
///
|
||||
virtual int compare_sort(_In_ const translation &a, _In_ const translation &b) const
|
||||
{
|
||||
int r = ZRCola::CompareString(a.dec(), a.dec_end(), b.dec(), b.dec_end());
|
||||
int r = ZRCola::CompareString(a.dec(), a.dec_len(), b.dec(), b.dec_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
r = ZRCola::CompareString(a.com(), a.com_end(), b.com(), b.com_end());
|
||||
r = ZRCola::CompareString(a.com(), a.com_len(), b.com(), b.com_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
return 0;
|
||||
@@ -162,7 +189,7 @@ namespace ZRCola {
|
||||
///
|
||||
virtual int compare(_In_ const translation &a, _In_ const translation &b) const
|
||||
{
|
||||
int r = ZRCola::CompareString(a.com(), a.com_end(), b.com(), b.com_end());
|
||||
int r = ZRCola::CompareString(a.com(), a.com_len(), b.com(), b.com_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
return 0;
|
||||
@@ -181,13 +208,13 @@ namespace ZRCola {
|
||||
///
|
||||
virtual int compare_sort(_In_ const translation &a, _In_ const translation &b) const
|
||||
{
|
||||
int r = ZRCola::CompareString(a.com(), a.com_end(), b.com(), b.com_end());
|
||||
int r = ZRCola::CompareString(a.com(), a.com_len(), b.com(), b.com_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
if (a.rank < b.rank) return -1;
|
||||
else if (a.rank > b.rank) return +1;
|
||||
|
||||
r = ZRCola::CompareString(a.dec(), a.dec_end(), b.dec(), b.dec_end());
|
||||
r = ZRCola::CompareString(a.dec(), a.dec_len(), b.dec(), b.dec_len());
|
||||
if (r != 0) return r;
|
||||
|
||||
return 0;
|
||||
|
Reference in New Issue
Block a user