/* Copyright 2015-2017 Amebis This file is part of ZRCola. ZRCola is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. ZRCola is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with ZRCola. If not, see . */ #pragma once #include #include #include #include #include #ifdef _WIN32 #include #endif /// /// Public function calling convention /// #define ZRCOLA_API #define ZRCOLA_NOVTABLE __declspec(novtable) #pragma warning(push) #pragma warning(disable: 4251) #pragma warning(disable: 4512) /// /// Data records alignment /// #define ZRCOLA_RECORD_ALIGN 1 /// /// Database IDs /// #define ZRCOLA_DB_ID (*(ZRCola::recordid_t*)"ZRC") namespace ZRCola { typedef unsigned __int32 recordid_t; typedef unsigned __int32 recordsize_t; #pragma pack(push) #pragma pack(2) /// /// Key-value index pair for mappings /// template struct mappair_t { T idx_key; ///< Index of key T idx_val; ///< Index of value }; #pragma pack(pop) #pragma pack(push) #pragma pack(2) /// /// Language ID type /// Three letter abbreviation, zero terminated /// struct ZRCOLA_API langid_t { char data[4]; inline langid_t& operator=(const langid_t &src) { data[0] = src.data[0]; data[1] = src.data[1]; data[2] = src.data[2]; data[3] = src.data[3]; return *this; } inline langid_t& operator=(const char *src) { data[3] = ( data[2] = ( data[1] = ( data[0] = src[0] ) != 0 ? src[1] : 0) != 0 ? src[2] : 0) != 0 ? src[3] : 0; return *this; } /// /// Blank language ID /// static const langid_t blank; }; #pragma pack(pop) /// /// Compares two language IDs /// /// \param[in] a First language ID /// \param[in] b Second language ID /// /// \returns /// - true when \p a == \p b /// - false otherwise /// inline bool operator==(const langid_t &a, const langid_t & b) { return a.data[0] == b.data[0] && (a.data[0] == 0 || (a.data[1] == b.data[1] && (a.data[1] == 0 || (a.data[2] == b.data[2] && (a.data[2] == 0 || a.data[3] == b.data[3]))))); } /// /// Compares two language IDs /// /// \param[in] a First language ID /// \param[in] b Second language ID /// /// \returns /// - true when \p a != \p b /// - false otherwise /// inline bool operator!=(const langid_t &a, const langid_t & b) { return !operator==(a, b); } /// /// Compares two language IDs /// /// \param[in] a First language ID /// \param[in] b Second language ID /// /// \returns /// - true when \p a < \p b /// - false otherwise /// inline bool operator<(const langid_t& a, const langid_t& b) { if (a.data[0] < b.data[0]) return true; else if (a.data[0] > b.data[0]) return false; else if (a.data[1] < b.data[1]) return true; else if (a.data[1] > b.data[1]) return false; else if (a.data[2] < b.data[2]) return true; else if (a.data[2] > b.data[2]) return false; else if (a.data[3] < b.data[3]) return true; else return false; } /// /// Compares two language IDs /// /// \param[in] a First language ID /// \param[in] b Second language ID /// /// \returns /// - true when \p a > \p b /// - false otherwise /// inline bool operator>(const langid_t& a, const langid_t& b) { return operator<(b, a); } /// /// Compares two language IDs /// /// \param[in] a First language ID /// \param[in] b Second language ID /// /// \returns /// - true when \p a <= \p b /// - false otherwise /// inline bool operator<=(const langid_t &a, const langid_t & b) { return !operator>(a, b); } /// /// Compares two language IDs /// /// \param[in] a First language ID /// \param[in] b Second language ID /// /// \returns /// - true when \p a >= \p b /// - false otherwise /// inline bool operator>=(const langid_t &a, const langid_t & b) { return !operator<(a, b); } #ifdef _WIN32 /// /// Converts language from Windows to ZRCola notation. /// /// \param[in] lang_win Windows language ID /// \param[in,out] lang ZRCola language ID /// void ZRCOLA_API LangConvert(_In_ LANGID lang_win, _Inout_ langid_t &lang); #endif /// /// Memory index /// template class index : public std::vector { protected: std::vector &host; ///< Reference to host data public: /// /// Constructs the index /// /// \param[in] h Reference to vector holding the data /// index(_In_ std::vector &h) : host(h) {} /// /// Returns data at given position according to the index /// /// \param[in] pos Position /// /// \returns Data reference /// inline const T_el& at(size_type pos) const { return *reinterpret_cast(&host[std::vector::at(pos)]); } /// /// Returns data at given position according to the index /// /// \param[in] pos Position /// /// \returns Data reference /// inline T_el& at(size_type pos) { return *reinterpret_cast(&host[std::vector::at(pos)]); } /// /// Returns data at given position according to the index /// /// \param[in] pos Position /// /// \returns Data reference /// inline const T_el& operator[](size_type pos) const { return *reinterpret_cast(&host[std::vector::operator[](pos)]); } /// /// Returns data at given position according to the index /// /// \param[in] pos Position /// /// \returns Data reference /// inline T_el& operator[](size_type pos) { return *reinterpret_cast(&host[std::vector::operator[](pos)]); } /// /// Sorts index /// inline void sort() { qsort_s(data(), size(), sizeof(T_idx), compare_s, this); } /// /// Compares two elements (for searching) /// /// \param[in] a Pointer to first element /// \param[in] b Pointer to second element /// /// \returns /// - <0 when a < b /// - =0 when a == b /// - >0 when a > b /// virtual int compare(_In_ const T_el &a, _In_ const T_el &b) const = 0; /// /// Compares two elements (for sorting) /// /// \param[in] a Pointer to first element /// \param[in] b Pointer to second element /// /// \returns /// - <0 when a < b /// - =0 when a == b /// - >0 when a > b /// virtual int compare_sort(_In_ const T_el &a, _In_ const T_el &b) const { // Revert to `compare()` by default. return compare(a, b); } /// /// Search for the element in the index /// The elements matching \p el are located on the interval [\p start, \p end) in the index. /// /// \param[in] el Element we are looking for (needle) /// \param[out] start Index of the first matching element found /// \param[out] end Index of the first non-matching element found /// /// \returns /// - \c true if found /// - \c false otherwise /// bool find(_In_ const T_el &el, _Out_ size_type &start, _Out_ size_type &end) const { // Start with the full search area. for (start = 0, end = size(); start < end; ) { size_type m = (start + end) / 2; int r = compare(el, at(m)); if (r < 0) end = m; else if (r > 0) start = m + 1; else { // Narrow the search area on the left to start at the first element in the run. for (size_type end2 = m; start < end2;) { size_type m2 = (start + end2) / 2; if (compare(el, at(m2)) <= 0) end2 = m2; else start = m2 + 1; } // Narrow the search area on the right to end at the first element not in the run. for (size_type start2 = m + 1; start2 < end;) { size_type m2 = (start2 + end) / 2; if (0 <= compare(el, at(m2))) start2 = m2 + 1; else end = m2; } return true; } } return false; } /// /// Search for the first element in the index /// /// \param[in] el Element we are looking for (needle) /// \param[out] start Index of the first matching element found /// /// \returns /// - \c true if found /// - \c false otherwise /// bool find(_In_ const T_el &el, _Out_ size_type &start) const { // Start with the full search area. size_t end; for (start = 0, end = size(); start < end; ) { size_type m = (start + end) / 2; int r = compare(el, at(m)); if (r < 0) end = m; else if (r > 0) start = m + 1; else { // Narrow the search area on the left to start at the first element in the run. for (size_type end2 = m; start < end2;) { m = (start + end2) / 2; if (compare(el, at(m)) <= 0) end2 = m; else start = m + 1; } return true; } } return false; } private: static int __cdecl compare_s(void *p, const void *a, const void *b) { const index *_this = reinterpret_cast*>(p); const T_data *data = _this->host.data(); return _this->compare_sort( *reinterpret_cast(data + *reinterpret_cast(a)), *reinterpret_cast(data + *reinterpret_cast(b))); } }; /// /// Memory text index /// template class textindex : public std::vector< mappair_t > { public: typedef std::vector< mappair_t > base_t; std::vector keys; ///< Key data std::vector values; ///< Index values public: /// /// Constructs the index /// textindex() {} /// /// Clears the index /// inline void clear() { std::vector< mappair_t >::clear(); keys .clear(); values.clear(); } /// /// Finds data for given key /// /// \param[in ] key Pointer to key /// \param[in ] key_len Count of \p key elements /// \param[out] val Pointer to receive pointer to key's values /// \param[out] val_len Pointer to receive count of \p val elements /// /// \returns /// - \c true if found /// - \c false otherwise /// bool find(_In_count_(key_len) const T_key *key, _In_ size_t key_len, _Out_ const T_val **val, _Out_ size_t *val_len) const { for (size_type start = 0, end = size(); start < end; ) { size_type m = (start + end) / 2; int r = compare(key, key_len, m); if (r < 0) end = m; else if (r > 0) start = m + 1; else { // Get values at position m. start = base_t::at(m ).idx_val; *val_len = (m < size() ? base_t::at(m + 1).idx_val : values.size()) - start; *val = &values.at(start); return true; } } return false; } protected: inline int compare(_In_count_(key_len) const T_key *key, _In_ size_t key_len, size_type pos) const { // Get key at position pos. size_type pos_next = pos + 1; size_t start = base_t::at(pos ).idx_key, key2_len = (pos_next < size() ? base_t::at(pos_next).idx_key : keys.size()) - start; std::vector::const_pointer key2 = &keys.at(start); // Compare keys. int r = memcmp(key, key2, sizeof(T_key)*std::min(key_len, key2_len)); if (r != 0 ) return r; else if (key_len < key2_len) return -1; else if (key_len > key2_len) return 1; return 0; } }; /// /// Source-destination index transformation mapping /// class ZRCOLA_NOVTABLE ZRCOLA_API mapping { public: size_t src; ///< Character index in source string size_t dst; ///< Character index in destination string inline mapping() {}; inline mapping(_In_ size_t s, _In_ size_t d) : src(s), dst(d) {} /// /// Reverses source and destination indexes /// inline void invert() { size_t tmp = src; src = dst; dst = tmp; } }; /// /// A vector for destination-source index transformation mapping /// class ZRCOLA_API mapping_vector : public std::vector { public: /// /// Transforms character index of destination to source /// /// \param[in] decmp Character index in destination string /// /// \returns Character index in source string /// size_t to_src(_In_ size_t dst) const; /// /// Transforms source index to destination index /// /// \param[in] cmp Character index in source string /// /// \returns Character index in destination string /// size_t to_dst(_In_ size_t src) const; /// /// Reverses source and destination indexes /// inline void invert() { for (iterator i = begin(), iEnd = end(); i != iEnd; ++i) i->invert(); } }; /// /// Binary compares two strings /// /// \param[in] str_a First string /// \param[in] count_a Number of characters in string \p str_a /// \param[in] str_b Second string /// \param[in] count_b Number of characters in string \p str_b /// /// \returns /// - <0 when str_a < str_b /// - =0 when str_a == str_b /// - >0 when str_a > str_b /// /// \note /// The function does not treat \\0 characters as terminators for performance reasons. /// Therefore \p count_a and \p count_b must represent exact string lengths. /// inline int CompareString(_In_ const wchar_t *str_a, _In_ size_t count_a, _In_ const wchar_t *str_b, _In_ size_t count_b) { for (size_t i = 0; ; i++) { if (i >= count_a && i >= count_b) return 0; else if (i >= count_a && i < count_b) return -1; else if (i < count_a && i >= count_b) return +1; else if (str_a[i] < str_b[i]) return -1; else if (str_a[i] > str_b[i]) return +1; } } /// /// Generates and returns Unicode representation of the string using hexadecimal codes. /// /// \param[in] str Unicode string /// \param[in] count Number of characters in string \p str /// \param[in] sep Separator /// inline std::string GetUnicodeDumpA(_In_ const wchar_t *str, _In_ size_t count, _In_opt_z_ const char *sep = "+") { std::string out; size_t dump_len_max = strlen(sep) + 4 + 1; char *dump; std::unique_ptr dump_obj(dump = new char[dump_len_max]); if (count && str[0]) { size_t i = 0; out.insert(out.end(), dump, dump + _snprintf(dump, dump_len_max, "%04X", str[i++])); while (i < count && str[i]) out.insert(out.end(), dump, dump + _snprintf(dump, dump_len_max, "%s%04X", sep, str[i++])); } return out; } /// /// Generates and returns Unicode representation of the string using hexadecimal codes. /// /// \param[in] str Unicode string /// \param[in] count Number of characters in string \p str /// \param[in] sep Separator /// inline std::wstring GetUnicodeDumpW(_In_ const wchar_t *str, _In_ size_t count, _In_opt_z_ const wchar_t *sep = L"+") { std::wstring out; size_t dump_len_max = wcslen(sep) + 4 + 1; wchar_t *dump; std::unique_ptr dump_obj(dump = new wchar_t[dump_len_max]); if (count && str[0]) { size_t i = 0; out.insert(out.end(), dump, dump + _snwprintf(dump, dump_len_max, L"%04X", str[i++])); while (i < count && str[i]) out.insert(out.end(), dump, dump + _snwprintf(dump, dump_len_max, L"%s%04X", sep, str[i++])); } return out; } #ifdef _UNICODE #define GetUnicodeDump GetUnicodeDumpW #else #define GetUnicodeDump GetUnicodeDumpA #endif }; /// /// Writes index to a stream /// /// \param[in] stream Output stream /// \param[in] idx Index /// /// \returns The stream \p stream /// template inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::index &idx) { // Write index count. auto idx_count = idx.size(); #if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) // 4G check if (idx_count > 0xffffffff) { stream.setstate(std::ios_base::failbit); return stream; } #endif if (stream.fail()) return stream; unsigned __int32 count = (unsigned __int32)idx_count; stream.write((const char*)&count, sizeof(count)); // Write index data. if (stream.fail()) return stream; stream.write((const char*)idx.data(), sizeof(T_idx)*count); return stream; } /// /// Reads index from a stream /// /// \param[in] stream Input stream /// \param[out] idx Index /// /// \returns The stream \p stream /// template inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::index &idx) { unsigned __int32 count; // Read index count. stream.read((char*)&count, sizeof(count)); if (!stream.good()) return stream; if (count) { // Read index data. idx.resize(count); stream.read((char*)idx.data(), sizeof(T_idx)*count); } else idx.clear(); return stream; } /// /// Writes text index to a stream /// /// \param[in] stream Output stream /// \param[in] idx Text index /// /// \returns The stream \p stream /// template inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::textindex &idx) { unsigned __int32 count; // Write index count. auto idx_count = idx.size(); #if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) // 4G check if (idx_count > 0xffffffff) { stream.setstate(std::ios_base::failbit); return stream; } #endif if (stream.fail()) return stream; count = (unsigned __int32)idx_count; stream.write((const char*)&count, sizeof(count)); // Write index data. if (stream.fail()) return stream; stream.write((const char*)idx.data(), sizeof(ZRCola::textindex::value_type)*count); // Write key count. auto key_count = idx.keys.size(); #if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) // 4G check if (idx_count > 0xffffffff) { stream.setstate(std::ios_base::failbit); return stream; } #endif if (stream.fail()) return stream; count = (unsigned __int32)key_count; stream.write((const char*)&count, sizeof(count)); // Write key data. if (stream.fail()) return stream; stream.write((const char*)idx.keys.data(), sizeof(std::vector::value_type)*count); // Write value count. auto value_count = idx.values.size(); #if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) // 4G check if (idx_count > 0xffffffff) { stream.setstate(std::ios_base::failbit); return stream; } #endif if (stream.fail()) return stream; count = (unsigned __int32)value_count; stream.write((const char*)&count, sizeof(count)); // Write value data. if (stream.fail()) return stream; stream.write((const char*)idx.values.data(), sizeof(std::vector::value_type)*count); return stream; } /// /// Reads text index from a stream /// /// \param[in] stream Input stream /// \param[out] idx Text index /// /// \returns The stream \p stream /// template inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::textindex &idx) { unsigned __int32 count; // Read text index count. stream.read((char*)&count, sizeof(count)); if (!stream.good()) return stream; if (count) { // Read text index. idx.resize(count); stream.read((char*)idx.data(), sizeof(ZRCola::textindex::value_type)*count); if (!stream.good()) return stream; } else idx.clear(); // Read keys count. stream.read((char*)&count, sizeof(count)); if (!stream.good()) return stream; if (count) { // Read keys. idx.keys.resize(count); stream.read((char*)idx.keys.data(), sizeof(std::vector::value_type)*count); if (!stream.good()) return stream; } else idx.keys.clear(); // Read value count. stream.read((char*)&count, sizeof(count)); if (!stream.good()) return stream; if (count) { // Read values. idx.values.resize(count); stream.read((char*)idx.values.data(), sizeof(std::vector::value_type)*count); } else idx.values.clear(); return stream; } #pragma warning(pop)