libZRCola: Initial stab at compiling with gcc

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
2022-09-13 13:27:19 +02:00
parent 63fda12c99
commit eedab7da56
17 changed files with 232 additions and 170 deletions

View File

@@ -11,11 +11,12 @@
#include <assert.h>
#include <algorithm>
#include <istream>
#include <locale>
#include <map>
#include <ostream>
#include <vector>
#include <set>
#include <string>
#include <vector>
#pragma warning(push)
#pragma warning(disable: 4200)
@@ -319,14 +320,14 @@ namespace ZRCola {
{
assert(len <= 0xffff);
std::unique_ptr<character> c((character*)new char[sizeof(character) + sizeof(wchar_t)*len]);
c->character::character(chr, len);
new (c.get()) character(chr, len);
indexChr::size_type start;
return idxChr.find(*c, start) ? idxChr[start].cat : chrcatid_t::blank;
}
};
typedef stdex::idrec::record<character_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> character_rec;
typedef stdex::idrec::record<character_db, recordid_t, 0x524843 /*"CHR"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> character_rec;
///
@@ -462,15 +463,8 @@ namespace ZRCola {
if (a.rank < b.rank) return -1;
else if (a.rank > b.rank) return +1;
uint16_t
a_name_len = a.name_len(),
b_name_len = b.name_len();
int r = _wcsncoll(a.name(), b.name(), std::min<uint16_t>(a_name_len, b_name_len));
if (r != 0) return r;
if (a_name_len < b_name_len) return -1;
else if (a_name_len > b_name_len) return +1;
return 0;
auto &coll = std::use_facet<std::collate<wchar_t>>(std::locale());
return coll.compare(a.name(), a.name_end(), b.name(), b.name_end());
}
} idxRank; ///< Rank index
@@ -494,14 +488,10 @@ namespace ZRCola {
};
typedef stdex::idrec::record<chrcat_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> chrcat_rec;
typedef stdex::idrec::record<chrcat_db, recordid_t, 0x544343 /*"CCT"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> chrcat_rec;
};
const ZRCola::recordid_t ZRCola::character_rec::id = *(ZRCola::recordid_t*)"CHR";
const ZRCola::recordid_t ZRCola::chrcat_rec ::id = *(ZRCola::recordid_t*)"CCT";
///
/// Reads character database from a stream
///

View File

@@ -8,10 +8,14 @@
#ifdef _WIN32
#define _WINSOCKAPI_ // Prevent inclusion of winsock.h in windows.h.
#include <Windows.h>
#include <sal.h>
#endif
#include <stdex/sal.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <wchar.h>
#include <istream>
#include <memory>
#include <ostream>
#include <utility>
#include <vector>
@@ -30,8 +34,15 @@
///
/// Database IDs
///
#define ZRCOLA_DB_ID (*(ZRCola::recordid_t*)"ZRC")
#define ZRCOLA_DB_ID 0x43525a // "ZRC"
// When building with a GNU-compatible compiler, supply a definition for the
// __cdecl keyword so that declarations written with it still compile.
#ifdef __GNUC__
#ifdef __i386__
// 32-bit x86: map the keyword to the matching calling-convention attribute.
#define __cdecl __attribute__((__cdecl__))
#else
// Any other target: the keyword expands to nothing.
#define __cdecl
#endif
#endif
namespace ZRCola {
typedef uint32_t recordid_t;
@@ -214,6 +225,11 @@ namespace ZRCola {
template <class T_data, class T_idx = uint32_t, class T_el = T_data>
class index : public std::vector<T_idx>
{
typedef std::vector<T_idx> base_t;
public:
typedef size_t size_type;
protected:
std::vector<T_data> &host; ///< Reference to host data
@@ -235,7 +251,7 @@ namespace ZRCola {
///
inline const T_el& at(size_type pos) const
{
    // Fetch the host position stored in index slot `pos` through the bounds-checked
    // accessor of the underlying vector, then view that host element as T_el.
    // The base class is named via base_t because it depends on the template
    // parameter T_idx and is therefore not searched by unqualified lookup.
    return *reinterpret_cast<const T_el*>(&host[base_t::at(pos)]);
}
@@ -248,7 +264,7 @@ namespace ZRCola {
///
inline T_el& at(size_type pos)
{
    // Fetch the host position stored in index slot `pos` through the bounds-checked
    // accessor of the underlying vector, then view that host element as T_el.
    // The base class is named via base_t because it depends on the template
    // parameter T_idx and is therefore not searched by unqualified lookup.
    return *reinterpret_cast<T_el*>(&host[base_t::at(pos)]);
}
@@ -261,7 +277,7 @@ namespace ZRCola {
///
inline const T_el& operator[](size_type pos) const
{
    // Same lookup as at(), but the index slot is read with the vector's
    // subscript operator. The base class is named via base_t because it depends
    // on the template parameter T_idx.
    return *reinterpret_cast<const T_el*>(&host[base_t::operator[](pos)]);
}
@@ -274,7 +290,7 @@ namespace ZRCola {
///
inline T_el& operator[](size_type pos)
{
    // Same lookup as at(), but the index slot is read with the vector's
    // subscript operator. The base class is named via base_t because it depends
    // on the template parameter T_idx.
    return *reinterpret_cast<T_el*>(&host[base_t::operator[](pos)]);
}
@@ -283,7 +299,7 @@ namespace ZRCola {
///
inline void sort()
{
    // Sort the index entries exactly once. data() and size() belong to the
    // template-dependent base class, so they are reached through base_t;
    // `this` is handed to qsort_s as the context pointer for compare_s.
    qsort_s(base_t::data(), base_t::size(), sizeof(T_idx), compare_s, this);
}
@@ -334,21 +350,21 @@ namespace ZRCola {
bool find(_In_ const T_el &el, _Out_ size_type &start, _Out_ size_type &end) const
{
// Start with the full search area.
for (start = 0, end = size(); start < end; ) {
size_type m = (start + end) / 2;
for (start = 0, end = base_t::size(); start < end; ) {
auto m = (start + end) / 2;
int r = compare(el, at(m));
if (r < 0) end = m;
else if (r > 0) start = m + 1;
else {
// Narrow the search area on the left to start at the first element in the run.
for (size_type end2 = m; start < end2;) {
size_type m2 = (start + end2) / 2;
for (auto end2 = m; start < end2;) {
auto m2 = (start + end2) / 2;
if (compare(el, at(m2)) <= 0) end2 = m2; else start = m2 + 1;
}
// Narrow the search area on the right to end at the first element not in the run.
for (size_type start2 = m + 1; start2 < end;) {
size_type m2 = (start2 + end) / 2;
for (auto start2 = m + 1; start2 < end;) {
auto m2 = (start2 + end) / 2;
if (0 <= compare(el, at(m2))) start2 = m2 + 1; else end = m2;
}
@@ -373,14 +389,14 @@ namespace ZRCola {
{
// Start with the full search area.
size_t end;
for (start = 0, end = size(); start < end; ) {
size_type m = (start + end) / 2;
for (start = 0, end = base_t::size(); start < end; ) {
auto m = (start + end) / 2;
int r = compare(el, at(m));
if (r < 0) end = m;
else if (r > 0) start = m + 1;
else {
// Narrow the search area on the left to start at the first element in the run.
for (size_type end2 = m; start < end2;) {
for (auto end2 = m; start < end2;) {
m = (start + end2) / 2;
if (compare(el, at(m)) <= 0) end2 = m; else start = m + 1;
}
@@ -410,8 +426,10 @@ namespace ZRCola {
template <class T_key, class T_val, class T_idx = uint32_t>
class textindex : public std::vector< mappair_t<T_idx> >
{
public:
typedef std::vector< mappair_t<T_idx> > base_t;
public:
typedef size_t size_type;
std::vector<T_key> keys; ///< Key data
std::vector<T_val> values; ///< Index values
@@ -447,15 +465,15 @@ namespace ZRCola {
///
_Success_(return) bool find(_In_count_(key_len) const T_key *key, _In_ size_t key_len, _Out_ const T_val **val, _Out_ size_t *val_len) const
{
for (size_type start = 0, end = size(); start < end; ) {
size_type m = (start + end) / 2;
for (size_type start = 0, end = base_t::size(); start < end; ) {
auto m = (start + end) / 2;
int r = compare(key, key_len, m);
if (r < 0) end = m;
else if (r > 0) start = m + 1;
else {
// Get values at position m.
start = base_t::at(m ).idx_val;
*val_len = (m < size() ? base_t::at(m + 1).idx_val : values.size()) - start;
start = base_t::at(m ).idx_val;
*val_len = (m < base_t::size() ? base_t::at(m + 1).idx_val : values.size()) - start;
*val = &values.at(start);
return true;
}
@@ -468,11 +486,11 @@ namespace ZRCola {
inline int compare(_In_count_(key_len) const T_key *key, _In_ size_t key_len, size_type pos) const
{
// Get key at position pos.
size_type pos_next = pos + 1;
auto pos_next = pos + 1;
size_t
start = base_t::at(pos ).idx_key,
key2_len = (pos_next < size() ? base_t::at(pos_next).idx_key : keys.size()) - start;
std::vector<T_key>::const_pointer key2 = &keys.at(start);
start = base_t::at(pos ).idx_key,
key2_len = (pos_next < base_t::size() ? base_t::at(pos_next).idx_key : keys.size()) - start;
auto key2 = &keys.at(start);
// Compare keys.
int r = memcmp(key, key2, sizeof(T_key)*std::min<size_t>(key_len, key2_len));
@@ -488,7 +506,7 @@ namespace ZRCola {
///
/// Source-destination index transformation mapping
///
class __declspec(novtable) mapping {
struct mapping {
public:
size_t src; ///< Character index in source string
size_t dst; ///< Character index in destination string
@@ -553,16 +571,7 @@ namespace ZRCola {
/// The function does not treat \\0 characters as terminators for performance reasons.
/// Therefore \p count_a and \p count_b must represent exact string lengths.
///
inline int CompareString(_In_ const wchar_t *str_a, _In_ size_t count_a, _In_ const wchar_t *str_b, _In_ size_t count_b)
{
    // Scan the part both strings have in common; the first differing
    // character decides the order. Zero characters are compared like any other.
    const size_t shared = count_a < count_b ? count_a : count_b;
    for (size_t pos = 0; pos < shared; ++pos) {
        if (str_a[pos] != str_b[pos])
            return str_a[pos] < str_b[pos] ? -1 : +1;
    }

    // No difference in the common part: equal lengths mean equal strings,
    // otherwise the shorter string sorts first.
    if (count_a == count_b)
        return 0;
    return count_a < count_b ? -1 : +1;
}
int CompareString(_In_ const wchar_t* str_a, _In_ size_t count_a, _In_ const wchar_t* str_b, _In_ size_t count_b);
///
/// Generates and returns Unicode representation of the string using hexadecimal codes.
@@ -571,21 +580,7 @@ namespace ZRCola {
/// \param[in] count Number of characters in string \p str
/// \param[in] sep Separator
///
inline std::string GetUnicodeDumpA(_In_ const wchar_t *str, _In_ size_t count, _In_z_ const char *sep = "+")
{
    std::string out;

    // Scratch space for one formatted code: up to eight hexadecimal digits
    // (enough for a 32-bit wchar_t) plus the terminating zero. A fixed local
    // array needs no heap allocation and no matching array delete.
    char hex[8 + 1];

    // Stop at `count` characters or at the first zero character, whichever comes first.
    for (size_t i = 0; i < count && str[i]; i++) {
        // The separator goes between codes, never before the first one.
        if (i)
            out += sep;

        // %X expects an unsigned int; the cast makes that explicit for every wchar_t width.
        const int written = snprintf(hex, sizeof(hex), "%04X", static_cast<unsigned int>(str[i]));

        // A non-positive result would mean a formatting error; never use it as a length.
        if (written > 0)
            out.append(hex, static_cast<size_t>(written));
    }
    return out;
}
std::string GetUnicodeDumpA(_In_z_count_(count) const wchar_t* str, _In_ size_t count, _In_z_ const char* sep = "+");
///
/// Generates and returns Unicode representation of the string using hexadecimal codes.
@@ -594,21 +589,7 @@ namespace ZRCola {
/// \param[in] count Number of characters in string \p str
/// \param[in] sep Separator
///
inline std::wstring GetUnicodeDumpW(_In_ const wchar_t *str, _In_ size_t count, _In_z_ const wchar_t *sep = L"+")
{
    std::wstring out;

    // Scratch space for one formatted code: up to eight hexadecimal digits
    // (enough for a 32-bit wchar_t) plus the terminating zero. A fixed local
    // array needs no heap allocation and no matching array delete.
    wchar_t hex[8 + 1];

    // Stop at `count` characters or at the first zero character, whichever comes first.
    for (size_t i = 0; i < count && str[i]; i++) {
        // The separator is appended directly rather than through a %s conversion,
        // whose meaning for wide strings differs between C runtimes.
        if (i)
            out += sep;

        // %X expects an unsigned int; the cast makes that explicit for every wchar_t width.
        const int written = swprintf(hex, sizeof(hex) / sizeof(hex[0]), L"%04X", static_cast<unsigned int>(str[i]));

        // swprintf reports failure with a negative value; never use it as a length.
        if (written > 0)
            out.append(hex, static_cast<size_t>(written));
    }
    return out;
}
std::wstring GetUnicodeDumpW(_In_z_count_(count) const wchar_t* str, _In_ size_t count, _In_z_ const wchar_t* sep = L"+");
#ifdef _UNICODE
#define GetUnicodeDump GetUnicodeDumpW
@@ -709,7 +690,8 @@ inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::t
// Write index data.
if (stream.fail()) return stream;
stream.write((const char*)idx.data(), sizeof(ZRCola::textindex<T_key, T_val, T_idx>::value_type)*static_cast<std::streamsize>(count));
auto idx_data = idx.data();
stream.write((const char*)idx_data, sizeof(*idx_data)*static_cast<std::streamsize>(count));
// Write key count.
auto key_count = idx.keys.size();
@@ -726,7 +708,8 @@ inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::t
// Write key data.
if (stream.fail()) return stream;
stream.write((const char*)idx.keys.data(), sizeof(std::vector<T_key>::value_type)*static_cast<std::streamsize>(count));
auto idx_keys_data = idx.keys.data();
stream.write((const char*)idx_keys_data, sizeof(*idx_keys_data)*static_cast<std::streamsize>(count));
// Write value count.
auto value_count = idx.values.size();
@@ -743,7 +726,8 @@ inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::t
// Write value data.
if (stream.fail()) return stream;
stream.write((const char*)idx.values.data(), sizeof(std::vector<T_val>::value_type)*static_cast<std::streamsize>(count));
auto idx_values_data = idx.values.data();
stream.write((const char*)idx_values_data, sizeof(*idx_values_data)*static_cast<std::streamsize>(count));
return stream;
}
@@ -772,7 +756,8 @@ inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::textin
if (count) {
// Read text index.
idx.resize(count);
stream.read((char*)idx.data(), sizeof(ZRCola::textindex<T_key, T_val, T_idx>::value_type)*static_cast<std::streamsize>(count));
auto p = idx.data();
stream.read((char*)p, sizeof(*p)*static_cast<std::streamsize>(count));
if (!stream.good()) return stream;
} else
idx.clear();
@@ -784,7 +769,8 @@ inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::textin
if (count) {
// Read keys.
idx.keys.resize(count);
stream.read((char*)idx.keys.data(), sizeof(std::vector<T_key>::value_type)*static_cast<std::streamsize>(count));
auto p = idx.keys.data();
stream.read((char*)p, sizeof(*p)*static_cast<std::streamsize>(count));
if (!stream.good()) return stream;
} else
idx.keys.clear();
@@ -796,7 +782,8 @@ inline std::istream& operator >>(_In_ std::istream& stream, _Out_ ZRCola::textin
if (count) {
// Read values.
idx.values.resize(count);
stream.read((char*)idx.values.data(), sizeof(std::vector<T_val>::value_type)*static_cast<std::streamsize>(count));
auto p = idx.values.data();
stream.read((char*)p, sizeof(*p)*static_cast<std::streamsize>(count));
} else
idx.values.clear();

View File

@@ -166,13 +166,10 @@ namespace ZRCola {
};
typedef stdex::idrec::record<highlight_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> highlight_rec;
typedef stdex::idrec::record<highlight_db, recordid_t, 0x484748 /*"HGH"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> highlight_rec;
};
const ZRCola::recordid_t ZRCola::highlight_rec::id = *(ZRCola::recordid_t*)"HGH";
///
/// Writes highlight database to a stream
///

View File

@@ -181,7 +181,7 @@ namespace ZRCola {
};
typedef stdex::idrec::record<langchar_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> langchar_rec;
typedef stdex::idrec::record<langchar_db, recordid_t, 0x432d4c /*"L-C"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> langchar_rec;
///
@@ -284,14 +284,10 @@ namespace ZRCola {
};
typedef stdex::idrec::record<language_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> language_rec;
typedef stdex::idrec::record<language_db, recordid_t, 0x474e4c /*"LNG"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> language_rec;
};
const ZRCola::recordid_t ZRCola::langchar_rec::id = *(ZRCola::recordid_t*)"L-C";
const ZRCola::recordid_t ZRCola::language_rec::id = *(ZRCola::recordid_t*)"LNG";
///
/// Writes language character database to a stream
///

View File

@@ -214,7 +214,7 @@ namespace ZRCola {
};
typedef stdex::idrec::record<chrtag_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> chrtag_rec;
typedef stdex::idrec::record<chrtag_db, recordid_t, 0x542d43 /*"C-T"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> chrtag_rec;
///
@@ -230,7 +230,7 @@ namespace ZRCola {
struct tagname {
public:
tagid_t tag; ///< Tag ID
LCID locale; ///< Locale ID
uint32_t locale; ///< Locale ID
protected:
uint16_t name_to; ///< Tag name end in \c data
@@ -250,10 +250,10 @@ namespace ZRCola {
/// \param[in] name_len Number of UTF-16 characters in \p name
///
inline tagname(
_In_opt_ tagid_t tag = 0,
_In_opt_ LCID locale = MAKELCID(MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL), SORT_DEFAULT),
_In_opt_z_count_(name_len) const wchar_t *name = NULL,
_In_opt_ size_t name_len = 0)
_In_opt_ tagid_t tag = 0,
_In_opt_ uint32_t locale = 0,
_In_opt_z_count_(name_len) const wchar_t *name = NULL,
_In_opt_ size_t name_len = 0)
{
this->tag = tag;
this->locale = locale;
@@ -285,14 +285,26 @@ namespace ZRCola {
/// The function does not treat \\0 characters as terminators for performance reasons.
/// Therefore \p count_a and \p count_b must represent exact string lengths.
///
static inline int CompareName(LCID locale, const wchar_t *str_a, uint16_t count_a, const wchar_t *str_b, uint16_t count_b)
static inline int CompareName(_In_ uint32_t locale, _In_z_count_(count_a) const wchar_t *str_a, _In_ uint16_t count_a, _In_z_count_(count_b) const wchar_t *str_b, _In_ uint16_t count_b)
{
#ifdef _WIN32
    // Delegate to the Win32 comparison (string sort, case-insensitive) and
    // translate its CSTR_* result into -1 / 0 / +1.
    switch (::CompareString(locale, SORT_STRINGSORT | NORM_IGNORECASE, str_a, count_a, str_b, count_b)) {
        case CSTR_LESS_THAN   : return -1;
        case CSTR_EQUAL       : return  0;
        case CSTR_GREATER_THAN: return  1;
        default               : assert(0); return -1;
    }
#else
    assert(0); // TODO: 1. Should honour locale. 2. Should use ICU for lowercase conversion.

    // Fallback: lowercase private copies of both names, then collate them with
    // the current global C++ locale. The `locale` argument is not used here.
    std::wstring
        a(str_a, count_a),
        b(str_b, count_b);
    auto tolower = [](wchar_t c){ return std::towlower(c); };
    std::transform(a.begin(), a.end(), a.begin(), tolower);
    std::transform(b.begin(), b.end(), b.begin(), tolower);
    auto &coll = std::use_facet<std::collate<wchar_t>>(std::locale());

    // Pass plain pointer ranges: data() is valid even for an empty string,
    // whereas dereferencing the end iterator to obtain a pointer is not.
    return coll.compare(a.data(), a.data() + a.size(), b.data(), b.data() + b.size());
#endif
}
};
#pragma pack(pop)
@@ -423,18 +435,14 @@ namespace ZRCola {
/// \param[in ] fn_abort Pointer to function to periodically test for search cancellation
/// \param[in ] cookie Cookie for \p fn_abort call
///
bool Search(_In_z_ const wchar_t *str, _In_ LCID locale, _Inout_ std::map<tagid_t, uint16_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
bool Search(_In_z_ const wchar_t *str, _In_ uint32_t locale, _Inout_ std::map<tagid_t, uint16_t> &hits, _In_opt_ bool (__cdecl *fn_abort)(void *cookie) = NULL, _In_opt_ void *cookie = NULL) const;
};
typedef stdex::idrec::record<tagname_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> tagname_rec;
typedef stdex::idrec::record<tagname_db, recordid_t, 0x4e4754 /*"TGN"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> tagname_rec;
};
const ZRCola::recordid_t ZRCola::chrtag_rec ::id = *(ZRCola::recordid_t*)"C-T";
const ZRCola::recordid_t ZRCola::tagname_rec::id = *(ZRCola::recordid_t*)"TGN";
///
/// Writes character tag database to a stream
///

View File

@@ -320,7 +320,7 @@ namespace ZRCola {
};
typedef stdex::idrec::record<translation_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> translation_rec;
typedef stdex::idrec::record<translation_db, recordid_t, 0x4e5254 /*"TRN"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> translation_rec;
///
@@ -436,7 +436,7 @@ namespace ZRCola {
};
typedef stdex::idrec::record<transet_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> transet_rec;
typedef stdex::idrec::record<transet_db, recordid_t, 0x455354 /*"TSE"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> transet_rec;
///
@@ -585,15 +585,8 @@ namespace ZRCola {
if (a.rank < b.rank) return -1;
else if (a.rank > b.rank) return +1;
uint16_t
a_name_len = a.name_len(),
b_name_len = b.name_len();
int r = _wcsncoll(a.name(), b.name(), std::min<uint16_t>(a_name_len, b_name_len));
if (r != 0) return r;
if (a_name_len < b_name_len) return -1;
else if (a_name_len > b_name_len) return +1;
return 0;
auto &coll = std::use_facet<std::collate<wchar_t>>(std::locale());
return coll.compare(a.name(), a.name_end(), b.name(), b.name_end());
}
} idxRank; ///< Rank index
@@ -617,15 +610,10 @@ namespace ZRCola {
};
typedef stdex::idrec::record<transeq_db, recordid_t, recordsize_t, ZRCOLA_RECORD_ALIGN> transeq_rec;
typedef stdex::idrec::record<transeq_db, recordid_t, 0x515354 /*"TSQ"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> transeq_rec;
};
const ZRCola::recordid_t ZRCola::translation_rec::id = *(ZRCola::recordid_t*)"TRN";
const ZRCola::recordid_t ZRCola::transet_rec ::id = *(ZRCola::recordid_t*)"TSE";
const ZRCola::recordid_t ZRCola::transeq_rec ::id = *(ZRCola::recordid_t*)"TSQ";
///
/// Writes translation database to a stream
///