Character Select finished

(closes #11)
This commit is contained in:
2016-05-13 03:44:28 +02:00
parent 53ce3a2411
commit 9c3c1585d5
34 changed files with 3380 additions and 763 deletions

View File

@@ -20,6 +20,94 @@
#include "stdafx.h"
///
/// Splits a string into terms and adds them to the index
///
/// Terms are separated by white space. A term that starts with a double quote
/// extends to the closing quote (or to the end of the string when the quote is
/// left open) and may therefore contain white space. Each term is lower-cased
/// before it is indexed; empty terms are ignored.
///
/// \param[in] str  Zero-terminated string to split
/// \param[in] chr  Character the terms are registered for
/// \param[in] sub  When zero, each term is indexed as a whole only. When non-zero,
///                 every substring of the term that is \p sub or more characters
///                 long is indexed; terms shorter than \p sub are skipped.
///
/// \returns Always true
///
bool ZRCola::DBSource::character_desc_idx::add_keywords(const wchar_t *str, wchar_t chr, size_t sub)
{
    wxASSERT_MSG(str, wxT("string is NULL"));

    for (;;) {
        // Skip white space.
        while (*str && iswspace(*str))
            str++;
        if (*str == 0)
            break;

        // Get term.
        std::wstring term;
        if (*str == L'"') {
            // Quoted term: everything up to the closing quote or the end of string.
            const wchar_t *str_end = ++str;
            while (*str_end && *str_end != L'"')
                str_end++;
            term.assign(str, str_end);
            if (*str_end)
                str_end++; // Step over the closing quote.
            str = str_end;
        } else {
            // Plain term: everything up to the next white space or the end of string.
            const wchar_t *str_end = str + 1;
            while (*str_end && !iswspace(*str_end))
                str_end++;
            term.assign(str, str_end);
            str = str_end;
        }

        if (term.empty())
            continue;

        std::transform(term.begin(), term.end(), term.begin(), std::towlower);

        if (sub) {
            const std::wstring::size_type len = term.size();
            if (len >= sub) {
                // Insert all keyword substrings "sub" or more characters long.
                // Both bounds are inclusive: the last start position is len - sub,
                // and j (one past the substring's last character) may reach len.
                // With exclusive bounds no substring ending at the term's last
                // character - the whole term included - would ever be indexed.
                for (std::wstring::size_type i = 0, i_end = len - sub; i <= i_end; ++i) {
                    for (std::wstring::size_type j = i + sub; j <= len; ++j)
                        add_keyword(term.substr(i, j - i), chr);
                }
            }
        } else {
            // Insert exact keyword only.
            add_keyword(term, chr);
        }
    }

    return true;
}
///
/// Flattens the index into a text index
///
/// \param[out] idx  Text index to receive the data. Its previous content is discarded.
///
void ZRCola::DBSource::character_desc_idx::save(ZRCola::textindex<wchar_t, wchar_t, unsigned __int32> &idx) const
{
    // Start from an empty destination.
    idx       .clear();
    idx.keys  .clear();
    idx.values.clear();

    // Add up the lengths of all keys and all value lists, so that each
    // destination buffer can be allocated in one go.
    std::vector<wchar_t>::size_type total_keys   = 0;
    std::vector<wchar_t>::size_type total_values = 0;
    for (const_iterator entry = cbegin(); entry != cend(); ++entry) {
        total_keys   += entry->first .size();
        total_values += entry->second.size();
    }
    idx       .reserve(size()      );
    idx.keys  .reserve(total_keys  );
    idx.values.reserve(total_values);

    // Append the entries one by one.
    for (const_iterator entry = cbegin(); entry != cend(); ++entry) {
        const std::wstring         &key    = entry->first;
        const std::vector<wchar_t> &values = entry->second;

        // The current buffer sizes are the offsets at which this entry's
        // key and values are about to be appended.
        ZRCola::mappair_t<unsigned __int32> start = { idx.keys.size(), idx.values.size() };
        idx.push_back(start);

        idx.keys  .insert(idx.keys  .end(), key   .cbegin(), key   .cend());
        idx.values.insert(idx.values.end(), values.cbegin(), values.cend());
    }
}
///
/// Constructs the database source; the constructor body performs no work.
///
ZRCola::DBSource::DBSource()
{
}
@@ -348,14 +436,14 @@ bool ZRCola::DBSource::GetChrCat(const ATL::CComPtr<ADOField>& f, chrcatid_t& cc
_ftprintf(stderr, wxT("%s: error ZCC0111: Syntax error in \"%.*ls\" field (\"%.*ls\"). Character category ID must contain ASCII characters only.\n"), m_filename.c_str(), fieldname.Length(), (BSTR)fieldname, n, V_BSTR(&v));
return false;
}
cc[i] = (char)c;
cc.data[i] = (char)c;
} else
cc[i] = 0;
cc.data[i] = 0;
} else
break;
}
} else
memset(cc, 0, sizeof(cc));
memset(cc.data, 0, sizeof(cc));
return true;
}

View File

@@ -24,6 +24,7 @@
#include <atlbase.h>
#include <adoint.h>
#include <map>
#include <string>
#include <vector>
@@ -112,6 +113,61 @@ namespace ZRCola {
};
///
/// Character description index key comparator
///
/// Collates the two keys with _wcsncoll() over the length of the shorter one.
/// When that part collates equal, the shorter key is ordered first.
///
struct character_desc_idx_less : public std::binary_function<std::wstring, std::wstring, bool>
{
    inline bool operator()(const std::wstring& _Left, const std::wstring& _Right) const
    {
        const size_t len_left  = _Left .size();
        const size_t len_right = _Right.size();

        const int order = _wcsncoll(_Left.c_str(), _Right.c_str(), std::min<size_t>(len_left, len_right));

        return order != 0 ? order < 0 : len_left < len_right;
    }
};
///
/// Character description index
///
class character_desc_idx : public std::map<std::wstring, std::vector<wchar_t>, character_desc_idx_less>
{
public:
bool add_keywords(const wchar_t *str, wchar_t chr, size_t sub = 0);
void save(ZRCola::textindex<wchar_t, wchar_t, unsigned __int32> &idx) const;
protected:
inline void add_keyword(const std::wstring &term, wchar_t chr)
{
iterator idx = find(term);
if (idx == end()) {
// New keyword.
insert(std::make_pair(term, std::vector<wchar_t>(1, chr)));
} else {
// Append to existing keyword.
std::vector<wchar_t> &val = idx->second;
for (std::vector<wchar_t>::iterator i = val.begin(), i_end = val.end(); ; ++i) {
if (i == i_end) {
// End-of-values reached. Append character.
val.push_back(chr);
break;
} else if (*i == chr) {
// Character already among the values.
break;
}
}
}
}
};
///
/// Character category
///
@@ -176,6 +232,19 @@ namespace ZRCola {
}
///
/// Splits a string into individual keywords
///
/// \param[in ] str       String to split
/// \param[out] keywords  Array to receive the keywords
///
/// \returns
/// - true when successful
/// - false otherwise
///
static bool GetKeywords(const wchar_t *str, std::vector< std::wstring > &keywords);
///
/// Gets boolean from ZRCola.zrc database
///

View File

@@ -20,383 +20,6 @@
#include "stdafx.h"
///
/// Writes translation database to a stream
///
/// Written in order: 32-bit index count, composition index, decomposition
/// index, 32-bit data count, 16-bit data elements. A count that does not fit
/// 32 bits sets the stream's failbit. Nothing further is written once the
/// stream reports failure.
///
/// \param[in] stream  Output stream
/// \param[in] db      Translation database
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::translation_db &db)
{
    assert(db.idxComp.size() == db.idxDecomp.size());

    // Index count.
    const ZRCola::translation_db::indexComp::size_type n_index = db.idxComp.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (n_index > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (!stream) return stream;
    const unsigned __int32 index_count = static_cast<unsigned __int32>(n_index);
    stream.write((const char*)&index_count, sizeof(index_count));

    // Composition index.
    if (!stream) return stream;
    stream.write((const char*)db.idxComp.data(), sizeof(unsigned __int32)*index_count);

    // Decomposition index.
    if (!stream) return stream;
    stream.write((const char*)db.idxDecomp.data(), sizeof(unsigned __int32)*index_count);

    // Data count.
    const std::vector<unsigned __int16>::size_type n_data = db.data.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (n_data > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (!stream) return stream;
    const unsigned __int32 data_count = static_cast<unsigned __int32>(n_data);
    stream.write((const char*)&data_count, sizeof(data_count));

    // Data.
    if (!stream) return stream;
    stream.write((const char*)db.data.data(), sizeof(unsigned __int16)*data_count);

    return stream;
}
///
/// Writes key sequence database to a stream
///
/// Written in order: 32-bit index count, character index, key index, 32-bit
/// data count, 16-bit data elements. A count that does not fit 32 bits sets
/// the stream's failbit. Nothing further is written once the stream reports
/// failure.
///
/// \param[in] stream  Output stream
/// \param[in] db      Key sequence database
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::keyseq_db &db)
{
    assert(db.idxChr.size() == db.idxKey.size());

    // Index count.
    const ZRCola::keyseq_db::indexChr::size_type n_index = db.idxChr.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (n_index > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (!stream) return stream;
    const unsigned __int32 index_count = static_cast<unsigned __int32>(n_index);
    stream.write((const char*)&index_count, sizeof(index_count));

    // Character index.
    if (!stream) return stream;
    stream.write((const char*)db.idxChr.data(), sizeof(unsigned __int32)*index_count);

    // Key index.
    if (!stream) return stream;
    stream.write((const char*)db.idxKey.data(), sizeof(unsigned __int32)*index_count);

    // Data count.
    const std::vector<unsigned __int16>::size_type n_data = db.data.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (n_data > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (!stream) return stream;
    const unsigned __int32 data_count = static_cast<unsigned __int32>(n_data);
    stream.write((const char*)&data_count, sizeof(data_count));

    // Data.
    if (!stream) return stream;
    stream.write((const char*)db.data.data(), sizeof(unsigned __int16)*data_count);

    return stream;
}
///
/// Writes language database to a stream
///
/// Written in order: 32-bit index count, language index, 32-bit data count,
/// 16-bit data elements. A count that does not fit 32 bits sets the stream's
/// failbit. Nothing further is written once the stream reports failure.
///
/// \param[in] stream  Output stream
/// \param[in] db      Language database
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::language_db &db)
{
    // Index count.
    const ZRCola::language_db::indexLang::size_type n_index = db.idxLng.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (n_index > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (!stream) return stream;
    const unsigned __int32 index_count = static_cast<unsigned __int32>(n_index);
    stream.write((const char*)&index_count, sizeof(index_count));

    // Language index.
    if (!stream) return stream;
    stream.write((const char*)db.idxLng.data(), sizeof(unsigned __int32)*index_count);

    // Data count.
    const std::vector<unsigned __int16>::size_type n_data = db.data.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (n_data > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (!stream) return stream;
    const unsigned __int32 data_count = static_cast<unsigned __int32>(n_data);
    stream.write((const char*)&data_count, sizeof(data_count));

    // Data.
    if (!stream) return stream;
    stream.write((const char*)db.data.data(), sizeof(unsigned __int16)*data_count);

    return stream;
}
///
/// Writes language character database to a stream
///
/// Written in order: 32-bit index count, character index, language index
/// (only when ZRCOLA_LANGCHAR_LANG_IDX is defined), 32-bit data count, 16-bit
/// data elements. A count that does not fit 32 bits sets the stream's failbit.
/// Nothing further is written once the stream reports failure.
///
/// \param[in] stream  Output stream
/// \param[in] db      Language character database
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::langchar_db &db)
{
#ifdef ZRCOLA_LANGCHAR_LANG_IDX
    assert(db.idxChr.size() == db.idxLng.size());
#endif

    // Index count.
    const ZRCola::langchar_db::indexChar::size_type n_index = db.idxChr.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (n_index > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (!stream) return stream;
    const unsigned __int32 index_count = static_cast<unsigned __int32>(n_index);
    stream.write((const char*)&index_count, sizeof(index_count));

    // Character index.
    if (!stream) return stream;
    stream.write((const char*)db.idxChr.data(), sizeof(unsigned __int32)*index_count);

#ifdef ZRCOLA_LANGCHAR_LANG_IDX
    // Language index.
    if (!stream) return stream;
    stream.write((const char*)db.idxLng.data(), sizeof(unsigned __int32)*index_count);
#endif

    // Data count.
    const std::vector<unsigned __int16>::size_type n_data = db.data.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (n_data > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (!stream) return stream;
    const unsigned __int32 data_count = static_cast<unsigned __int32>(n_data);
    stream.write((const char*)&data_count, sizeof(data_count));

    // Data.
    if (!stream) return stream;
    stream.write((const char*)db.data.data(), sizeof(unsigned __int16)*data_count);

    return stream;
}
///
/// Writes character group database to a stream
///
/// Written in order: 32-bit index count, rank index, 32-bit data count, 16-bit
/// data elements. A count that does not fit 32 bits sets the stream's failbit.
/// Nothing further is written once the stream reports failure.
///
/// \param[in] stream  Output stream
/// \param[in] db      Character group database
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::chrgrp_db &db)
{
    unsigned __int32 count;

    // Write index count.
    // The count is held in a plain size_t rather than in a size type borrowed
    // from the unrelated key sequence database.
    const size_t rank_count = db.idxRnk.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (rank_count > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (stream.fail()) return stream;
    count = (unsigned __int32)rank_count;
    stream.write((const char*)&count, sizeof(count));

    // Write rank index.
    if (stream.fail()) return stream;
    stream.write((const char*)db.idxRnk.data(), sizeof(unsigned __int32)*count);

    // Write data count.
    const std::vector<unsigned __int16>::size_type data_count = db.data.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (data_count > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (stream.fail()) return stream;
    count = (unsigned __int32)data_count;
    stream.write((const char*)&count, sizeof(count));

    // Write data.
    if (stream.fail()) return stream;
    stream.write((const char*)db.data.data(), sizeof(unsigned __int16)*count);

    return stream;
}
///
/// Writes character database to a stream
///
/// Written in order: 32-bit index count, character index, 32-bit data count,
/// 16-bit data elements. A count that does not fit 32 bits sets the stream's
/// failbit. Nothing further is written once the stream reports failure.
///
/// \param[in] stream  Output stream
/// \param[in] db      Character database
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::character_db &db)
{
    unsigned __int32 count;

    // Write index count.
    // The count is held in a plain size_t rather than in a size type borrowed
    // from the unrelated key sequence database.
    const size_t chr_count = db.idxChr.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (chr_count > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (stream.fail()) return stream;
    count = (unsigned __int32)chr_count;
    stream.write((const char*)&count, sizeof(count));

    // Write character index.
    if (stream.fail()) return stream;
    stream.write((const char*)db.idxChr.data(), sizeof(unsigned __int32)*count);

    // Write data count.
    const std::vector<unsigned __int16>::size_type data_count = db.data.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (data_count > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (stream.fail()) return stream;
    count = (unsigned __int32)data_count;
    stream.write((const char*)&count, sizeof(count));

    // Write data.
    if (stream.fail()) return stream;
    stream.write((const char*)db.data.data(), sizeof(unsigned __int16)*count);

    return stream;
}
///
/// Writes character category database to a stream
///
/// Written in order: 32-bit index count, character category index, rank index,
/// 32-bit data count, 16-bit data elements. Both indices are written with the
/// same count. A count that does not fit 32 bits sets the stream's failbit.
/// Nothing further is written once the stream reports failure.
///
/// \param[in] stream  Output stream
/// \param[in] db      Character category database
///
/// \returns The stream \p stream
///
inline std::ostream& operator <<(_In_ std::ostream& stream, _In_ const ZRCola::chrcat_db &db)
{
    // A single count is written for both indices, so they must be equally long.
    // Otherwise the rank index write below would read past its end or be cut short.
    assert(db.idxChrCat.size() == db.idxRnk.size());

    unsigned __int32 count;

    // Write index count.
    // The count is held in a plain size_t rather than in a size type borrowed
    // from the unrelated key sequence database.
    const size_t cat_count = db.idxChrCat.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (cat_count > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (stream.fail()) return stream;
    count = (unsigned __int32)cat_count;
    stream.write((const char*)&count, sizeof(count));

    // Write character category index.
    if (stream.fail()) return stream;
    stream.write((const char*)db.idxChrCat.data(), sizeof(unsigned __int32)*count);

    // Write rank index.
    if (stream.fail()) return stream;
    stream.write((const char*)db.idxRnk.data(), sizeof(unsigned __int32)*count);

    // Write data count.
    const std::vector<unsigned __int16>::size_type data_count = db.data.size();
#if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__)
    // 4G check
    if (data_count > 0xffffffff) {
        stream.setstate(std::ios_base::failbit);
        return stream;
    }
#endif
    if (stream.fail()) return stream;
    count = (unsigned __int32)data_count;
    stream.write((const char*)&count, sizeof(count));

    // Write data.
    if (stream.fail()) return stream;
    stream.write((const char*)db.data.data(), sizeof(unsigned __int16)*count);

    return stream;
}
///
/// Main function
///
@@ -768,6 +391,7 @@ int _tmain(int argc, _TCHAR *argv[])
if (count < 0xffffffff) { // 4G check (-1 is reserved for error condition)
ZRCola::DBSource::character chr;
ZRCola::character_db db;
ZRCola::DBSource::character_desc_idx idxChrDsc, idxChrDscSub;
// Preallocate memory.
db.idxChr.reserve(count);
@@ -781,7 +405,7 @@ int _tmain(int argc, _TCHAR *argv[])
unsigned __int32 idx = db.data.size();
db.data.push_back((unsigned __int16)chr.chr);
for (std::wstring::size_type i = 0; i < sizeof(ZRCola::chrcatid_t)/sizeof(unsigned __int16); i++)
db.data.push_back(((const unsigned __int16*)chr.cat)[i]);
db.data.push_back(((const unsigned __int16*)chr.cat.data)[i]);
std::wstring::size_type n_desc = chr.desc.length();
wxASSERT_MSG(n_desc <= 0xffff, wxT("character description too long"));
db.data.push_back((unsigned __int16)n_desc);
@@ -793,6 +417,12 @@ int _tmain(int argc, _TCHAR *argv[])
for (std::wstring::size_type i = 0; i < n_rel; i++)
db.data.push_back(chr.rel[i]);
db.idxChr.push_back(idx);
// Add description (and keywords) to index.
idxChrDsc .add_keywords(chr.desc .c_str(), chr.chr, 0);
idxChrDsc .add_keywords(chr.keywords.c_str(), chr.chr, 0);
idxChrDscSub.add_keywords(chr.desc .c_str(), chr.chr, 3);
idxChrDscSub.add_keywords(chr.keywords.c_str(), chr.chr, 3);
} else
has_errors = true;
@@ -802,6 +432,10 @@ int _tmain(int argc, _TCHAR *argv[])
// Sort indices.
db.idxChr.sort();
// Save text indices.
idxChrDsc .save(db.idxDsc );
idxChrDscSub.save(db.idxDscSub);
// Write characters to file.
dst << ZRCola::character_rec(db);
} else {
@@ -835,7 +469,7 @@ int _tmain(int argc, _TCHAR *argv[])
// Add character category to index and data.
unsigned __int32 idx = db.data.size();
for (std::wstring::size_type i = 0; i < sizeof(ZRCola::chrcatid_t)/sizeof(unsigned __int16); i++)
db.data.push_back(((const unsigned __int16*)cc.id)[i]);
db.data.push_back(((const unsigned __int16*)cc.id.data)[i]);
wxASSERT_MSG((int)0xffff8000 <= cc.rank && cc.rank <= (int)0x00007fff, wxT("character category rank out of bounds"));
db.data.push_back((unsigned __int16)cc.rank);
std::wstring::size_type n_name = cc.name.length();

View File

@@ -48,6 +48,8 @@
#include <stdlib.h>
#include <algorithm>
#include <codecvt>
#include <cwctype>
#include <fstream>
#include <set>