Add support for Unicode character blocks
Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
@@ -132,6 +132,12 @@ bool ZRColaApp::OnInit()
|
||||
wxFAIL_MSG(wxT("Error reading character category data from ZRCola.zrcdb."));
|
||||
m_cc_db.clear();
|
||||
}
|
||||
} else if (id == ZRCola::chrblk_rec::id()) {
|
||||
dat >> ZRCola::chrblk_rec(m_cb_db);
|
||||
if (!dat.good()) {
|
||||
wxFAIL_MSG(wxT("Error reading character block data from ZRCola.zrcdb."));
|
||||
m_cb_db.clear();
|
||||
}
|
||||
} else if (id == ZRCola::chrtag_rec::id()) {
|
||||
dat >> ZRCola::chrtag_rec(m_ct_db);
|
||||
if (!dat.good()) {
|
||||
|
||||
@@ -70,7 +70,8 @@ public:
|
||||
ZRCola::language_db m_lang_db; ///< Language database
|
||||
ZRCola::keyseq_db m_ks_db; ///< Key sequence database
|
||||
ZRCola::character_db m_chr_db; ///< Character database
|
||||
ZRCola::chrcat_db m_cc_db; ///< Characted category database
|
||||
ZRCola::chrcat_db m_cc_db; ///< Character category database
|
||||
ZRCola::chrblk_db m_cb_db; ///< Character block database
|
||||
ZRCola::chrtag_db m_ct_db; ///< Character tag database
|
||||
ZRCola::tagname_db m_tn_db; ///< Tag name database
|
||||
ZRCola::highlight_db m_h_db; ///< Highlight database
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -756,7 +756,32 @@ int _tmain(int argc, _TCHAR *argv[])
|
||||
ZRCola::DBSource::character_desc_idx idxChrDsc, idxChrDscSub;
|
||||
ZRCola::DBSource::character_bank chrs;
|
||||
|
||||
// Phase 1: Parse characters and build indexes.
|
||||
// Phase 1: Get character blocks.
|
||||
com_obj<ADORecordset> rs2;
|
||||
if (src.SelectCharacterBlocks(rs2)) {
|
||||
size_t count2 = src.GetRecordsetCount(rs2);
|
||||
if (count2 < 0xffffffff) { // 4G check (-1 is reserved for error condition)
|
||||
// Parse character blocks and build index and data.
|
||||
for (; !ZRCola::DBSource::IsEOF(rs2); rs2->MoveNext()) {
|
||||
// Read character block from the database.
|
||||
ZRCola::DBSource::chrblk cb;
|
||||
if (src.GetCharacterBlock(rs2, cb))
|
||||
chrs.idxChrBlk[cb.first] = std::move(cb.second);
|
||||
else
|
||||
has_errors = true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
_ftprintf(stderr, wxT("%s: error ZCC0029: Error getting character block count from database or too many character blocks.\n"), (LPCTSTR)filenameIn.c_str());
|
||||
has_errors = true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
_ftprintf(stderr, wxT("%s: error ZCC0028: Error getting character blocks from database. Please make sure the file is ZRCola.zrc compatible.\n"), (LPCTSTR)filenameIn.c_str());
|
||||
has_errors = true;
|
||||
}
|
||||
|
||||
// Phase 2: Parse characters and build indexes.
|
||||
for (; !ZRCola::DBSource::IsEOF(rs); rs->MoveNext()) {
|
||||
// Read character from the database.
|
||||
ZRCola::DBSource::character chr;
|
||||
@@ -766,33 +791,64 @@ int _tmain(int argc, _TCHAR *argv[])
|
||||
has_errors = true;
|
||||
}
|
||||
|
||||
// Phase 2: Build related character lists.
|
||||
// Phase 3: Build related character lists.
|
||||
chrs.build_related();
|
||||
|
||||
ZRCola::character_db db;
|
||||
{
|
||||
ZRCola::character_db db;
|
||||
|
||||
// Preallocate memory.
|
||||
db.idxChr.reserve(count);
|
||||
db.data .reserve(count*4);
|
||||
// Preallocate memory.
|
||||
db.idxChr.reserve(count);
|
||||
db.data.reserve(count * 4);
|
||||
|
||||
// Phase 3: Parse characters and build index and data.
|
||||
for (auto chr = chrs.cbegin(), chr_end = chrs.cend(); chr != chr_end; ++chr) {
|
||||
// Add character to index and data.
|
||||
db << *chr;
|
||||
// Phase 4: Parse characters and build index and data.
|
||||
for (auto chr = chrs.cbegin(), chr_end = chrs.cend(); chr != chr_end; ++chr) {
|
||||
// Add character to index and data.
|
||||
db << *chr;
|
||||
|
||||
// Add description (and keywords) to index.
|
||||
idxChrDsc .add_keywords(chr->second.terms, chr->first, 0);
|
||||
idxChrDscSub.add_keywords(chr->second.terms, chr->first, 3);
|
||||
// Add description (and keywords) to index.
|
||||
idxChrDsc.add_keywords(chr->second.terms, chr->first, 0);
|
||||
idxChrDscSub.add_keywords(chr->second.terms, chr->first, 3);
|
||||
|
||||
// Mark category used.
|
||||
categories_used.insert(chr->second.cat);
|
||||
// Mark category used.
|
||||
categories_used.insert(chr->second.cat);
|
||||
}
|
||||
|
||||
// Write characters to file.
|
||||
db.idxChr.sort();
|
||||
idxChrDsc.save(db.idxDsc);
|
||||
idxChrDscSub.save(db.idxDscSub);
|
||||
dst << ZRCola::character_rec(db);
|
||||
}
|
||||
|
||||
// Write characters to file.
|
||||
db.idxChr.sort();
|
||||
idxChrDsc .save(db.idxDsc );
|
||||
idxChrDscSub.save(db.idxDscSub);
|
||||
dst << ZRCola::character_rec(db);
|
||||
{
|
||||
ZRCola::chrblk_db db;
|
||||
|
||||
// Preallocate memory.
|
||||
db.idxChrId.reserve(chrs.idxChrBlk.size());
|
||||
db.idxRank.reserve(chrs.idxChrBlk.size());
|
||||
db.data.reserve(chrs.idxChrBlk.size() * 16);
|
||||
|
||||
// Phase 5: Parse character blocks and build index and data.
|
||||
for (auto& cb : chrs.idxChrBlk)
|
||||
{
|
||||
if (!cb.second.used) {
|
||||
// Skip unused character blocks.
|
||||
continue;
|
||||
}
|
||||
|
||||
if (build_pot)
|
||||
pot.insert(cb.second.name);
|
||||
|
||||
// Add character block to index and data.
|
||||
db << cb;
|
||||
}
|
||||
|
||||
// Write character blocks to file.
|
||||
db.idxChrId.sort();
|
||||
db.idxRank.sort();
|
||||
dst << ZRCola::chrblk_rec(db);
|
||||
}
|
||||
} else {
|
||||
_ftprintf(stderr, wxT("%s: error ZCC0017: Error getting character count from database or too many characters.\n"), (LPCTSTR)filenameIn.c_str());
|
||||
has_errors = true;
|
||||
|
||||
@@ -171,6 +171,12 @@ namespace ZRCola {
|
||||
}
|
||||
|
||||
|
||||
///
|
||||
/// Character block ID
|
||||
///
|
||||
using chrblkid_t = uint16_t; // alias declaration, same form as the chrcat_db/chrblk_db aliases
|
||||
|
||||
|
||||
///
|
||||
/// Character Database
|
||||
///
|
||||
@@ -184,6 +190,7 @@ namespace ZRCola {
|
||||
struct character {
|
||||
public:
|
||||
chrcatid_t cat; ///< Character category ID
|
||||
chrblkid_t blk; ///< Character block ID
|
||||
|
||||
protected:
|
||||
uint16_t chr_to; ///< Character end in \c data
|
||||
@@ -202,6 +209,7 @@ namespace ZRCola {
|
||||
/// \param[in] chr Character
|
||||
/// \param[in] chr_len Number of UTF-16 characters in \p chr
|
||||
/// \param[in] cat Category
|
||||
/// \param[in] blk Unicode block
|
||||
/// \param[in] desc Description
|
||||
/// \param[in] desc_len Number of UTF-16 characters in \p desc
|
||||
/// \param[in] rel Related characters list (zero delimited)
|
||||
@@ -211,12 +219,14 @@ namespace ZRCola {
|
||||
_In_opt_z_count_(chr_len) const char_t *chr = NULL,
|
||||
_In_opt_ size_t chr_len = 0,
|
||||
_In_opt_ chrcatid_t cat = chrcatid_t(),
|
||||
_In_opt_ chrblkid_t blk = 0,
|
||||
_In_opt_z_count_(desc_len) const char_t *desc = NULL,
|
||||
_In_opt_ size_t desc_len = 0,
|
||||
_In_opt_z_count_(rel_len) const char_t *rel = NULL,
|
||||
_In_opt_ size_t rel_len = 0)
|
||||
{
|
||||
this->cat = cat;
|
||||
this->blk = blk;
|
||||
this->chr_to = static_cast<uint16_t>(chr_len);
|
||||
if (chr && chr_len) memcpy(this->data, chr, sizeof(char_t)*chr_len);
|
||||
this->desc_to = static_cast<uint16_t>(this->chr_to + desc_len);
|
||||
@@ -330,6 +340,25 @@ namespace ZRCola {
|
||||
return idxChr.find(*c, start) ? idxChr[start].cat : chrcatid_t();
|
||||
}
|
||||
|
||||
///
|
||||
/// Get character block
|
||||
///
|
||||
/// \param[in] chr Character
|
||||
/// \param[in] len Number of UTF-16 characters in \p chr
|
||||
///
|
||||
/// \returns
|
||||
/// - Character block if character found
|
||||
/// - 0 otherwise
|
||||
///
|
||||
chrblkid_t GetCharBlk(_In_z_count_(len) const char_t *chr, _In_ const size_t len) const
|
||||
{
|
||||
assert(len <= 0xffff);
|
||||
std::unique_ptr<character> c((character*)new char[sizeof(character) + sizeof(char_t)*len]);
|
||||
new (c.get()) character(chr, len);
|
||||
indexChr::size_type start;
|
||||
return idxChr.find(*c, start) ? idxChr[start].blk : 0;
|
||||
}
|
||||
|
||||
///
|
||||
/// Writes character database to a stream
|
||||
///
|
||||
@@ -650,6 +679,11 @@ namespace ZRCola {
|
||||
/// Character category database
|
||||
///
|
||||
using chrcat_db = chrclass_db<chrcatid_t>;
|
||||
|
||||
///
|
||||
/// Character block database
|
||||
///
|
||||
using chrblk_db = chrclass_db<chrblkid_t>;
|
||||
};
|
||||
|
||||
#pragma warning(pop)
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
namespace ZRCola {
|
||||
typedef stdex::idrec::record<character_db, recordid_t, 0x524843 /*"CHR"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> character_rec;
|
||||
typedef stdex::idrec::record<chrcat_db, recordid_t, 0x544343 /*"CCT"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> chrcat_rec;
|
||||
typedef stdex::idrec::record<chrblk_db, recordid_t, 0x4c4243 /*"CBL"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> chrblk_rec;
|
||||
typedef stdex::idrec::record<highlight_db, recordid_t, 0x484748 /*"HGH"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> highlight_rec;
|
||||
typedef stdex::idrec::record<langchar_db, recordid_t, 0x432d4c /*"L-C"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> langchar_rec;
|
||||
typedef stdex::idrec::record<language_db, recordid_t, 0x474e4c /*"LNG"*/, recordsize_t, ZRCOLA_RECORD_ALIGN> language_rec;
|
||||
|
||||
Binary file not shown.
@@ -10,6 +10,9 @@ msgstr ""
|
||||
msgid "Albanian"
|
||||
msgstr ""
|
||||
|
||||
msgid "Alphabetic Presentation Forms"
|
||||
msgstr ""
|
||||
|
||||
msgid "Apostrophes 1"
|
||||
msgstr ""
|
||||
|
||||
@@ -19,24 +22,42 @@ msgstr ""
|
||||
msgid "Arabic"
|
||||
msgstr ""
|
||||
|
||||
msgid "Armenian"
|
||||
msgstr ""
|
||||
|
||||
msgid "Arrows"
|
||||
msgstr ""
|
||||
|
||||
msgid "Belarusian"
|
||||
msgstr ""
|
||||
|
||||
msgid "Block Elements"
|
||||
msgstr ""
|
||||
|
||||
msgid "Bosnian – Cyrillic"
|
||||
msgstr ""
|
||||
|
||||
msgid "Bosnian – Latinic"
|
||||
msgstr ""
|
||||
|
||||
msgid "Box Drawing"
|
||||
msgstr ""
|
||||
|
||||
msgid "C0 Controls and Basic Latin (Basic Latin)"
|
||||
msgstr ""
|
||||
|
||||
msgid "C1 Controls and Latin-1 Supplement (Latin-1 Supplement)"
|
||||
msgstr ""
|
||||
|
||||
msgid "CAPITAL Case"
|
||||
msgstr ""
|
||||
|
||||
msgid "CAPITAL Case » small Case"
|
||||
msgstr ""
|
||||
|
||||
msgid "CJK Symbols and Punctuation"
|
||||
msgstr ""
|
||||
|
||||
msgid "Combine"
|
||||
msgstr ""
|
||||
|
||||
@@ -49,6 +70,18 @@ msgstr ""
|
||||
msgid "Combine Over"
|
||||
msgstr ""
|
||||
|
||||
msgid "Combining Diacritical Marks"
|
||||
msgstr ""
|
||||
|
||||
msgid "Combining Diacritical Marks Supplement"
|
||||
msgstr ""
|
||||
|
||||
msgid "Combining Diacritical Marks for Symbols"
|
||||
msgstr ""
|
||||
|
||||
msgid "Combining Half Marks"
|
||||
msgstr ""
|
||||
|
||||
msgid "Combining Marks"
|
||||
msgstr ""
|
||||
|
||||
@@ -58,6 +91,9 @@ msgstr ""
|
||||
msgid "Currencies"
|
||||
msgstr ""
|
||||
|
||||
msgid "Currency Symbols"
|
||||
msgstr ""
|
||||
|
||||
msgid "Cyrillic"
|
||||
msgstr ""
|
||||
|
||||
@@ -118,6 +154,12 @@ msgstr ""
|
||||
msgid "Cyrillic BdC"
|
||||
msgstr ""
|
||||
|
||||
msgid "Cyrillic Extended-B"
|
||||
msgstr ""
|
||||
|
||||
msgid "Cyrillic Supplement"
|
||||
msgstr ""
|
||||
|
||||
msgid "Cyrillic » Latin (GOST2000)"
|
||||
msgstr ""
|
||||
|
||||
@@ -130,9 +172,15 @@ msgstr ""
|
||||
msgid "Diacritics"
|
||||
msgstr ""
|
||||
|
||||
msgid "Dingbats"
|
||||
msgstr ""
|
||||
|
||||
msgid "Encircled Characters"
|
||||
msgstr ""
|
||||
|
||||
msgid "Enclosed Alphanumerics"
|
||||
msgstr ""
|
||||
|
||||
msgid "English"
|
||||
msgstr ""
|
||||
|
||||
@@ -148,6 +196,12 @@ msgstr ""
|
||||
msgid "Friulian"
|
||||
msgstr ""
|
||||
|
||||
msgid "General Punctuation"
|
||||
msgstr ""
|
||||
|
||||
msgid "Geometric Shapes"
|
||||
msgstr ""
|
||||
|
||||
msgid "Geometrical Shapes"
|
||||
msgstr ""
|
||||
|
||||
@@ -169,12 +223,24 @@ msgstr ""
|
||||
msgid "Greek (Old)"
|
||||
msgstr ""
|
||||
|
||||
msgid "Greek Extended"
|
||||
msgstr ""
|
||||
|
||||
msgid "Greek and Coptic"
|
||||
msgstr ""
|
||||
|
||||
msgid "Gujarati"
|
||||
msgstr ""
|
||||
|
||||
msgid "Hebrew"
|
||||
msgstr ""
|
||||
|
||||
msgid "Hungarian"
|
||||
msgstr ""
|
||||
|
||||
msgid "IPA Extensions"
|
||||
msgstr ""
|
||||
|
||||
msgid "Irish Gaelic"
|
||||
msgstr ""
|
||||
|
||||
@@ -295,6 +361,21 @@ msgstr ""
|
||||
msgid "Latin BdC"
|
||||
msgstr ""
|
||||
|
||||
msgid "Latin Extended Additional"
|
||||
msgstr ""
|
||||
|
||||
msgid "Latin Extended-A"
|
||||
msgstr ""
|
||||
|
||||
msgid "Latin Extended-B"
|
||||
msgstr ""
|
||||
|
||||
msgid "Latin Extended-C"
|
||||
msgstr ""
|
||||
|
||||
msgid "Latin Extended-D"
|
||||
msgstr ""
|
||||
|
||||
msgid "Latin » Cyrillic (Belarusian)"
|
||||
msgstr ""
|
||||
|
||||
@@ -433,6 +514,9 @@ msgstr ""
|
||||
msgid "Letter, Uppercase"
|
||||
msgstr ""
|
||||
|
||||
msgid "Letterlike Symbols"
|
||||
msgstr ""
|
||||
|
||||
msgid "Ligatures"
|
||||
msgstr ""
|
||||
|
||||
@@ -457,9 +541,27 @@ msgstr ""
|
||||
msgid "Mathematical And Physical Symbols"
|
||||
msgstr ""
|
||||
|
||||
msgid "Mathematical Operators"
|
||||
msgstr ""
|
||||
|
||||
msgid "Metric"
|
||||
msgstr ""
|
||||
|
||||
msgid "Miscellaneous Mathematical Symbols-A"
|
||||
msgstr ""
|
||||
|
||||
msgid "Miscellaneous Mathematical Symbols-B"
|
||||
msgstr ""
|
||||
|
||||
msgid "Miscellaneous Symbols"
|
||||
msgstr ""
|
||||
|
||||
msgid "Miscellaneous Symbols and Arrows"
|
||||
msgstr ""
|
||||
|
||||
msgid "Miscellaneous Technical"
|
||||
msgstr ""
|
||||
|
||||
msgid "Modified"
|
||||
msgstr ""
|
||||
|
||||
@@ -505,6 +607,9 @@ msgstr ""
|
||||
msgid "Number 9"
|
||||
msgstr ""
|
||||
|
||||
msgid "Number Forms"
|
||||
msgstr ""
|
||||
|
||||
msgid "Number, Decimal Digit"
|
||||
msgstr ""
|
||||
|
||||
@@ -532,12 +637,21 @@ msgstr ""
|
||||
msgid "Parentheses"
|
||||
msgstr ""
|
||||
|
||||
msgid "Phonetic Extensions"
|
||||
msgstr ""
|
||||
|
||||
msgid "Phonetic Extensions Supplement"
|
||||
msgstr ""
|
||||
|
||||
msgid "Polish"
|
||||
msgstr ""
|
||||
|
||||
msgid "Portuguese"
|
||||
msgstr ""
|
||||
|
||||
msgid "Private Use Area"
|
||||
msgstr ""
|
||||
|
||||
msgid "Punctuation"
|
||||
msgstr ""
|
||||
|
||||
@@ -607,12 +721,18 @@ msgstr ""
|
||||
msgid "Spaces"
|
||||
msgstr ""
|
||||
|
||||
msgid "Spacing Modifier Letters"
|
||||
msgstr ""
|
||||
|
||||
msgid "Spanish"
|
||||
msgstr ""
|
||||
|
||||
msgid "Special Characters"
|
||||
msgstr ""
|
||||
|
||||
msgid "Specials"
|
||||
msgstr ""
|
||||
|
||||
msgid "Strokes"
|
||||
msgstr ""
|
||||
|
||||
@@ -622,6 +742,18 @@ msgstr ""
|
||||
msgid "Superscript Sharacters"
|
||||
msgstr ""
|
||||
|
||||
msgid "Superscripts and Subscripts"
|
||||
msgstr ""
|
||||
|
||||
msgid "Supplemental Arrows-A"
|
||||
msgstr ""
|
||||
|
||||
msgid "Supplemental Arrows-B"
|
||||
msgstr ""
|
||||
|
||||
msgid "Supplemental Punctuation"
|
||||
msgstr ""
|
||||
|
||||
msgid "Surrounded"
|
||||
msgstr ""
|
||||
|
||||
@@ -676,6 +808,9 @@ msgstr ""
|
||||
msgid "Symbol, Other"
|
||||
msgstr ""
|
||||
|
||||
msgid "Syriac"
|
||||
msgstr ""
|
||||
|
||||
msgid "Technical Characters"
|
||||
msgstr ""
|
||||
|
||||
@@ -694,6 +829,9 @@ msgstr ""
|
||||
msgid "Ukrainian"
|
||||
msgstr ""
|
||||
|
||||
msgid "Unified Canadian Aboriginal Syllabics"
|
||||
msgstr ""
|
||||
|
||||
msgid "Units"
|
||||
msgstr ""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user