diff --git a/ZRColaCompile/ZRColaCompile.vcxproj b/ZRColaCompile/ZRColaCompile.vcxproj
index 458e11a..5e850ab 100644
--- a/ZRColaCompile/ZRColaCompile.vcxproj
+++ b/ZRColaCompile/ZRColaCompile.vcxproj
@@ -45,6 +45,7 @@
+
Create
Create
@@ -52,6 +53,7 @@
+
diff --git a/ZRColaCompile/ZRColaCompile.vcxproj.filters b/ZRColaCompile/ZRColaCompile.vcxproj.filters
index e19d669..6ef14e2 100644
--- a/ZRColaCompile/ZRColaCompile.vcxproj.filters
+++ b/ZRColaCompile/ZRColaCompile.vcxproj.filters
@@ -28,6 +28,9 @@
Source Files
+
+ Source Files
+
@@ -36,6 +39,9 @@
Header Files
+
+ Header Files
+
diff --git a/ZRColaCompile/main.cpp b/ZRColaCompile/main.cpp
index aec25bb..5fa169b 100644
--- a/ZRColaCompile/main.cpp
+++ b/ZRColaCompile/main.cpp
@@ -188,6 +188,11 @@ int _tmain(int argc, _TCHAR *argv[])
// Read translation from the database.
ZRCola::DBSource::translation trans;
if (src.GetTranslation(rs, trans)) {
+ try {
+ unique_ptr res(ZRCola::DecompParser(trans.src.str).Parse());
+ } catch (...) {
+ }
+
// Add translation to temporary database.
auto const t = db_temp1.find(trans.dst.str);
if (t != db_temp1.end())
diff --git a/ZRColaCompile/parse.cpp b/ZRColaCompile/parse.cpp
new file mode 100644
index 0000000..63f5076
--- /dev/null
+++ b/ZRColaCompile/parse.cpp
@@ -0,0 +1,212 @@
+/*
+ Copyright 2017 Amebis
+
+ This file is part of ZRCola.
+
+ ZRCola is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ZRCola is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ZRCola. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "stdafx.h"
+
+using namespace std;
+using namespace stdex;
+using namespace winstd;
+
+
+//////////////////////////////////////////////////////////////////////////
+// ZRCola::DecompParser
+//////////////////////////////////////////////////////////////////////////
+
+///
+/// Constructs the parser and loads the first symbol of the sequence
+///
+/// \param[in] decomposed  Decomposed character sequence to parse. The parser keeps a reference to it.
+///
+ZRCola::DecompParser::DecompParser(_In_ const std::wstring &decomposed) : m_decomposed(decomposed)
+{
+    // Start at the beginning with the end-of-input marker (0) as the current symbol...
+    m_idx = 0;
+    m_sym = 0;
+
+    // ...and replace it with the first character when there is one.
+    if (!m_decomposed.empty())
+        m_sym = m_decomposed[m_idx++];
+}
+
+
+///
+/// Collects consecutive prefix modifiers at the current position
+///
+/// Each symbol recognised as a prefix modifier is appended to \p prefix and consumed.
+/// The loop stops at the first symbol that is not a prefix modifier, which includes
+/// the zero symbol reported at the end of input.
+///
+/// \param[in,out] prefix  Vector receiving the prefix modifiers in order of appearance
+///
+void ZRCola::DecompParser::ParsePrefixes(_Inout_ std::vector<wchar_t> &prefix)
+{
+    while (
+        m_sym == 0x203F ||                          // Undertie
+        (0xE0F0 <= m_sym && m_sym <= 0xE0F3) ||     // 1st Set of Prefix Modifiers
+        m_sym == 0xE0F6 ||                          // Above
+        m_sym == 0xE0F7 ||                          // Below
+        (0xE0F8 <= m_sym && m_sym <= 0xE0FC))       // 2nd Set of Prefix Modifiers
+    {
+        // Prefix modifier
+        prefix.push_back(m_sym);
+        NextChar();
+    }
+}
+
+
+///
+/// Collects consecutive modifiers at the current position
+///
+/// Each symbol recognised as a modifier is appended to \p mod and consumed.
+/// The loop stops at the first symbol that is not a modifier, which includes
+/// the zero symbol reported at the end of input.
+///
+/// \param[in,out] mod  Vector receiving the modifiers in order of appearance
+///
+void ZRCola::DecompParser::ParseModifiers(_Inout_ std::vector<wchar_t> &mod)
+{
+    while (
+        (0xE000 <= m_sym && m_sym <= 0xE05B) ||     // 1st Set of Modifiers
+        (0xE063 <= m_sym && m_sym <= 0xE0BE) ||     // 2nd Set of Modifiers
+        m_sym == 0x0023 ||                          // Number Sign
+        m_sym == 0x003A ||                          // Colon
+        m_sym == 0x02DE ||                          // Rhotic Hook (also inside the 0x02B0-0x02FF range below)
+        m_sym == 0x2019 ||                          // Right Single Quotation Mark
+        (0x02B0 <= m_sym && m_sym <= 0x02FF) ||     // Standard Unicode Modifiers
+        m_sym == 0xE0C0 ||                          // Circle
+        m_sym == 0xE0C2 ||                          // Square
+        m_sym == 0xE0CA ||                          // Diamond
+        m_sym == 0xE0CD)                            // Not Circle
+    {
+        mod.push_back(m_sym);
+        NextChar();
+    }
+}
+
+
+///
+/// Parses an expression enclosed in a given pair of "parentheses"
+///
+/// \param[in] open   Symbol that opens the parenthesised expression
+/// \param[in] close  Symbol that closes the parenthesised expression
+///
+/// \returns
+/// - `NULL` when the current symbol is not \p open (nothing is consumed);
+/// - a `Character` holding \p close when \p open is immediately followed by 0xE037;
+/// - a `Parentheses` node wrapping the enclosed expression otherwise.
+/// The caller owns the returned object.
+///
+/// \throws std::invalid_argument when the enclosed expression is not followed by
+/// \p close or by the two-symbol sequence \p open + 0xE037.
+///
+ZRCola::DecompParser::Expression* ZRCola::DecompParser::ParseParentheses(_In_ wchar_t open, _In_ wchar_t close)
+{
+    if (m_sym != open)
+        return NULL;
+
+    // Parenthesis Start
+    NextChar();
+    if (m_sym == 0xE037) {
+        // EVA Parenthesis End
+        unique_ptr<Character> res(new Character(close));
+        NextChar();
+        return res.release();
+    }
+
+    // Hold the node in a smart pointer, so it is released should the terminator check below throw.
+    unique_ptr<Parentheses> res(new Parentheses(open, close, ParseExpression()));
+    if (m_sym == close) {
+        // Parenthesis End
+        NextChar();
+    } else if (m_sym == open && m_idx < m_decomposed.length() && m_decomposed[m_idx] == 0xE037) {
+        // Decomposed Parenthesis End: skip both the opening symbol and 0xE037.
+        NextChar();
+        NextChar();
+    } else
+        throw invalid_argument(string_printf("%lc is unexpected. Should end with %lc.", m_sym, close));
+
+    return res.release();
+}
+
+
+///
+/// Parses a single character: either a parenthesised expression or a base character with its prefixes and modifiers
+///
+/// The known parentheses pairs are tried first, in the order listed below. When none matches,
+/// a `Character` is assembled from optional prefix modifiers, an optional base character and
+/// optional modifiers.
+///
+/// \returns Parsed expression. The caller owns the returned object.
+///
+ZRCola::DecompParser::Expression* ZRCola::DecompParser::ParseCharacter()
+{
+    static const struct {
+        wchar_t open;
+        wchar_t close;
+    } parentheses[] = {
+        { 0xE0C5, 0xE0C6 },     // Superscript
+        { 0xE0CE, 0xE0CF },     // Subscript
+        { 0xE2E0, 0xE2E1 },     // EVA Double
+        { 0xE2E2, 0xE2E3 },     // EVA Condensed
+        { 0xE2E4, 0xE2E5 },     // EVA Emphasis
+        { 0xE2E6, 0xE2E7 },     // EVA Strike
+        { 0xE2E8, 0xE2E9 },     // EVA Undeal
+        { 0xE2EA, 0xE2EB },     // EVA Italic
+        { 0xE2EC, 0xE2ED },     // EVA Superscript
+        { 0xE2EE, 0xE2EF },     // EVA Subscript
+    };
+
+    // The first pair whose opening symbol matches the current symbol wins.
+    for (const auto &p : parentheses) {
+        Expression *res_p = ParseParentheses(p.open, p.close);
+        if (res_p != NULL)
+            return res_p;
+    }
+
+    unique_ptr<Character> res(new Character());
+
+    ParsePrefixes(res->m_prefix);
+
+    // Symbols in 0xE000-0xE0FC are modifiers, joiners and the like, except for the two arrows.
+    // Everything else is taken as the base character; this includes the zero end-of-input
+    // symbol, in which case m_char simply stays 0.
+    if (m_sym < 0xE000 || 0xE0FC < m_sym ||
+        m_sym == 0xE0D6 ||      // Clockwise Top Semicircle Arrow
+        m_sym == 0xE0D7)        // Anticlockwise Top Semicircle Arrow
+    {
+        // Base Character
+        res->m_char = m_sym;
+        NextChar();
+    }
+
+    ParseModifiers(res->m_mod);
+
+    return res.release();
+}
+
+
+///
+/// Parses a term: one character, or several characters joined into a ligature by 0xE0C4
+///
+/// \returns A `Ligature` when at least one ligature joiner was found; the lone character
+/// expression otherwise. The caller owns the returned object.
+///
+ZRCola::DecompParser::Expression* ZRCola::DecompParser::ParseTerm()
+{
+    unique_ptr<Ligature> lig(new Ligature(ParseCharacter()));
+    while (m_sym == 0xE0C4) {
+        // Ligature: keep the joiner symbol with its modifiers, followed by the joined character.
+        std::unique_ptr<Ligature::Element> el(new Ligature::Element(m_sym));
+        NextChar();
+        ParseModifiers(el->m_mod);
+        el->m_el.reset(ParseCharacter());
+        lig->m_rest.push_back(std::move(el));
+    }
+
+    // A ligature of, well one single character, is rather a character alone.
+    if (lig->m_rest.empty())
+        return lig->m_first.release();
+
+    return lig.release();
+}
+
+
+///
+/// Parses an expression: a term, optionally joined with one or two further terms
+///
+/// After the first term, the current symbol decides the shape of the result:
+/// - a two-term joiner produces a `Joined2` (joiner, its modifiers, second term);
+/// - a three-term joiner produces a `Joined3` (joiner, its modifiers, second and third term);
+/// - anything else leaves the first term as the result.
+///
+/// \returns Parsed expression. The caller owns the returned object.
+///
+ZRCola::DecompParser::Expression* ZRCola::DecompParser::ParseExpression()
+{
+    unique_ptr<Expression> first(ParseTerm());
+
+    const bool joins_two =
+        (0xE05C <= m_sym && m_sym <= 0xE062) ||
+        m_sym == 0xE0C1 ||      // Two Characters in Circle
+        m_sym == 0xE0C3 ||      // Two Characters in Square
+        m_sym == 0xE0C7 ||      // Breved below
+        m_sym == 0xE0CB ||      // Inverse tilded
+        m_sym == 0xE0CC ||      // Diagonally Barred
+        (0xE0D0 <= m_sym && m_sym <= 0xE0D5);
+    if (joins_two) {
+        // Joiner of two terms
+        unique_ptr<Joined2> res_j(new Joined2(m_sym, first.release()));
+        NextChar();
+
+        ParseModifiers(res_j->m_mod);
+
+        res_j->m_second.reset(ParseTerm());
+        return res_j.release();
+    }
+
+    const bool joins_three =
+        m_sym == 0xE0C8 ||      // Inverse breved
+        m_sym == 0xE0C9;        // Breved below
+    if (joins_three) {
+        // Joiner of three terms
+        unique_ptr<Joined3> res_j(new Joined3(m_sym, first.release()));
+        NextChar();
+
+        ParseModifiers(res_j->m_mod);
+
+        res_j->m_second.reset(ParseTerm());
+        res_j->m_third.reset(ParseTerm());
+        return res_j.release();
+    }
+
+    return first.release();
+}
+
+
+///
+/// Parses the whole decomposed sequence
+///
+/// \returns Root of the parsed expression tree. The caller owns the returned object.
+///
+/// \throws std::invalid_argument when symbols remain after a complete expression was parsed.
+///
+ZRCola::DecompParser::Expression* ZRCola::DecompParser::Parse()
+{
+    // Keep the tree in a smart pointer, so it is released when the trailing-input check throws.
+    unique_ptr<Expression> res(ParseExpression());
+
+    // A non-zero current symbol means the input was not consumed entirely.
+    if (m_sym)
+        throw invalid_argument("Unexpected trailing character(s).");
+
+    return res.release();
+}
diff --git a/ZRColaCompile/parse.h b/ZRColaCompile/parse.h
new file mode 100644
index 0000000..fdef078
--- /dev/null
+++ b/ZRColaCompile/parse.h
@@ -0,0 +1,154 @@
+/*
+ Copyright 2017 Amebis
+
+ This file is part of ZRCola.
+
+ ZRCola is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ZRCola is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ZRCola. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+
+namespace ZRCola {
+    ///
+    /// Decomposed sequence parser
+    ///
+    /// Turns a decomposed character sequence into a tree of `Expression` nodes.
+    ///
+    /// \note The parser keeps a reference to the sequence passed to the constructor;
+    /// the sequence must stay alive for as long as the parser is used.
+    ///
+    class DecompParser
+    {
+    public:
+        ///
+        /// Base class for expressions
+        ///
+        class Expression
+        {
+        public:
+            ///
+            /// Virtual destructor
+            ///
+            /// Nodes are owned and destroyed through `Expression*`. Without a virtual destructor
+            /// that is undefined behaviour and the members of derived nodes are never released.
+            ///
+            virtual ~Expression() {}
+        };
+
+
+        ///
+        /// Expression in "parentheses"
+        ///
+        class Parentheses : public Expression
+        {
+        public:
+            inline Parentheses(_In_opt_ wchar_t open = 0, _In_opt_ wchar_t close = 0, _In_opt_ Expression *expr = NULL) : m_open(open), m_close(close), m_expr(expr) {}
+
+        public:
+            wchar_t m_open, m_close;                ///< Opening and closing symbol
+            std::unique_ptr<Expression> m_expr;     ///< Enclosed expression (owned)
+        };
+
+
+        ///
+        /// Operator
+        ///
+        class Operator : public Expression
+        {
+        public:
+            inline Operator(_In_opt_ wchar_t chr = 0) : m_char(chr) {}
+
+            wchar_t m_char;                 ///< Operator symbol
+            std::vector<wchar_t> m_mod;     ///< Modifiers following the symbol
+        };
+
+
+        ///
+        /// Character
+        ///
+        class Character : public Operator
+        {
+        public:
+            inline Character(_In_opt_ wchar_t chr = 0) : Operator(chr) {}
+
+            std::vector<wchar_t> m_prefix;  ///< Prefix modifiers preceding the base character
+        };
+
+
+        ///
+        /// Ligature
+        ///
+        class Ligature : public Expression
+        {
+        public:
+            ///
+            /// Ligature joiner together with the expression it attaches
+            ///
+            class Element : public Operator
+            {
+            public:
+                inline Element(_In_opt_ wchar_t chr = 0) : Operator(chr) {}
+
+                std::unique_ptr<Expression> m_el;   ///< Expression following the joiner (owned)
+            };
+
+            inline Ligature(_In_opt_ Expression *first = NULL) : m_first(first) {}
+
+            std::unique_ptr<Expression> m_first;            ///< First expression of the ligature (owned)
+            std::list<std::unique_ptr<Element> > m_rest;    ///< Remaining joiner/expression pairs
+        };
+
+
+        ///
+        /// Two joined terms
+        ///
+        class Joined2 : public Operator
+        {
+        public:
+            Joined2(_In_opt_ wchar_t chr = 0, _In_opt_ Expression *first = NULL) : Operator(chr), m_first(first) {}
+
+            std::unique_ptr<Expression> m_first, m_second;  ///< Terms before and after the joiner (owned)
+        };
+
+
+        ///
+        /// Three joined terms
+        ///
+        class Joined3 : public Joined2
+        {
+        public:
+            Joined3(_In_opt_ wchar_t chr = 0, _In_opt_ Expression *first = NULL) : Joined2(chr, first) {}
+
+            std::unique_ptr<Expression> m_third;    ///< Third term (owned)
+        };
+
+
+    public:
+        ///
+        /// Constructs the parser
+        ///
+        /// \param[in] decomposed  Decomposed character sequence to parse. Held by reference; must outlive the parser.
+        ///
+        DecompParser(_In_ const std::wstring &decomposed);
+
+    protected:
+        inline void NextChar();
+        void ParsePrefixes(_Inout_ std::vector<wchar_t> &prefix);
+        void ParseModifiers(_Inout_ std::vector<wchar_t> &mod);
+        Expression* ParseParentheses(_In_ wchar_t open, _In_ wchar_t close);
+        Expression* ParseCharacter();
+        Expression* ParseTerm();
+        Expression* ParseExpression();
+
+    public:
+        ///
+        /// Parses the whole sequence
+        ///
+        /// \returns Root of the expression tree as a raw pointer; the caller takes ownership.
+        ///
+        Expression* Parse();
+
+    protected:
+        std::wstring::size_type m_idx;      ///< Index of the character following the current one
+        const std::wstring &m_decomposed;   ///< Decomposed character sequence to analyse
+        wchar_t m_sym;                      ///< Current character (0 at the end of the sequence)
+    };
+}
+
+
+///
+/// Advances to the next character of the sequence
+///
+/// Loads the character at the current index into `m_sym` and moves the index forward.
+/// Once the sequence is exhausted, `m_sym` is set to 0 and the index stays put.
+///
+inline void ZRCola::DecompParser::NextChar()
+{
+    if (m_idx < m_decomposed.length())
+        m_sym = m_decomposed[m_idx++];
+    else
+        m_sym = 0;
+}
diff --git a/ZRColaCompile/stdafx.h b/ZRColaCompile/stdafx.h
index 6a4ae89..33ac32d 100644
--- a/ZRColaCompile/stdafx.h
+++ b/ZRColaCompile/stdafx.h
@@ -21,6 +21,7 @@
#include "../include/version.h"
#include "dbsource.h"
+#include "parse.h"
#include
#include