diff --git a/ZRColaCompile/ZRColaCompile.vcxproj b/ZRColaCompile/ZRColaCompile.vcxproj index 458e11a..5e850ab 100644 --- a/ZRColaCompile/ZRColaCompile.vcxproj +++ b/ZRColaCompile/ZRColaCompile.vcxproj @@ -45,6 +45,7 @@ + Create Create @@ -52,6 +53,7 @@ + diff --git a/ZRColaCompile/ZRColaCompile.vcxproj.filters b/ZRColaCompile/ZRColaCompile.vcxproj.filters index e19d669..6ef14e2 100644 --- a/ZRColaCompile/ZRColaCompile.vcxproj.filters +++ b/ZRColaCompile/ZRColaCompile.vcxproj.filters @@ -28,6 +28,9 @@ Source Files + + Source Files + @@ -36,6 +39,9 @@ Header Files + + Header Files + diff --git a/ZRColaCompile/main.cpp b/ZRColaCompile/main.cpp index aec25bb..5fa169b 100644 --- a/ZRColaCompile/main.cpp +++ b/ZRColaCompile/main.cpp @@ -188,6 +188,11 @@ int _tmain(int argc, _TCHAR *argv[]) // Read translation from the database. ZRCola::DBSource::translation trans; if (src.GetTranslation(rs, trans)) { + try { + unique_ptr res(ZRCola::DecompParser(trans.src.str).Parse()); + } catch (...) { + } + // Add translation to temporary database. auto const t = db_temp1.find(trans.dst.str); if (t != db_temp1.end()) diff --git a/ZRColaCompile/parse.cpp b/ZRColaCompile/parse.cpp new file mode 100644 index 0000000..63f5076 --- /dev/null +++ b/ZRColaCompile/parse.cpp @@ -0,0 +1,212 @@ +/* + Copyright 2017 Amebis + + This file is part of ZRCola. + + ZRCola is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ZRCola is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ZRCola. If not, see . 
+*/
+
+#include "stdafx.h"
+
+using namespace std;
+using namespace stdex;
+using namespace winstd;
+
+
+//////////////////////////////////////////////////////////////////////////
+// ZRCola::DecompParser
+//////////////////////////////////////////////////////////////////////////
+
+///
+/// Constructs the parser and loads the first character of the sequence
+///
+/// \param[in] decomposed  Decomposed character sequence to analyse
+///
+ZRCola::DecompParser::DecompParser(_In_ const std::wstring &decomposed) : m_decomposed(decomposed)
+{
+    if (m_decomposed.empty()) {
+        // Nothing to parse: a current character of 0 marks the end of input.
+        m_sym = 0;
+        m_idx = 0;
+    } else {
+        // m_idx always points one past the current character.
+        m_sym = m_decomposed[0];
+        m_idx = 1;
+    }
+}
+
+
+///
+/// Consumes a run of prefix modifiers
+///
+/// \param[inout] prefix  Vector the consumed prefix modifiers are appended to
+///
+void ZRCola::DecompParser::ParsePrefixes(_Inout_ std::vector<wchar_t> &prefix)
+{
+    for (;;) {
+        if (
+            m_sym == 0x203F ||                        // Undertie
+            (0xE0F0 <= m_sym && m_sym <= 0xE0F3) ||   // 1st Set of Prefix Modifiers
+            m_sym == 0xE0F6 ||                        // Above
+            m_sym == 0xE0F7 ||                        // Below
+            (0xE0F8 <= m_sym && m_sym <= 0xE0FC))     // 2nd Set of Prefix Modifiers
+        {
+            // Prefix modifier
+            prefix.push_back(m_sym);
+            NextChar();
+        } else
+            break;
+    }
+}
+
+
+///
+/// Consumes a run of modifiers
+///
+/// \param[inout] mod  Vector the consumed modifiers are appended to
+///
+void ZRCola::DecompParser::ParseModifiers(_Inout_ std::vector<wchar_t> &mod)
+{
+    for (;;) {
+        if ((0xE000 <= m_sym && m_sym <= 0xE05B) ||   // 1st Set of Modifiers
+            (0xE063 <= m_sym && m_sym <= 0xE0BE) ||   // 2nd Set of Modifiers
+            m_sym == 0x0023 ||                        // Number Sign
+            m_sym == 0x003A ||                        // Colon
+            m_sym == 0x02DE ||                        // Rhotic Hook
+            m_sym == 0x2019 ||                        // Right Single Quotation Mark
+            (0x02B0 <= m_sym && m_sym <= 0x02FF) ||   // Standard Unicode Modifiers
+            m_sym == 0xE0C0 ||                        // Circle
+            m_sym == 0xE0C2 ||                        // Square
+            m_sym == 0xE0CA ||                        // Diamond
+            m_sym == 0xE0CD)                          // Not Circle
+        {
+            mod.push_back(m_sym);
+            NextChar();
+        } else
+            break;
+    }
+}
+
+
+///
+/// Parses an expression enclosed in the given pair of "parentheses"
+///
+/// \param[in] open   Opening character
+/// \param[in] close  Closing character
+///
+/// \returns
+/// - Newly allocated expression the caller takes ownership of;
+/// - nullptr when the current character is not \p open (nothing is consumed).
+///
+/// Throws std::invalid_argument when the enclosed expression is not terminated properly.
+///
+ZRCola::DecompParser::Expression* ZRCola::DecompParser::ParseParentheses(_In_ wchar_t open, _In_ wchar_t close)
+{
+    if (m_sym != open)
+        return nullptr;
+
+    // Parenthesis Start
+    NextChar();
+    if (m_sym == 0xE037) {
+        // EVA Parenthesis End: `open` followed by 0xE037 stands for the `close` character itself.
+        unique_ptr<Character> res(new Character(close));
+        NextChar();
+        return res.release();
+    }
+
+    unique_ptr<Parentheses> res(new Parentheses(open, close, ParseExpression()));
+    if (m_sym == close) {
+        // Parenthesis End
+        NextChar();
+    } else if (m_sym == open && m_idx < m_decomposed.length() && m_decomposed[m_idx] == 0xE037) {
+        // Decomposed Parenthesis End (`open` followed by 0xE037): skip both characters.
+        NextChar();
+        NextChar();
+    } else {
+        // Report code points rather than the characters themselves: m_sym is 0 at the end of
+        // input, and a NUL formatted with %lc would truncate the message.
+        throw invalid_argument(string_printf("U+%04X is unexpected. Should end with U+%04X.", static_cast<unsigned int>(m_sym), static_cast<unsigned int>(close)));
+    }
+    return res.release();
+}
+
+
+///
+/// Parses a parenthesised expression or a single character with its prefixes and modifiers
+///
+/// \returns Newly allocated expression the caller takes ownership of
+///
+ZRCola::DecompParser::Expression* ZRCola::DecompParser::ParseCharacter()
+{
+    // Opening/closing pairs of all supported "parentheses", tried in this order.
+    static const struct { wchar_t open, close; } parentheses[] = {
+        { 0xE0C5, 0xE0C6 },   // Superscript
+        { 0xE0CE, 0xE0CF },   // Subscript
+        { 0xE2E0, 0xE2E1 },   // EVA Double
+        { 0xE2E2, 0xE2E3 },   // EVA Condensed
+        { 0xE2E4, 0xE2E5 },   // EVA Emphasis
+        { 0xE2E6, 0xE2E7 },   // EVA Strike
+        { 0xE2E8, 0xE2E9 },   // EVA Undeal
+        { 0xE2EA, 0xE2EB },   // EVA Italic
+        { 0xE2EC, 0xE2ED },   // EVA Superscript
+        { 0xE2EE, 0xE2EF },   // EVA Subscript
+    };
+    for (const auto &p : parentheses) {
+        Expression *res_p = ParseParentheses(p.open, p.close);
+        if (res_p != nullptr)
+            return res_p;
+    }
+
+    unique_ptr<Character> res(new Character());
+
+    ParsePrefixes(res->m_prefix);
+
+    if (m_sym < 0xE000 || 0xE0FC < m_sym ||
+        m_sym == 0xE0D6 ||   // Clockwise Top Semicircle Arrow
+        m_sym == 0xE0D7)     // Anticlockwise Top Semicircle Arrow
+    {
+        // Base Character
+        res->m_char = m_sym;
+        NextChar();
+    }
+
+    ParseModifiers(res->m_mod);
+
+    return res.release();
+}
+
+
+///
+/// Parses a term: a character optionally followed by ligature elements
+///
+/// \returns Newly allocated expression the caller takes ownership of
+///
+ZRCola::DecompParser::Expression* ZRCola::DecompParser::ParseTerm()
+{
+    unique_ptr<Ligature> res(new Ligature(ParseCharacter()));
+    while (m_sym == 0xE0C4) {
+        // Ligature
+        std::unique_ptr<Ligature::Element> res2(new Ligature::Element(m_sym));
+        NextChar();
+        ParseModifiers(res2->m_mod);
+        res2->m_el.reset(ParseCharacter());
+        res->m_rest.push_back(std::move(res2));
+    }
+
+    if (res->m_rest.empty()) {
+        // A ligature of, well one single character, is rather a character alone.
+        return res->m_first.release();
+    } else
+        return res.release();
+}
+
+
+///
+/// Parses an expression: a term optionally joined with one or two further terms
+///
+/// \returns Newly allocated expression the caller takes ownership of
+///
+ZRCola::DecompParser::Expression* ZRCola::DecompParser::ParseExpression()
+{
+    unique_ptr<Expression> res(ParseTerm());
+    if ((0xE05C <= m_sym && m_sym <= 0xE062) ||
+        m_sym == 0xE0C1 ||   // Two Characters in Circle
+        m_sym == 0xE0C3 ||   // Two Characters in Square
+        m_sym == 0xE0C7 ||   // Breved below
+        m_sym == 0xE0CB ||   // Inverse tilded
+        m_sym == 0xE0CC ||   // Diagonally Barred
+        (0xE0D0 <= m_sym && m_sym <= 0xE0D5))
+    {
+        // Joiner of two terms
+        unique_ptr<Joined2> res_j(new Joined2(m_sym, res.release()));
+        NextChar();
+
+        ParseModifiers(res_j->m_mod);
+
+        res_j->m_second.reset(ParseTerm());
+        return res_j.release();
+    } else if (
+        m_sym == 0xE0C8 ||   // Inverse breved
+        m_sym == 0xE0C9)     // Breved below
+    {
+        // Joiner of three terms
+        unique_ptr<Joined3> res_j(new Joined3(m_sym, res.release()));
+        NextChar();
+
+        ParseModifiers(res_j->m_mod);
+
+        res_j->m_second.reset(ParseTerm());
+        res_j->m_third.reset(ParseTerm());
+        return res_j.release();
+    } else
+        return res.release();
+}
+
+
+///
+/// Parses the whole decomposed sequence
+///
+/// \returns Newly allocated expression the caller takes ownership of
+///
+/// Throws std::invalid_argument when characters remain after the expression.
+///
+ZRCola::DecompParser::Expression* ZRCola::DecompParser::Parse()
+{
+    unique_ptr<Expression> res(ParseExpression());
+    if (m_sym)
+        throw invalid_argument("Unexpected trailing character(s).");
+    return res.release();
+}
diff --git a/ZRColaCompile/parse.h b/ZRColaCompile/parse.h
new file mode 100644
index 0000000..fdef078
--- /dev/null
+++ b/ZRColaCompile/parse.h
@@ -0,0 +1,154 @@
+/*
+    Copyright 2017 Amebis
+
+    This file is part of ZRCola.
+
+    ZRCola is free software: you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    ZRCola is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with ZRCola. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include <list>
+#include <memory>
+#include <string>
+#include <vector>
+// NOTE(review): the original had five #include directives whose targets were lost; the four
+// above are inferred from the types used below - confirm the fifth against the repository.
+
+
+namespace ZRCola {
+    ///
+    /// Decomposed sequence parser
+    ///
+    class DecompParser
+    {
+    public:
+        ///
+        /// Base class for expressions
+        ///
+        class Expression
+        {
+        public:
+            ///
+            /// Virtual destructor
+            ///
+            /// Expressions are owned and destroyed through std::unique_ptr<Expression>,
+            /// so derived classes must be destructible through a base pointer.
+            ///
+            virtual ~Expression() {}
+        };
+
+
+        ///
+        /// Expression in "parentheses"
+        ///
+        class Parentheses : public Expression
+        {
+        public:
+            inline Parentheses(_In_opt_ wchar_t open = 0, _In_opt_ wchar_t close = 0, _In_opt_ Expression *expr = NULL) : m_open(open), m_close(close), m_expr(expr) {}
+
+        public:
+            wchar_t m_open, m_close;                ///< Opening and closing character
+            std::unique_ptr<Expression> m_expr;     ///< Enclosed expression (owned)
+        };
+
+
+        ///
+        /// Operator
+        ///
+        class Operator : public Expression
+        {
+        public:
+            inline Operator(_In_opt_ wchar_t chr = 0) : m_char(chr) {}
+
+            wchar_t m_char;                 ///< Operator character
+            std::vector<wchar_t> m_mod;     ///< Modifiers following the character
+        };
+
+
+        ///
+        /// Character
+        ///
+        class Character : public Operator
+        {
+        public:
+            inline Character(_In_opt_ wchar_t chr = 0) : Operator(chr) {}
+
+            std::vector<wchar_t> m_prefix;  ///< Prefix modifiers preceding the character
+        };
+
+
+        ///
+        /// Ligature
+        ///
+        class Ligature : public Expression
+        {
+        public:
+            ///
+            /// Ligature operator together with the expression it attaches
+            ///
+            class Element : public Operator
+            {
+            public:
+                inline Element(_In_opt_ wchar_t chr = 0) : Operator(chr) {}
+
+                std::unique_ptr<Expression> m_el;   ///< Attached expression (owned)
+            };
+
+            inline Ligature(_In_opt_ Expression *first = NULL) : m_first(first) {}
+
+            std::unique_ptr<Expression> m_first;            ///< First expression (owned)
+            std::list<std::unique_ptr<Element> > m_rest;    ///< Remaining elements (owned)
+        };
+
+
+        ///
+        /// Two joined terms
+        ///
+        class Joined2 : public Operator
+        {
+        public:
+            Joined2(_In_opt_ wchar_t chr = 0, _In_opt_ Expression *first = NULL) : Operator(chr), m_first(first) {}
+
+            std::unique_ptr<Expression> m_first, m_second;  ///< Joined terms (owned)
+        };
+
+
+        ///
+        /// Three joined terms
+        ///
+        class Joined3 : public Joined2
+        {
+        public:
+            Joined3(_In_opt_ wchar_t chr = 0, _In_opt_ Expression *first = NULL) : Joined2(chr, first) {}
+
+            std::unique_ptr<Expression> m_third;            ///< Third joined term (owned)
+        };
+
+
+    public:
+        DecompParser(_In_ const std::wstring &decomposed);
+
+    protected:
+        inline void NextChar();
+        void ParsePrefixes(_Inout_ std::vector<wchar_t> &prefix);
+        void ParseModifiers(_Inout_ std::vector<wchar_t> &mod);
+        Expression* ParseParentheses(_In_ wchar_t open, _In_ wchar_t close);
+        Expression* ParseCharacter();
+        Expression* ParseTerm();
+        Expression* ParseExpression();
+
+    public:
+        Expression* Parse();
+
+    protected:
+        std::wstring::size_type m_idx;      ///< Index of the character following the current one
+        const std::wstring m_decomposed;    ///< Decomposed character sequence to analyse (own copy, so a temporary argument cannot dangle)
+        wchar_t m_sym;                      ///< Current character (0 at the end of the sequence)
+    };
+}
+
+
+///
+/// Advances to the next character; sets the current character to 0 once the sequence is exhausted
+///
+inline void ZRCola::DecompParser::NextChar()
+{
+    m_sym = m_idx < m_decomposed.length() ? m_decomposed[m_idx++] : 0;
+}
diff --git a/ZRColaCompile/stdafx.h b/ZRColaCompile/stdafx.h
index 6a4ae89..33ac32d 100644
--- a/ZRColaCompile/stdafx.h
+++ b/ZRColaCompile/stdafx.h
@@ -21,6 +21,7 @@
 #include "../include/version.h"
 #include "dbsource.h"
+#include "parse.h"
 #include
 #include