From 261ad9881215a659df6a0769f78be4cdaa6d0a9c Mon Sep 17 00:00:00 2001 From: Simon Rozman Date: Wed, 15 Mar 2023 21:38:57 +0100 Subject: [PATCH] Add string parsers Ported from Amebis AOsn library to standard C++ Signed-off-by: Simon Rozman --- UnitTests/UnitTests.vcxproj | 1 + UnitTests/UnitTests.vcxproj.filters | 3 + UnitTests/parser.cpp | 225 + UnitTests/pch.h | 2 +- include/stdex/parser.h | 6531 +++++++++++++++++++++++++++ 5 files changed, 6761 insertions(+), 1 deletion(-) create mode 100644 UnitTests/parser.cpp create mode 100644 include/stdex/parser.h diff --git a/UnitTests/UnitTests.vcxproj b/UnitTests/UnitTests.vcxproj index 52f7d4f59..4570edd78 100644 --- a/UnitTests/UnitTests.vcxproj +++ b/UnitTests/UnitTests.vcxproj @@ -115,6 +115,7 @@ + Create diff --git a/UnitTests/UnitTests.vcxproj.filters b/UnitTests/UnitTests.vcxproj.filters index 3c41d8037..a2ef46a40 100644 --- a/UnitTests/UnitTests.vcxproj.filters +++ b/UnitTests/UnitTests.vcxproj.filters @@ -21,6 +21,9 @@ Source Files + + Source Files + diff --git a/UnitTests/parser.cpp b/UnitTests/parser.cpp new file mode 100644 index 000000000..fd7c353ed --- /dev/null +++ b/UnitTests/parser.cpp @@ -0,0 +1,225 @@ +/* + SPDX-License-Identifier: MIT + Copyright © 2023 Amebis +*/ + +#include "pch.h" + +using namespace std; +using namespace stdex; +using namespace stdex::parser; +using namespace Microsoft::VisualStudio::CppUnitTestFramework; + +namespace UnitTests +{ + TEST_CLASS(parser) + { + public: + TEST_METHOD(wtest) + { + static const wchar_t text[] = L"This is a test.\nSecond line."; + + { + wnoop t; + Assert::IsTrue(t.match(text)); + Assert::AreEqual((size_t)0, t.interval.start); + Assert::AreEqual((size_t)0, t.interval.end); + } + + { + wcu t(L't'); + Assert::IsFalse(t.match(text)); + Assert::IsTrue(t.match(text, 0, _countof(text), match_case_insensitive)); + Assert::AreEqual((size_t)0, t.interval.start); + Assert::AreEqual((size_t)1, t.interval.end); + } + + { + wspace_cu t; + Assert::IsFalse(t.match(text)); + Assert::IsTrue(t.match(text, 4)); + Assert::AreEqual((size_t)4, t.interval.start); + Assert::AreEqual((size_t)5, t.interval.end); + } + + { + wpunct_cu t; + Assert::IsFalse(t.match(text)); + Assert::IsTrue(t.match(text, 14)); + Assert::AreEqual((size_t)14, t.interval.start); + Assert::AreEqual((size_t)15, t.interval.end); + } + + { + wspace_or_punct_cu t; + Assert::IsFalse(t.match(text)); + Assert::IsTrue(t.match(text, 4)); + Assert::AreEqual((size_t)4, t.interval.start); + Assert::AreEqual((size_t)5, t.interval.end); + Assert::IsTrue(t.match(text, 14)); + Assert::AreEqual((size_t)14, t.interval.start); + Assert::AreEqual((size_t)15, t.interval.end); + } + + { + wbol t; + Assert::IsTrue(t.match(text)); + Assert::AreEqual((size_t)0, t.interval.start); + Assert::AreEqual((size_t)0, t.interval.end); + Assert::IsFalse(t.match(text, 1)); + Assert::IsFalse(t.match(text, 15)); + Assert::IsTrue(t.match(text, 16)); + Assert::AreEqual((size_t)16, t.interval.start); + Assert::AreEqual((size_t)16, t.interval.end); + } + + { + weol t; + Assert::IsFalse(t.match(text)); + Assert::IsFalse(t.match(text, 1)); + Assert::IsTrue(t.match(text, 15)); + Assert::AreEqual((size_t)15, t.interval.start); + Assert::AreEqual((size_t)15, t.interval.end); + Assert::IsFalse(t.match(text, 16)); + } + + { + wcu_set t(L"abcD"); + Assert::IsFalse(t.match(text)); + Assert::IsTrue(t.match(text, 8)); + Assert::AreEqual((size_t)8, t.interval.start); + Assert::AreEqual((size_t)9, t.interval.end); + Assert::AreEqual((size_t)0, t.hit_offset); + Assert::IsFalse(t.match(text, 21)); + Assert::IsTrue(t.match(text, 21, _countof(text), match_case_insensitive)); + Assert::AreEqual((size_t)21, t.interval.start); + Assert::AreEqual((size_t)22, t.interval.end); + Assert::AreEqual((size_t)3, t.hit_offset); + } + + { + stdex::parser::wstring t(L"this"); + Assert::IsFalse(t.match(text)); + Assert::IsTrue(t.match(text, 0, sizeof(text), match_case_insensitive)); + Assert::AreEqual((size_t)0, t.interval.start); + Assert::AreEqual((size_t)4, t.interval.end); + } + + { + wany_cu chr; + witerations t(make_shared_no_delete(&chr), 1, 5); + Assert::IsTrue(t.match(text)); + Assert::AreEqual((size_t)0, t.interval.start); + Assert::AreEqual((size_t)5, t.interval.end); + } + + { + wspace_cu nospace(true); + witerations t(make_shared_no_delete(&nospace), 1); + Assert::IsTrue(t.match(text)); + Assert::AreEqual((size_t)0, t.interval.start); + Assert::AreEqual((size_t)4, t.interval.end); + } + + { + wcu chr_t(L't'), chr_h(L'h'), chr_i(L'i'), chr_s(L's'); + wspace_cu space; + wsequence t({ + make_shared_no_delete>(&chr_t), + make_shared_no_delete>(&chr_h), + make_shared_no_delete>(&chr_i), + make_shared_no_delete>(&chr_s), + make_shared_no_delete>(&space) }); + Assert::IsFalse(t.match(text)); + Assert::IsTrue(t.match(text, 0, _countof(text), match_case_insensitive)); + Assert::AreEqual((size_t)0, t.interval.start); + Assert::AreEqual((size_t)5, t.interval.end); + } + + { + stdex::parser::wstring apple(L"apple"), orange(L"orange"), _this(L"this"); + wspace_cu space; + wbranch t({ + make_shared_no_delete>(&apple), + make_shared_no_delete>(&orange), + make_shared_no_delete>(&_this), + make_shared_no_delete>(&space) }); + Assert::IsFalse(t.match(text)); + Assert::IsTrue(t.match(text, 0, _countof(text), match_case_insensitive)); + Assert::AreEqual((size_t)2, t.hit_offset); + Assert::AreEqual((size_t)0, t.interval.start); + Assert::AreEqual((size_t)4, t.interval.end); + } + + { + wstring_branch t(L"apple", L"orange", L"this", nullptr); + Assert::IsFalse(t.match(text)); + Assert::IsTrue(t.match(text, 0, _countof(text), match_case_insensitive)); + Assert::AreEqual((size_t)2, t.hit_offset); + Assert::AreEqual((size_t)0, t.interval.start); + Assert::AreEqual((size_t)4, t.interval.end); + } + + { + wcu chr_s(L's'), chr_h(L'h'), chr_i(L'i'), chr_t(L't'); + wpermutation t({ + make_shared_no_delete>(&chr_s), + make_shared_no_delete>(&chr_h), + make_shared_no_delete>(&chr_i), + make_shared_no_delete>(&chr_t) }); + Assert::IsFalse(t.match(text)); + Assert::IsTrue(t.match(text, 0, _countof(text), match_case_insensitive)); + Assert::AreEqual((size_t)0, t.interval.start); + Assert::AreEqual((size_t)4, t.interval.end); + } + } + + TEST_METHOD(sgml_test) + { + static const char text[] = "V kožuščku zlobnega mizarja stopiclja fant\nin kliče 1234567890."; + + { + sgml_noop t; + Assert::IsTrue(t.match(text)); + Assert::AreEqual((size_t)0, t.interval.start); + Assert::AreEqual((size_t)0, t.interval.end); + } + + { + sgml_cp t("v"); + Assert::IsFalse(t.match(text)); + Assert::IsTrue(t.match(text, 0, _countof(text), match_case_insensitive)); + Assert::AreEqual((size_t)0, t.interval.start); + Assert::AreEqual((size_t)1, t.interval.end); + } + + { + sgml_cp t("Ž"); + Assert::IsFalse(t.match(text, 4)); + Assert::IsTrue(t.match(text, 4, _countof(text), match_case_insensitive)); + Assert::AreEqual((size_t)4, t.interval.start); + Assert::AreEqual((size_t)12, t.interval.end); + } + + { + sgml_space_cp t; + Assert::IsFalse(t.match(text)); + Assert::IsTrue(t.match(text, 1)); + Assert::AreEqual((size_t)1, t.interval.start); + Assert::AreEqual((size_t)2, t.interval.end); + Assert::IsTrue(t.match(text, 79)); + Assert::AreEqual((size_t)79, t.interval.start); + Assert::AreEqual((size_t)85, t.interval.end); + } + + { + sgml_string_branch t("apple", "orange", "KoŽuŠčKu", nullptr); + Assert::IsFalse(t.match(text, 2)); + Assert::IsTrue(t.match(text, 2, _countof(text), match_case_insensitive)); + Assert::AreEqual((size_t)2, t.hit_offset); + Assert::AreEqual((size_t)2, t.interval.start); + Assert::AreEqual((size_t)31, t.interval.end); + } + } + }; +} diff --git a/UnitTests/pch.h b/UnitTests/pch.h index 720e3e21d..63cd120af 100644 --- a/UnitTests/pch.h +++ b/UnitTests/pch.h @@ -15,7 +15,7 @@ #include #include #include -//#include +#include #include #include #include diff --git a/include/stdex/parser.h b/include/stdex/parser.h new file mode 100644 index 000000000..9e7e888ca --- /dev/null +++ b/include/stdex/parser.h @@ -0,0 +1,6531 @@ +/* + SPDX-License-Identifier: MIT + Copyright © 2023 Amebis +*/ + +#pragma once + +#include "interval.h" +#include "memory.h" +#include "sal.h" +#include "sgml.h" +#include "string.h" +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#include +#endif + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4100) +#endif + +namespace stdex +{ + namespace parser + { + /// + /// Flags used in basic_tester::match() methods + /// + constexpr int match_default = 0; + constexpr int match_case_insensitive = 0x1; + constexpr int match_multiline = 0x2; + + /// + /// Base template for all testers + /// + template + class basic_tester + { + public: + basic_tester(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {} + virtual ~basic_tester() {} + + bool search( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + for (size_t i = start; i < end && text[i]; i++) + if (match(text, i, end, flags)) + return true; + return false; + } + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) = 0; + + template + inline bool match( + const std::basic_string& text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + return match(text.c_str(), start, std::min(end, text.size()), flags); + } + + virtual void invalidate() + { + interval.start = 1; + interval.end = 0; + } + + protected: + /// \cond internal + const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3]) + { + if (text[start] == '&') { + // Potential entity start + const auto& ctype = std::use_facet>(m_locale); + for (chr_end = start + 1;; chr_end++) { + if (chr_end >= end || text[chr_end] == 0) { + // Unterminated entity + break; + } + if (text[chr_end] == ';') { + // Entity end + size_t n = chr_end - start - 1; + if (n >= 2 && text[start + 1] == '#') { + // Numerical entity + char32_t unicode; + if (text[start + 2] == 'x' || text[start + 2] == 'X') + unicode = strtou32(text + start + 3, n - 2, nullptr, 16); + else + unicode = strtou32(text + start + 2, n - 1, nullptr, 10); +#ifdef _WIN32 + if (unicode < 0x10000) { + buf[0] = (wchar_t)unicode; + buf[1] = 0; + } + else { + ucs4_to_surrogate_pair(buf, unicode); + buf[2] = 0; + } +#else + buf[0] = (wchar_t)unicode; + buf[1] = 0; +#endif + chr_end++; + return buf; + } + const wchar_t* entity_w = sgml2uni(text + start + 1, n); + if (entity_w) { + chr_end++; + return entity_w; + } + // Unknown entity. + break; + } + else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) { + // This char cannot possibly be a part of entity. + break; + } + } + } + buf[0] = text[start]; + buf[1] = 0; + chr_end = start + 1; + return buf; + } + /// \endcond + + public: + interval interval; ///< Test for interval + + protected: + const std::locale& m_locale; + }; + + using tester = basic_tester; + using wtester = basic_tester; +#ifdef _UNICODE + using ttester = wtester; +#else + using ttester = tester; +#endif + using sgml_tester = basic_tester; + + /// + /// "No-op" match + /// + template + class basic_noop : public basic_tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + interval.start = interval.end = start; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + using noop = basic_noop; + using wnoop = basic_noop; +#ifdef _UNICODE + using tnoop = wnoop; +#else + using tnoop = noop; +#endif + using sgml_noop = basic_noop; + + /// + /// Test for any code unit + /// + template + class basic_any_cu : public basic_tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + interval.end = (interval.start = start) + 1; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + using any_cu = basic_any_cu; + using wany_cu = basic_any_cu; +#ifdef _UNICODE + using tany_cu = wany_cu; +#else + using tany_cu = any_cu; +#endif + + /// + /// Test for any SGML code point + /// + class sgml_any_cp : public basic_any_cu + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + if (text[start] == '&') { + // SGML entity + const auto& ctype = std::use_facet>(m_locale); + for (interval.end = start + 1; interval.end < end && text[interval.end]; interval.end++) + if (text[interval.end] == ';') { + interval.end++; + interval.start = start; + return true; + } + else if (text[interval.end] == '&' || ctype.is(ctype.space, text[interval.end])) + break; + // Unterminated entity + } + interval.end = (interval.start = start) + 1; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + /// + /// Test for specific code unit + /// + template + class basic_cu : public basic_tester + { + public: + basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_chr(chr), + m_invert(invert) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + bool r; + if (flags & match_case_insensitive) { + const auto& ctype = std::use_facet>(m_locale); + r = ctype.tolower(text[start]) == ctype.tolower(m_chr); + } + else + r = text[start] == m_chr; + if (r && !m_invert || !r && m_invert) { + interval.end = (interval.start = start) + 1; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + T m_chr; + bool m_invert; + }; + + using cu = basic_cu; + using wcu = basic_cu; +#ifdef _UNICODE + using tcu = wcu; +#else + using tcu = cu; +#endif + + /// + /// Test for specific SGML code point + /// + class sgml_cp : public sgml_tester + { + public: + sgml_cp(const char* chr, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) : + sgml_tester(locale), + m_invert(invert) + { + assert(chr || !count); + wchar_t buf[3]; + size_t chr_end; + m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L""); + } + + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + wchar_t buf[3]; + const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf); + bool r = ((flags & match_case_insensitive) ? + stdex::strnicmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size(), m_locale) : + stdex::strncmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size())) == 0; + if (r && !m_invert || !r && m_invert) { + interval.start = start; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + std::wstring m_chr; + bool m_invert; + }; + + /// + /// Test for any space code unit + /// + template + class basic_space_cu : public basic_tester + { + public: + basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_invert(invert) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + bool r = + ((flags & match_multiline) || !islbreak(text[start])) && + std::use_facet>(m_locale).is(std::ctype_base::space, text[start]); + if (r && !m_invert || !r && m_invert) { + interval.end = (interval.start = start) + 1; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + bool m_invert; + }; + + using space_cu = basic_space_cu; + using wspace_cu = basic_space_cu; +#ifdef _UNICODE + using tspace_cu = wspace_cu; +#else + using tspace_cu = space_cu; +#endif + + /// + /// Test for any SGML space code point + /// + class sgml_space_cp : public basic_space_cu + { + public: + sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) : + basic_space_cu(invert, locale) + {} + + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + wchar_t buf[3]; + const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf); + const wchar_t* chr_end = chr + stdex::strlen(chr); + bool r = + ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) && + std::use_facet>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end; + if (r && !m_invert || !r && m_invert) { + interval.start = start; + return true; + } + } + + interval.start = (interval.end = start) + 1; + return false; + } + }; + + /// + /// Test for any punctuation code unit + /// + template + class basic_punct_cu : public basic_tester + { + public: + basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_invert(invert) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + bool r = std::use_facet>(m_locale).is(std::ctype_base::punct, text[start]); + if (r && !m_invert || !r && m_invert) { + interval.end = (interval.start = start) + 1; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + bool m_invert; + }; + + using punct_cu = basic_punct_cu; + using wpunct_cu = basic_punct_cu; +#ifdef _UNICODE + using tpunct_cu = wpunct_cu; +#else + using tpunct_cu = punct_cu; +#endif + + /// + /// Test for any SGML punctuation code point + /// + class sgml_punct_cp : public basic_punct_cu + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + wchar_t buf[3]; + const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf); + const wchar_t* chr_end = chr + stdex::strlen(chr); + bool r = std::use_facet>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end; + if (r && !m_invert || !r && m_invert) { + interval.start = start; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + /// + /// Test for any space or punctuation code unit + /// + template + class basic_space_or_punct_cu : public basic_tester + { + public: + basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_invert(invert) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + bool r = + ((flags & match_multiline) || !islbreak(text[start])) && + std::use_facet>(m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]); + if (r && !m_invert || !r && m_invert) { + interval.end = (interval.start = start) + 1; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + bool m_invert; + }; + + using space_or_punct_cu = basic_space_or_punct_cu; + using wspace_or_punct_cu = basic_space_or_punct_cu; +#ifdef _UNICODE + using tspace_or_punct_cu = wspace_or_punct_cu; +#else + using tspace_or_punct_cu = space_or_punct_cu; +#endif + + /// + /// Test for any SGML space or punctuation code point + /// + class sgml_space_or_punct_cp : public basic_space_or_punct_cu + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + wchar_t buf[3]; + const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf); + const wchar_t* chr_end = chr + stdex::strlen(chr); + bool r = + ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) && + std::use_facet>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end; + if (r && !m_invert || !r && m_invert) { + interval.start = start; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + /// + /// Test for beginning of line + /// + template + class basic_bol : public basic_tester + { + public: + basic_bol(bool invert = false) : m_invert(invert) {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + bool r = start == 0 || start <= end && islbreak(text[start - 1]); + if (r && !m_invert || !r && m_invert) { + interval.end = interval.start = start; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + bool m_invert; + }; + + using bol = basic_bol; + using wbol = basic_bol; +#ifdef _UNICODE + using tbol = wbol; +#else + using tbol = bol; +#endif + using sgml_bol = basic_bol; + + /// + /// Test for end of line + /// + template + class basic_eol : public basic_tester + { + public: + basic_eol(bool invert = false) : m_invert(invert) {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + bool r = islbreak(text[start]); + if (r && !m_invert || !r && m_invert) { + interval.end = interval.start = start; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + bool m_invert; + }; + + using eol = basic_eol; + using weol = basic_eol; +#ifdef _UNICODE + using teol = weol; +#else + using teol = eol; +#endif + using sgml_eol = basic_eol; + + template + class basic_set : public basic_tester + { + public: + basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + hit_offset((size_t)-1), + m_invert(invert) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) = 0; + + virtual void invalidate() + { + hit_offset = (size_t)-1; + basic_tester::invalidate(); + } + + public: + size_t hit_offset; + + protected: + bool m_invert; + }; + + /// + /// Test for any code unit from a given string of code units + /// + template + class basic_cu_set : public basic_set + { + public: + basic_cu_set( + _In_reads_or_z_(count) const T* set, + _In_ size_t count = (size_t)-1, + _In_ bool invert = false, + _In_ const std::locale& locale = std::locale()) : + basic_set(invert, locale) + { + if (set) + m_set.assign(set, set + stdex::strnlen(set, count)); + } + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + const T* set = m_set.c_str(); + const T* r = (flags & match_case_insensitive) ? + stdex::strnichr(set, text[start], m_set.size(), m_locale) : + stdex::strnchr(set, text[start], m_set.size()); + if (r && !m_invert || !r && m_invert) { + hit_offset = r ? r - set : (size_t)-1; + interval.end = (interval.start = start) + 1; + return true; + } + } + hit_offset = (size_t)-1; + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + std::basic_string m_set; + }; + + using cu_set = basic_cu_set; + using wcu_set = basic_cu_set; +#ifdef _UNICODE + using tcu_set = wcu_set; +#else + using tcu_set = cu_set; +#endif + + /// + /// Test for any SGML code point from a given string of SGML code points + /// + class sgml_cp_set : public basic_set + { + public: + sgml_cp_set(const char* set, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) : + basic_set(invert, locale) + { + if (set) + m_set = sgml2str(set, count); + } + + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + wchar_t buf[3]; + const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf); + const wchar_t* set = m_set.c_str(); + const wchar_t* r = (flags & match_case_insensitive) ? + stdex::strnistr(set, chr, m_set.size(), m_locale) : + stdex::strnstr(set, chr, m_set.size()); + if (r && !m_invert || !r && m_invert) { + hit_offset = r ? r - set : (size_t)-1; + interval.start = start; + return true; + } + } + hit_offset = (size_t)-1; + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + std::wstring m_set; + }; + + /// + /// Test for given string + /// + template + class basic_string : public basic_tester + { + public: + basic_string( + _In_reads_or_z_(count) const T* str, + _In_ size_t count = (size_t)-1, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_str(str, str + stdex::strnlen(str, count)) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + size_t + m = m_str.size(), + n = std::min(end - start, m); + bool r = ((flags & match_case_insensitive) ? + stdex::strnicmp(text + start, n, m_str.c_str(), m, m_locale) : + stdex::strncmp(text + start, n, m_str.c_str(), m)) == 0; + if (r) { + interval.end = (interval.start = start) + n; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + std::basic_string m_str; + }; + + using string = basic_string; + using wstring = basic_string; +#ifdef _UNICODE + using tstring = wstring; +#else + using tstring = string; +#endif + + /// + /// Test for SGML given string + /// + class sgml_string : public sgml_tester + { + public: + sgml_string(const char* str, size_t count = (size_t)-1, _In_ const std::locale& locale = std::locale()) : + sgml_tester(locale), + m_str(sgml2str(str, count)) + {} + + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + const wchar_t* str = m_str.c_str(); + const bool case_insensitive = flags & match_case_insensitive ? true : false; + const auto& ctype = std::use_facet>(m_locale); + for (interval.end = start;;) { + if (!*str) { + interval.start = start; + return true; + } + if (interval.end >= end || !text[interval.end]) { + interval.start = (interval.end = start) + 1; + return false; + } + wchar_t buf[3]; + const wchar_t* chr = next_sgml_cp(text, interval.end, end, interval.end, buf); + for (; *chr; ++str, ++chr) { + if (!*str || + (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr)) + { + interval.start = (interval.end = start) + 1; + return false; + } + } + } + } + + protected: + std::wstring m_str; + }; + + /// + /// Test for repeating + /// + template + class basic_iterations : public basic_tester + { + public: + basic_iterations(const std::shared_ptr>& el, size_t min_iterations = 0, size_t max_iterations = (size_t)-1, bool greedy = true) : + m_el(el), + m_min_iterations(min_iterations), + m_max_iterations(max_iterations), + m_greedy(greedy) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.start = interval.end = start; + for (size_t i = 0; ; i++) { + if (!m_greedy && i >= m_min_iterations || i >= m_max_iterations) + return true; + if (!m_el->match(text, interval.end, end, flags)) { + if (i >= m_min_iterations) + return true; + break; + } + if (m_el->interval.end == interval.end) { + // Element did match, but the matching interval was empty. Quit instead of spinning. + return true; + } + interval.end = m_el->interval.end; + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + std::shared_ptr> m_el; ///< repeating element + size_t m_min_iterations; ///< minimum number of iterations + size_t m_max_iterations; ///< maximum number of iterations + bool m_greedy; ///< try to match as long sequence as possible + }; + + using iterations = basic_iterations; + using witerations = basic_iterations; +#ifdef _UNICODE + using titerations = witerations; +#else + using titerations = iterations; +#endif + using sgml_iterations = basic_iterations; + + /// + /// Base template for collection-holding testers + /// + template + class tester_collection : public basic_tester + { + protected: + tester_collection(_In_ const std::locale& locale) : basic_tester(locale) {} + + public: + tester_collection( + _In_count_(count) const std::shared_ptr>* el, + _In_ size_t count, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale) + { + assert(el || !count); + m_collection.reserve(count); + for (size_t i = 0; i < count; i++) + m_collection.push_back(el[i]); + } + + tester_collection( + _Inout_ std::vector>>&& collection, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_collection(std::move(collection)) + {} + + virtual void invalidate() + { + for (auto i = m_collection.begin(); i != m_collection.end(); ++i) + (*i)->invalidate(); + basic_tester::invalidate(); + } + + protected: + std::vector>> m_collection; + }; + + /// + /// Test for sequence + /// + template + class basic_sequence : public tester_collection + { + public: + basic_sequence( + _In_count_(count) const std::shared_ptr>* el = nullptr, + _In_ size_t count = 0, + _In_ const std::locale& locale = std::locale()) : + tester_collection(el, count, locale) + {} + + basic_sequence( + _Inout_ std::vector>>&& collection, + _In_ const std::locale& locale = std::locale()) : + tester_collection(std::move(collection), locale) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + for (auto i = m_collection.begin(); i != m_collection.end(); ++i) { + if (!(*i)->match(text, interval.end, end, flags)) { + for (++i; i != m_collection.end(); ++i) + (*i)->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + interval.end = (*i)->interval.end; + } + interval.start = start; + return true; + } + }; + + using sequence = basic_sequence; + using wsequence = basic_sequence; +#ifdef _UNICODE + using tsequence = wsequence; +#else + using tsequence = sequence; +#endif + using sgml_sequence = basic_sequence; + + /// + /// Test for any + /// + template + class basic_branch : public tester_collection + { + protected: + basic_branch(_In_ const std::locale& locale) : + tester_collection(locale), + hit_offset((size_t)-1) + {} + + public: + basic_branch( + _In_count_(count) const std::shared_ptr>* el = nullptr, + _In_ size_t count = 0, + _In_ const std::locale& locale = std::locale()) : + tester_collection(el, count, locale), + hit_offset((size_t)-1) + {} + + basic_branch( + _Inout_ std::vector>>&& collection, + _In_ const std::locale& locale = std::locale()) : + tester_collection(std::move(collection), locale), + hit_offset((size_t)-1) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + hit_offset = 0; + for (auto i = m_collection.begin(); i != m_collection.end(); ++i, ++hit_offset) { + if ((*i)->match(text, start, end, flags)) { + interval = (*i)->interval; + for (++i; i != m_collection.end(); ++i) + (*i)->invalidate(); + return true; + } + } + hit_offset = (size_t)-1; + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + hit_offset = (size_t)-1; + tester_collection::invalidate(); + } + + public: + size_t hit_offset; + }; + + using branch = basic_branch; + using wbranch = basic_branch; +#ifdef _UNICODE + using tbranch = wbranch; +#else + using tbranch = branch; +#endif + using sgml_branch = basic_branch; + + /// + /// Test for any string + /// + template > + class basic_string_branch : public basic_branch + { + public: + inline basic_string_branch( + _In_reads_(count) const T* str_z = nullptr, + _In_ size_t count = 0, + _In_ const std::locale& locale = std::locale()) : + basic_branch(locale) + { + build(str_z, count); + } + + inline basic_string_branch(_In_z_ const T* str, ...) : + basic_branch(std::locale()) + { + va_list params; + va_start(params, str); + build(str, params); + va_end(params); + } + + inline basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) : + basic_branch(locale) + { + va_list params; + va_start(params, str); + build(str, params); + va_end(params); + } + + protected: + void build(_In_reads_(count) const T* str_z, _In_ size_t count) + { + assert(str_z || !count); + if (count) { + size_t offset, n; + for ( + offset = n = 0; + offset < count && str_z[offset]; + offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n); + m_collection.reserve(n); + for ( + offset = 0; + offset < count && str_z[offset]; + offset += stdex::strnlen(str_z + offset, count - offset) + 1) + m_collection.push_back(std::move(std::make_shared(str_z + offset, count - offset, m_locale))); + } + } + + void build(_In_z_ const T* str, _In_ va_list params) + { + const T* p; + for ( + m_collection.push_back(std::move(std::make_shared(str, (size_t)-1, m_locale))); + (p = va_arg(params, const T*)) != nullptr; + m_collection.push_back(std::move(std::make_shared(p, (size_t)-1, m_locale)))); + } + }; + + using string_branch = basic_string_branch; + using wstring_branch = basic_string_branch; +#ifdef _UNICODE + using tstring_branch = wstring_branch; +#else + using tstring_branch = string_branch; +#endif + using sgml_string_branch = basic_string_branch; + + /// + /// Test for permutation + /// + template + class basic_permutation : public tester_collection + { + public: + basic_permutation( + _In_count_(count) const std::shared_ptr>* el = nullptr, + _In_ size_t count = 0, + _In_ const std::locale& locale = std::locale()) : + tester_collection(el, count, locale) + {} + + basic_permutation( + _Inout_ std::vector>>&& collection, + _In_ const std::locale& locale = std::locale()) : + tester_collection(std::move(collection), locale) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + for (auto i = m_collection.begin(); i != m_collection.end(); ++i) + (*i)->invalidate(); + if (match_recursively(text, start, end, flags)) { + interval.start = start; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + bool match_recursively( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + bool all_matched = true; + for (auto i = m_collection.begin(); i != m_collection.cend(); ++i) { + if (!(*i)->interval) { + // Element was not matched in permutatuion yet. + all_matched = false; + if ((*i)->match(text, start, end, flags)) { + // Element matched for the first time. + if (match_recursively(text, (*i)->interval.end, end, flags)) { + // Rest of the elements matched too. + return true; + } + (*i)->invalidate(); + } + } + } + if (all_matched) { + interval.end = start; + return true; + } + return false; + } + }; + + using permutation = basic_permutation; + using wpermutation = basic_permutation; +#ifdef _UNICODE + using tpermutation = wpermutation; +#else + using tpermutation = permutation; +#endif + using sgml_permutation = basic_permutation; + + /// + /// Base class for integer testing + /// + template + class basic_integer : public basic_tester + { + public: + basic_integer(_In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + value(0) + {} + + virtual void invalidate() + { + value = 0; + basic_tester::invalidate(); + } + + public: + size_t value; ///< Calculated value of the numeral + }; + + /// + /// Test for decimal integer + /// + template + class basic_integer10 : public basic_integer + { + public: + basic_integer10( + _In_ const std::shared_ptr>& digit_0, + _In_ const std::shared_ptr>& digit_1, + _In_ const std::shared_ptr>& digit_2, + _In_ const std::shared_ptr>& digit_3, + _In_ const std::shared_ptr>& digit_4, + _In_ const std::shared_ptr>& digit_5, + _In_ const std::shared_ptr>& digit_6, + _In_ const std::shared_ptr>& digit_7, + _In_ const std::shared_ptr>& digit_8, + _In_ const std::shared_ptr>& digit_9, + _In_ const std::locale& locale = std::locale()) : + basic_integer(locale), + m_digit_0(digit_0), + m_digit_1(digit_1), + m_digit_2(digit_2), + m_digit_3(digit_3), + m_digit_4(digit_4), + m_digit_5(digit_5), + m_digit_6(digit_6), + m_digit_7(digit_7), + m_digit_8(digit_8), + m_digit_9(digit_9) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + for (interval.end = start, value = 0; interval.end < end && text[interval.end];) { + size_t dig; + if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; interval.end = m_digit_0->interval.end; } + else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; interval.end = m_digit_1->interval.end; } + else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; interval.end = m_digit_2->interval.end; } + else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; interval.end = m_digit_3->interval.end; } + else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; interval.end = m_digit_4->interval.end; } + else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; interval.end = m_digit_5->interval.end; } + else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; interval.end = m_digit_6->interval.end; } + else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; interval.end = m_digit_7->interval.end; } + else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; interval.end = m_digit_8->interval.end; } + else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; interval.end = m_digit_9->interval.end; } + else break; + value = value * 10 + dig; + } + if (start < interval.end) { + interval.start = start; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + std::shared_ptr> + m_digit_0, + m_digit_1, + m_digit_2, + m_digit_3, + m_digit_4, + m_digit_5, + m_digit_6, + m_digit_7, + m_digit_8, + m_digit_9; + }; + + using integer10 = basic_integer10; + using winteger10 = basic_integer10; +#ifdef _UNICODE + using tinteger10 = winteger10; +#else + using tinteger10 = integer10; +#endif + using sgml_integer10 = basic_integer10; + + /// + /// Test for decimal integer possibly containing thousand separators + /// + template + class basic_integer10ts : public basic_integer + { + public: + basic_integer10ts( + _In_ const std::shared_ptr>& digits, + _In_ const std::shared_ptr>& separator, + _In_ const std::locale& locale = std::locale()) : + basic_integer(locale), + digit_count(0), + has_separators(false), + m_digits(digits), + m_separator(separator) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (m_digits->match(text, start, end, flags)) { + // Leading part match. + value = m_digits->value; + digit_count = m_digits->interval.size(); + has_separators = false; + interval.start = start; + interval.end = m_digits->interval.end; + if (m_digits->interval.size() <= 3) { + // Maybe separated with thousand separators? + size_t hit_offset = (size_t)-1; + while (m_separator->match(text, interval.end, end, flags) && + (hit_offset == (size_t)-1 || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing. + m_digits->match(text, m_separator->interval.end, end, flags) && + m_digits->interval.size() == 3) + { + // Thousand separator and three-digit integer followed. + value = value * 1000 + m_digits->value; + digit_count += 3; + has_separators = true; + interval.end = m_digits->interval.end; + hit_offset = m_separator->hit_offset; + } + } + + return true; + } + value = 0; + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + digit_count = 0; + has_separators = false; + basic_integer::invalidate(); + } + + public: + size_t digit_count; ///< Total number of digits in integer + bool has_separators; ///< Did integer have any separators? + + protected: + std::shared_ptr> m_digits; + std::shared_ptr> m_separator; + }; + + using integer10ts = basic_integer10ts; + using winteger10ts = basic_integer10ts; +#ifdef _UNICODE + using tinteger10ts = winteger10ts; +#else + using tinteger10ts = integer10ts; +#endif + using sgml_integer10ts = basic_integer10ts; + + /// + /// Test for hexadecimal integer + /// + template + class basic_integer16 : public basic_integer + { + public: + basic_integer16( + _In_ const std::shared_ptr>& digit_0, + _In_ const std::shared_ptr>& digit_1, + _In_ const std::shared_ptr>& digit_2, + _In_ const std::shared_ptr>& digit_3, + _In_ const std::shared_ptr>& digit_4, + _In_ const std::shared_ptr>& digit_5, + _In_ const std::shared_ptr>& digit_6, + _In_ const std::shared_ptr>& digit_7, + _In_ const std::shared_ptr>& digit_8, + _In_ const std::shared_ptr>& digit_9, + _In_ const std::shared_ptr>& digit_10, + _In_ const std::shared_ptr>& digit_11, + _In_ const std::shared_ptr>& digit_12, + _In_ const std::shared_ptr>& digit_13, + _In_ const std::shared_ptr>& digit_14, + _In_ const std::shared_ptr>& digit_15, + _In_ const std::locale& locale = std::locale()) : + basic_integer(locale), + m_digit_0(digit_0), + m_digit_1(digit_1), + m_digit_2(digit_2), + m_digit_3(digit_3), + m_digit_4(digit_4), + m_digit_5(digit_5), + m_digit_6(digit_6), + m_digit_7(digit_7), + m_digit_8(digit_8), + m_digit_9(digit_9), + m_digit_10(digit_10), + m_digit_11(digit_11), + m_digit_12(digit_12), + m_digit_13(digit_13), + m_digit_14(digit_14), + m_digit_15(digit_15) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + for (interval.end = start, value = 0; interval.end < end && text[interval.end];) { + size_t dig; + if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; interval.end = m_digit_0->interval.end; } + else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; interval.end = m_digit_1->interval.end; } + else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; interval.end = m_digit_2->interval.end; } + else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; interval.end = m_digit_3->interval.end; } + else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; interval.end = m_digit_4->interval.end; } + else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; interval.end = m_digit_5->interval.end; } + else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; interval.end = m_digit_6->interval.end; } + else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; interval.end = m_digit_7->interval.end; } + else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; interval.end = m_digit_8->interval.end; } + else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; interval.end = m_digit_9->interval.end; } + else if (m_digit_10->match(text, interval.end, end, flags)) { dig = 10; interval.end = m_digit_10->interval.end; } + else if (m_digit_11->match(text, interval.end, end, flags)) { dig = 11; interval.end = m_digit_11->interval.end; } + else if (m_digit_12->match(text, interval.end, end, flags)) { dig = 12; interval.end = m_digit_12->interval.end; } + else if (m_digit_13->match(text, interval.end, end, flags)) { dig = 13; interval.end = m_digit_13->interval.end; } + else if (m_digit_14->match(text, interval.end, end, flags)) { dig = 14; interval.end = m_digit_14->interval.end; } + else if (m_digit_15->match(text, interval.end, end, flags)) { dig = 15; interval.end = m_digit_15->interval.end; } + else break; + value = value * 16 + dig; + } + if (start < interval.end) { + interval.start = start; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + std::shared_ptr> + m_digit_0, + m_digit_1, + m_digit_2, + m_digit_3, + m_digit_4, + m_digit_5, + m_digit_6, + m_digit_7, + m_digit_8, + m_digit_9, + m_digit_10, + m_digit_11, + m_digit_12, + m_digit_13, + m_digit_14, + m_digit_15; + }; + + using integer16 = basic_integer16; + using winteger16 = basic_integer16; +#ifdef _UNICODE + using tinteger16 = winteger16; +#else + using tinteger16 = integer16; +#endif + using sgml_integer16 = basic_integer16; + + /// + /// Test for Roman numeral + /// + template + class basic_roman_numeral : public basic_integer + { + public: + basic_roman_numeral( + _In_ const std::shared_ptr>& digit_1, + _In_ const std::shared_ptr>& digit_5, + _In_ const std::shared_ptr>& digit_10, + _In_ const std::shared_ptr>& digit_50, + _In_ const std::shared_ptr>& digit_100, + _In_ const std::shared_ptr>& digit_500, + _In_ const std::shared_ptr>& digit_1000, + _In_ const std::shared_ptr>& digit_5000, + _In_ const std::shared_ptr>& digit_10000, + _In_ const std::locale& locale = std::locale()) : + basic_integer(locale), + m_digit_1(digit_1), + m_digit_5(digit_5), + m_digit_10(digit_10), + m_digit_50(digit_50), + m_digit_100(digit_100), + m_digit_500(digit_500), + m_digit_1000(digit_1000), + m_digit_5000(digit_5000), + m_digit_10000(digit_10000) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + size_t + dig[5] = { (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1 }, + end2; + + for (interval.end = start, value = 0; interval.end < end && text[interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], interval.end = end2) { + if (m_digit_1 && m_digit_1->match(text, interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; } + else if (m_digit_5 && m_digit_5->match(text, interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; } + else if (m_digit_10 && m_digit_10->match(text, interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; } + else if (m_digit_50 && m_digit_50->match(text, interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; } + else if (m_digit_100 && m_digit_100->match(text, interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; } + else if (m_digit_500 && m_digit_500->match(text, interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; } + else if (m_digit_1000 && m_digit_1000->match(text, interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; } + else if (m_digit_5000 && m_digit_5000->match(text, interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; } + else if (m_digit_10000 && m_digit_10000->match(text, interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; } + else break; + + // Store first digit. + if (dig[4] == (size_t)-1) dig[4] = dig[0]; + + if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) { + // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also... + break; + } + if (dig[0] <= dig[1]) { + // Digit is less or equal previous one: add. + value += dig[0]; + } + else if ( + dig[1] == 1 && (dig[0] == 5 || dig[0] == 10) || + dig[1] == 10 && (dig[0] == 50 || dig[0] == 100) || + dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000) || + dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)) + { + // Digit is up to two orders bigger than previous one: subtract. But... + if (dig[2] < dig[0]) { + // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid) + break; + } + value -= dig[1]; // Cancel addition in the previous step. + dig[0] -= dig[1]; // Combine last two digits. + dig[1] = dig[2]; // The true previous digit is now pre-previous one. :) + dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :) + value += dig[0]; // Add combined value. + } + else { + // New digit is too big than the previous one. E.g. VX (V < X => invalid) + break; + } + } + if (value) { + interval.start = start; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + std::shared_ptr> + m_digit_1, + m_digit_5, + m_digit_10, + m_digit_50, + m_digit_100, + m_digit_500, + m_digit_1000, + m_digit_5000, + m_digit_10000; + }; + + using roman_numeral = basic_roman_numeral; + using wroman_numeral = basic_roman_numeral; +#ifdef _UNICODE + using troman_numeral = wroman_numeral; +#else + using troman_numeral = roman_numeral; +#endif + using sgml_roman_numeral = basic_roman_numeral; + + /// + /// Test for fraction + /// + template + class basic_fraction : public basic_tester + { + public: + basic_fraction( + _In_ const std::shared_ptr>& _numerator, + _In_ const std::shared_ptr>& _fraction_line, + _In_ const std::shared_ptr>& _denominator, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + numerator(_numerator), + fraction_line(_fraction_line), + denominator(_denominator) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (numerator->match(text, start, end, flags) && + fraction_line->match(text, numerator->interval.end, end, flags) && + denominator->match(text, fraction_line->interval.end, end, flags)) + { + interval.start = start; + interval.end = denominator->interval.end; + return true; + } + numerator->invalidate(); + fraction_line->invalidate(); + denominator->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + numerator->invalidate(); + fraction_line->invalidate(); + denominator->invalidate(); + basic_tester::invalidate(); + } + + public: + std::shared_ptr> numerator; + std::shared_ptr> fraction_line; + std::shared_ptr> denominator; + }; + + using fraction = basic_fraction; + using wfraction = basic_fraction; +#ifdef _UNICODE + using tfraction = wfraction; +#else + using tfraction = fraction; +#endif + using sgml_fraction = basic_fraction; + + /// + /// Test for match score + /// + template + class basic_score : public basic_tester + { + public: + basic_score( + _In_ const std::shared_ptr>& _home, + _In_ const std::shared_ptr>& _separator, + _In_ const std::shared_ptr>& _guest, + _In_ const std::shared_ptr>& space, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + home(_home), + separator(_separator), + guest(_guest), + m_space(space) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + + if (home->match(text, interval.end, end, flags)) + interval.end = home->interval.end; + else + goto end; + + const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line. + for (; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + + if (separator->match(text, interval.end, end, flags)) + interval.end = separator->interval.end; + else + goto end; + + for (; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + + if (guest->match(text, interval.end, end, flags)) + interval.end = guest->interval.end; + else + goto end; + + interval.start = start; + return true; + + end: + home->invalidate(); + separator->invalidate(); + guest->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + home->invalidate(); + separator->invalidate(); + guest->invalidate(); + basic_tester::invalidate(); + } + + public: + std::shared_ptr> home; + std::shared_ptr> separator; + std::shared_ptr> guest; + + protected: + std::shared_ptr> m_space; + }; + + using score = basic_score; + using wscore = basic_score; +#ifdef _UNICODE + using tscore = wscore; +#else + using tscore = score; +#endif + using sgml_score = basic_score; + + /// + /// Test for signed numeral + /// + template + class basic_signed_numeral : public basic_tester + { + public: + basic_signed_numeral( + _In_ const std::shared_ptr>& _positive_sign, + _In_ const std::shared_ptr>& _negative_sign, + _In_ const std::shared_ptr>& _special_sign, + _In_ const std::shared_ptr>& _number, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + positive_sign(_positive_sign), + negative_sign(_negative_sign), + special_sign(_special_sign), + number(_number) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + if (positive_sign && positive_sign->match(text, interval.end, end, flags)) { + interval.end = positive_sign->interval.end; + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + } + else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) { + interval.end = negative_sign->interval.end; + if (positive_sign) positive_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + } + else if (special_sign && special_sign->match(text, interval.end, end, flags)) { + interval.end = special_sign->interval.end; + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + } + else { + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + } + if (number->match(text, interval.end, end, flags)) { + interval.start = start; + interval.end = number->interval.end; + return true; + } + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + number->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + number->invalidate(); + basic_tester::invalidate(); + } + + public: + std::shared_ptr> positive_sign; ///< Positive sign + std::shared_ptr> negative_sign; ///< Negative sign + std::shared_ptr> special_sign; ///< Special sign (e.g. plus-minus '±') + std::shared_ptr> number; ///< Number + }; + + using signed_numeral = basic_signed_numeral; + using wsigned_numeral = basic_signed_numeral; +#ifdef _UNICODE + using tsigned_numeral = wsigned_numeral; +#else + using tsigned_numeral = signed_numeral; +#endif + using sgml_signed_numeral = basic_signed_numeral; + + /// + /// Test for mixed numeral + /// + template + class basic_mixed_numeral : public basic_tester + { + public: + basic_mixed_numeral( + _In_ const std::shared_ptr>& _positive_sign, + _In_ const std::shared_ptr>& _negative_sign, + _In_ const std::shared_ptr>& _special_sign, + _In_ const std::shared_ptr>& _integer, + _In_ const std::shared_ptr>& space, + _In_ const std::shared_ptr>& _fraction, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + positive_sign(_positive_sign), + negative_sign(_negative_sign), + special_sign(_special_sign), + integer(_integer), + fraction(_fraction), + m_space(space) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + + if (positive_sign && positive_sign->match(text, interval.end, end, flags)) { + interval.end = positive_sign->interval.end; + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + } + else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) { + interval.end = negative_sign->interval.end; + if (positive_sign) positive_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + } + else if (special_sign && special_sign->match(text, interval.end, end, flags)) { + interval.end = special_sign->interval.end; + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + } + else { + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + } + + // Check for + const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line. + if (integer->match(text, interval.end, end, flags) && + m_space->match(text, integer->interval.end, end, space_match_flags)) + { + for (interval.end = m_space->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (fraction->match(text, interval.end, end, flags)) { + interval.start = start; + interval.end = fraction->interval.end; + return true; + } + fraction->invalidate(); + interval.start = start; + interval.end = integer->interval.end; + return true; + } + + // Check for + if (fraction->match(text, interval.end, end, flags)) { + integer->invalidate(); + interval.start = start; + interval.end = fraction->interval.end; + return true; + } + + // Check for + if (integer->match(text, interval.end, end, flags)) { + fraction->invalidate(); + interval.start = start; + interval.end = integer->interval.end; + return true; + } + + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + integer->invalidate(); + fraction->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + integer->invalidate(); + fraction->invalidate(); + basic_tester::invalidate(); + } + + public: + std::shared_ptr> positive_sign; ///< Positive sign + std::shared_ptr> negative_sign; ///< Negative sign + std::shared_ptr> special_sign; ///< Special sign (e.g. plus-minus '±') + std::shared_ptr> integer; ///< Integer part + std::shared_ptr> fraction; ///< fraction + + protected: + std::shared_ptr> m_space; + }; + + using mixed_numeral = basic_mixed_numeral; + using wmixed_numeral = basic_mixed_numeral; +#ifdef _UNICODE + using tmixed_numeral = wmixed_numeral; +#else + using tmixed_numeral = mixed_numeral; +#endif + using sgml_mixed_numeral = basic_mixed_numeral; + + /// + /// Test for scientific numeral + /// + template + class basic_scientific_numeral : public basic_tester + { + public: + basic_scientific_numeral( + _In_ const std::shared_ptr>& _positive_sign, + _In_ const std::shared_ptr>& _negative_sign, + _In_ const std::shared_ptr>& _special_sign, + _In_ const std::shared_ptr>& _integer, + _In_ const std::shared_ptr>& _decimal_separator, + _In_ const std::shared_ptr>& _decimal, + _In_ const std::shared_ptr>& _exponent_symbol, + _In_ const std::shared_ptr>& _positive_exp_sign, + _In_ const std::shared_ptr>& _negative_exp_sign, + _In_ const std::shared_ptr>& _exponent, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + positive_sign(_positive_sign), + negative_sign(_negative_sign), + special_sign(_special_sign), + integer(_integer), + decimal_separator(_decimal_separator), + decimal(_decimal), + exponent_symbol(_exponent_symbol), + positive_exp_sign(_positive_exp_sign), + negative_exp_sign(_negative_exp_sign), + exponent(_exponent), + value(std::numeric_limits::quiet_NaN()) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + + if (positive_sign && positive_sign->match(text, interval.end, end, flags)) { + interval.end = positive_sign->interval.end; + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + } + else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) { + interval.end = negative_sign->interval.end; + if (positive_sign) positive_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + } + else if (special_sign && special_sign->match(text, interval.end, end, flags)) { + interval.end = special_sign->interval.end; + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + } + else { + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + } + + if (integer->match(text, interval.end, end, flags)) + interval.end = integer->interval.end; + + if (decimal_separator->match(text, interval.end, end, flags) && + decimal->match(text, decimal_separator->interval.end, end, flags)) + interval.end = decimal->interval.end; + else { + decimal_separator->invalidate(); + decimal->invalidate(); + } + + if (!integer->interval.empty() && + decimal->interval.empty()) + { + // No integer part, no decimal part. + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + integer->invalidate(); + decimal_separator->invalidate(); + decimal->invalidate(); + if (exponent_symbol) exponent_symbol->invalidate(); + if (positive_exp_sign) positive_exp_sign->invalidate(); + if (negative_exp_sign) negative_exp_sign->invalidate(); + if (exponent) exponent->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + if (exponent_symbol && exponent_symbol->match(text, interval.end, end, flags) && + (positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) && + exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags) || + exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))) + { + interval.end = exponent->interval.end; + if (negative_exp_sign) negative_exp_sign->invalidate(); + } + else if (exponent_symbol && exponent_symbol->match(text, interval.end, end, flags) && + negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) && + exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags)) + { + interval.end = exponent->interval.end; + if (positive_exp_sign) positive_exp_sign->invalidate(); + } + else { + if (exponent_symbol) exponent_symbol->invalidate(); + if (positive_exp_sign) positive_exp_sign->invalidate(); + if (negative_exp_sign) negative_exp_sign->invalidate(); + if (exponent) exponent->invalidate(); + } + + value = (double)integer->value; + if (decimal->interval) + value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size()); + if (negative_sign && negative_sign->interval) + value = -value; + if (exponent && exponent->interval) { + double e = (double)exponent->value; + if (negative_exp_sign && negative_exp_sign->interval) + e = -e; + value *= pow(10.0, e); + } + + interval.start = start; + return true; + } + + virtual void invalidate() + { + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + integer->invalidate(); + decimal_separator->invalidate(); + decimal->invalidate(); + if (exponent_symbol) exponent_symbol->invalidate(); + if (positive_exp_sign) positive_exp_sign->invalidate(); + if (negative_exp_sign) negative_exp_sign->invalidate(); + if (exponent) exponent->invalidate(); + value = std::numeric_limits::quiet_NaN(); + basic_tester::invalidate(); + } + + public: + std::shared_ptr> positive_sign; ///< Positive sign + std::shared_ptr> negative_sign; ///< Negative sign + std::shared_ptr> special_sign; ///< Special sign (e.g. plus-minus '±') + std::shared_ptr> integer; ///< Integer part + std::shared_ptr> decimal_separator; ///< Decimal separator + std::shared_ptr> decimal; ///< Decimal part + std::shared_ptr> exponent_symbol; ///< Exponent symbol (e.g. 'e') + std::shared_ptr> positive_exp_sign; ///< Positive exponent sign (e.g. '+') + std::shared_ptr> negative_exp_sign; ///< Negative exponent sign (e.g. '-') + std::shared_ptr> exponent; ///< Exponent part + double value; ///< Calculated value of the numeral + }; + + using scientific_numeral = basic_scientific_numeral; + using wscientific_numeral = basic_scientific_numeral; +#ifdef _UNICODE + using tscientific_numeral = wscientific_numeral; +#else + using tscientific_numeral = scientific_numeral; +#endif + using sgml_scientific_numeral = basic_scientific_numeral; + + /// + /// Test for monetary numeral + /// + template + class basic_monetary_numeral : public basic_tester + { + public: + basic_monetary_numeral( + _In_ const std::shared_ptr>& _positive_sign, + _In_ const std::shared_ptr>& _negative_sign, + _In_ const std::shared_ptr>& _special_sign, + _In_ const std::shared_ptr>& _currency, + _In_ const std::shared_ptr>& _integer, + _In_ const std::shared_ptr>& _decimal_separator, + _In_ const std::shared_ptr>& _decimal, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + positive_sign(_positive_sign), + negative_sign(_negative_sign), + special_sign(_special_sign), + currency(_currency), + integer(_integer), + decimal_separator(_decimal_separator), + decimal(_decimal) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + + if (positive_sign->match(text, interval.end, end, flags)) { + interval.end = positive_sign->interval.end; + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + } + else if (negative_sign->match(text, interval.end, end, flags)) { + interval.end = negative_sign->interval.end; + if (positive_sign) positive_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + } + else if (special_sign->match(text, interval.end, end, flags)) { + interval.end = special_sign->interval.end; + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + } + else { + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + } + + if (currency->match(text, interval.end, end, flags)) + interval.end = currency->interval.end; + else { + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + integer->invalidate(); + decimal_separator->invalidate(); + decimal->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + if (integer->match(text, interval.end, end, flags)) + interval.end = integer->interval.end; + if (decimal_separator->match(text, interval.end, end, flags) && + decimal->match(text, decimal_separator->interval.end, end, flags)) + interval.end = decimal->interval.end; + else { + decimal_separator->invalidate(); + decimal->invalidate(); + } + + if (integer->interval.empty() && + decimal->interval.empty()) + { + // No integer part, no decimal part. + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + currency->invalidate(); + integer->invalidate(); + decimal_separator->invalidate(); + decimal->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + interval.start = start; + return true; + } + + virtual void invalidate() + { + if (positive_sign) positive_sign->invalidate(); + if (negative_sign) negative_sign->invalidate(); + if (special_sign) special_sign->invalidate(); + currency->invalidate(); + integer->invalidate(); + decimal_separator->invalidate(); + decimal->invalidate(); + basic_tester::invalidate(); + } + + public: + std::shared_ptr> positive_sign; ///< Positive sign + std::shared_ptr> negative_sign; ///< Negative sign + std::shared_ptr> special_sign; ///< Special sign (e.g. plus-minus '±') + std::shared_ptr> currency; ///< Currency part + std::shared_ptr> integer; ///< Integer part + std::shared_ptr> decimal_separator; ///< Decimal separator + std::shared_ptr> decimal; ///< Decimal part + }; + + using monetary_numeral = basic_monetary_numeral; + using wmonetary_numeral = basic_monetary_numeral; +#ifdef _UNICODE + using tmonetary_numeral = wmonetary_numeral; +#else + using tmonetary_numeral = monetary_numeral; +#endif + using sgml_monetary_numeral = basic_monetary_numeral; + + /// + /// Test for IPv4 address + /// + template + class basic_ipv4_address : public basic_tester + { + public: + basic_ipv4_address( + _In_ const std::shared_ptr>& digit_0, + _In_ const std::shared_ptr>& digit_1, + _In_ const std::shared_ptr>& digit_2, + _In_ const std::shared_ptr>& digit_3, + _In_ const std::shared_ptr>& digit_4, + _In_ const std::shared_ptr>& digit_5, + _In_ const std::shared_ptr>& digit_6, + _In_ const std::shared_ptr>& digit_7, + _In_ const std::shared_ptr>& digit_8, + _In_ const std::shared_ptr>& digit_9, + _In_ const std::shared_ptr>& separator, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_digit_0(digit_0), + m_digit_1(digit_1), + m_digit_2(digit_2), + m_digit_3(digit_3), + m_digit_4(digit_4), + m_digit_5(digit_5), + m_digit_6(digit_6), + m_digit_7(digit_7), + m_digit_8(digit_8), + m_digit_9(digit_9), + m_separator(separator) + { + value.s_addr = 0; + } + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + value.s_addr = 0; + + size_t i; + for (i = 0; i < 4; i++) { + if (i) { + if (m_separator->match(text, interval.end, end, flags)) + interval.end = m_separator->interval.end; + else + goto error; + } + + components[i].start = interval.end; + bool is_empty = true; + size_t x; + for (x = 0; interval.end < end && text[interval.end];) { + size_t dig, digit_end; + if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; } + else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; } + else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; } + else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; } + else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; } + else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; } + else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; } + else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; } + else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; } + else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; } + else break; + size_t x_n = x * 10 + dig; + if (x_n <= 255) { + x = x_n; + interval.end = digit_end; + is_empty = false; + } + else + break; + } + if (is_empty) + goto error; + components[i].end = interval.end; + value.s_addr = (value.s_addr << 8) | (uint8_t)x; + } + if (i < 4) + goto error; + + interval.start = start; + return true; + + error: + components[0].start = 1; + components[0].end = 0; + components[1].start = 1; + components[1].end = 0; + components[2].start = 1; + components[2].end = 0; + components[3].start = 1; + components[3].end = 0; + value = 0; + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + components[0].start = 1; + components[0].end = 0; + components[1].start = 1; + components[1].end = 0; + components[2].start = 1; + components[2].end = 0; + components[3].start = 1; + components[3].end = 0; + value = 0; + basic_tester::invalidate(); + } + + public: + stdex::interval components[4]; ///< Individual component intervals + struct in_addr value; ///< IPv4 address value + + protected: + std::shared_ptr> + m_digit_0, + m_digit_1, + m_digit_2, + m_digit_3, + m_digit_4, + m_digit_5, + m_digit_6, + m_digit_7, + m_digit_8, + m_digit_9; + std::shared_ptr> m_separator; + }; + + using ipv4_address = basic_ipv4_address; + using wipv4_address = basic_ipv4_address; +#ifdef _UNICODE + using tipv4_address = wipv4_address; +#else + using tipv4_address = ipv4_address; +#endif + using sgml_ipv4_address = basic_ipv4_address; + + /// + /// Test for valid IPv6 address scope ID character + /// + template + class basic_ipv6_scope_id_char : public basic_tester + { + public: + basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_tester(locale) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + if (text[start] == '-' || + text[start] == '_' || + text[start] == ':' || + std::use_facet>(m_locale).is(std::ctype_base::alnum, text[start])) + { + interval.end = (interval.start = start) + 1; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + using ipv6_scope_id_char = basic_ipv6_scope_id_char; + using wipv6_scope_id_char = basic_ipv6_scope_id_char; +#ifdef _UNICODE + using tipv6_scope_id_char = wipv6_scope_id_char; +#else + using tipv6_scope_id_char = ipv6_scope_id_char; +#endif + + /// + /// Test for valid IPv6 address scope ID SGML character + /// + class sgml_ipv6_scope_id_char : public sgml_tester + { + public: + sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_tester(locale) + {} + + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + wchar_t buf[3]; + const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf); + const wchar_t* chr_end = chr + stdex::strlen(chr); + if ((chr[0] == L'-' || + chr[0] == L'_' || + chr[0] == L':') && chr[1] == 0 || + std::use_facet>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end) + { + interval.start = start; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + /// + /// Test for IPv6 address + /// + template + class basic_ipv6_address : public basic_tester + { + public: + basic_ipv6_address( + _In_ const std::shared_ptr>& digit_0, + _In_ const std::shared_ptr>& digit_1, + _In_ const std::shared_ptr>& digit_2, + _In_ const std::shared_ptr>& digit_3, + _In_ const std::shared_ptr>& digit_4, + _In_ const std::shared_ptr>& digit_5, + _In_ const std::shared_ptr>& digit_6, + _In_ const std::shared_ptr>& digit_7, + _In_ const std::shared_ptr>& digit_8, + _In_ const std::shared_ptr>& digit_9, + _In_ const std::shared_ptr>& digit_10, + _In_ const std::shared_ptr>& digit_11, + _In_ const std::shared_ptr>& digit_12, + _In_ const std::shared_ptr>& digit_13, + _In_ const std::shared_ptr>& digit_14, + _In_ const std::shared_ptr>& digit_15, + _In_ const std::shared_ptr>& separator, + _In_ const std::shared_ptr>& scope_id_separator = nullptr, + _In_ const std::shared_ptr>& _scope_id = nullptr, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_digit_0(digit_0), + m_digit_1(digit_1), + m_digit_2(digit_2), + m_digit_3(digit_3), + m_digit_4(digit_4), + m_digit_5(digit_5), + m_digit_6(digit_6), + m_digit_7(digit_7), + m_digit_8(digit_8), + m_digit_9(digit_9), + m_digit_10(digit_10), + m_digit_11(digit_11), + m_digit_12(digit_12), + m_digit_13(digit_13), + m_digit_14(digit_14), + m_digit_15(digit_15), + m_separator(separator), + m_scope_id_separator(scope_id_separator), + scope_id(_scope_id) + { + memset(value, 0, sizeof(value)); + } + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + memset(value, 0, sizeof(value)); + + size_t i, compaction_i = (size_t)-1, compaction_start = start; + for (i = 0; i < 8; i++) { + bool is_empty = true; + + if (m_separator->match(text, interval.end, end, flags)) { + if (m_separator->match(text, m_separator->interval.end, end, flags)) { + // :: found + if (compaction_i == (size_t)-1) { + // Zero compaction start + compaction_i = i; + compaction_start = m_separator->interval.start; + interval.end = m_separator->interval.end; + } + else { + // More than one zero compaction + break; + } + } + else if (i) { + // Inner : found + interval.end = m_separator->interval.end; + } + else { + // Leading : found + goto error; + } + } + else if (i) { + // : missing + break; + } + + components[i].start = interval.end; + size_t x; + for (x = 0; interval.end < end && text[interval.end];) { + size_t dig, digit_end; + if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; } + else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; } + else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; } + else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; } + else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; } + else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; } + else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; } + else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; } + else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; } + else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; } + else if (m_digit_10->match(text, interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; } + else if (m_digit_11->match(text, interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; } + else if (m_digit_12->match(text, interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; } + else if (m_digit_13->match(text, interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; } + else if (m_digit_14->match(text, interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; } + else if (m_digit_15->match(text, interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; } + else break; + size_t x_n = x * 16 + dig; + if (x_n <= 0xffff) { + x = x_n; + interval.end = digit_end; + is_empty = false; + } + else + break; + } + if (is_empty) { + if (compaction_i != (size_t)-1) { + // Zero compaction active: no sweat. + break; + } + goto error; + } + components[i].end = interval.end; + value.s6_words[i] = (uint16_t)x; + } + + if (compaction_i != (size_t)-1) { + // Align components right due to zero compaction. + size_t j, k; + for (j = 8, k = i; k > compaction_i;) { + value.s6_words[--j] = value.s6_words[--k]; + components[j] = components[k]; + } + for (; j > compaction_i;) { + value.s6_words[--j] = 0; + components[j].start = + components[j].end = compaction_start; + } + } + else if (i < 8) + goto error; + + if (m_scope_id_separator && m_scope_id_separator->match(text, interval.end, end, flags) && + scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags)) + interval.end = scope_id->interval.end; + else if (scope_id) + scope_id->invalidate(); + + interval.start = start; + return true; + + error: + components[0].start = 1; + components[0].end = 0; + components[1].start = 1; + components[1].end = 0; + components[2].start = 1; + components[2].end = 0; + components[3].start = 1; + components[3].end = 0; + components[4].start = 1; + components[4].end = 0; + components[5].start = 1; + components[5].end = 0; + components[6].start = 1; + components[6].end = 0; + components[7].start = 1; + components[7].end = 0; + memset(value, 0, sizeof(value)); + if (scope_id) scope_id->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + components[0].start = 1; + components[0].end = 0; + components[1].start = 1; + components[1].end = 0; + components[2].start = 1; + components[2].end = 0; + components[3].start = 1; + components[3].end = 0; + components[4].start = 1; + components[4].end = 0; + components[5].start = 1; + components[5].end = 0; + components[6].start = 1; + components[6].end = 0; + components[7].start = 1; + components[7].end = 0; + memset(value, 0, sizeof(value)); + if (scope_id) scope_id->invalidate(); + basic_tester::invalidate(); + } + + public: + stdex::interval components[8]; ///< Individual component intervals + struct in6_addr value; ///< IPv6 address value + std::shared_ptr> scope_id; ///< Scope ID (e.g. NIC index with link-local addresses) + + protected: + std::shared_ptr> + m_digit_0, + m_digit_1, + m_digit_2, + m_digit_3, + m_digit_4, + m_digit_5, + m_digit_6, + m_digit_7, + m_digit_8, + m_digit_9, + m_digit_10, + m_digit_11, + m_digit_12, + m_digit_13, + m_digit_14, + m_digit_15; + std::shared_ptr> m_separator, m_scope_id_separator; + }; + + using ipv6_address = basic_ipv6_address; + using wipv6_address = basic_ipv6_address; +#ifdef _UNICODE + using tipv6_address = wipv6_address; +#else + using tipv6_address = ipv6_address; +#endif + using sgml_ipv6_address = basic_ipv6_address; + + /// + /// Test for valid DNS domain character + /// + template + class basic_dns_domain_char : public basic_tester + { + public: + basic_dns_domain_char( + _In_ bool allow_idn, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_allow_idn(allow_idn), + allow_on_edge(true) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + if (('A' <= text[start] && text[start] <= 'Z') || + ('a' <= text[start] && text[start] <= 'z') || + ('0' <= text[start] && text[start] <= '9')) + allow_on_edge = true; + else if (text[start] == '-') + allow_on_edge = false; + else if (m_allow_idn && std::use_facet>(m_locale).is(std::ctype_base::alnum, text[start])) + allow_on_edge = true; + else { + interval.start = (interval.end = start) + 1; + return false; + } + interval.end = (interval.start = start) + 1; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + public: + bool allow_on_edge; ///< Is character allowed at the beginning or an end of a DNS domain? + + protected: + bool m_allow_idn; + }; + + using dns_domain_char = basic_dns_domain_char; + using wdns_domain_char = basic_dns_domain_char; +#ifdef _UNICODE + using tdns_domain_char = wdns_domain_char; +#else + using tdns_domain_char = dns_domain_char; +#endif + + /// + /// Test for valid DNS domain SGML character + /// + class sgml_dns_domain_char : public basic_dns_domain_char + { + public: + sgml_dns_domain_char( + _In_ bool allow_idn, + _In_ const std::locale& locale = std::locale()) : + basic_dns_domain_char(allow_idn, locale) + {} + + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + wchar_t buf[3]; + const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf); + const wchar_t* chr_end = chr + stdex::strlen(chr); + if ((('A' <= chr[0] && chr[0] <= 'Z') || + ('a' <= chr[0] && chr[0] <= 'z') || + ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0) + allow_on_edge = true; + else if (chr[0] == '-' && chr[1] == 0) + allow_on_edge = false; + else if (m_allow_idn && std::use_facet>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end) + allow_on_edge = true; + else { + interval.start = (interval.end = start) + 1; + return false; + } + interval.start = start; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + /// + /// Test for DNS domain/hostname + /// + template + class basic_dns_name : public basic_tester + { + public: + basic_dns_name( + _In_ bool allow_absolute, + _In_ const std::shared_ptr>& domain_char, + _In_ const std::shared_ptr>& separator, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_allow_absolute(allow_absolute), + m_domain_char(domain_char), + m_separator(separator) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + size_t i = start, count; + for (count = 0; i < end && text[i] && count < 127; count++) { + if (m_domain_char->match(text, i, end, flags) && + m_domain_char->allow_on_edge) + { + // Domain start + interval.end = i = m_domain_char->interval.end; + while (i < end && text[i]) { + if (m_domain_char->allow_on_edge && + m_separator->match(text, i, end, flags)) + { + // Domain end + if (m_allow_absolute) + interval.end = i = m_separator->interval.end; + else { + interval.end = i; + i = m_separator->interval.end; + } + break; + } + if (m_domain_char->match(text, i, end, flags)) { + if (m_domain_char->allow_on_edge) + interval.end = i = m_domain_char->interval.end; + else + i = m_domain_char->interval.end; + } + else { + interval.start = start; + return true; + } + } + } + else + break; + } + if (count) { + interval.start = start; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + bool m_allow_absolute; ///< May DNS names end with a dot (absolute name)? + std::shared_ptr> m_domain_char; + std::shared_ptr> m_separator; + }; + + using dns_name = basic_dns_name; + using wdns_name = basic_dns_name; +#ifdef _UNICODE + using tdns_name = wdns_name; +#else + using tdns_name = dns_name; +#endif + using sgml_dns_name = basic_dns_name; + + /// + /// Test for valid URL username character + /// + template + class basic_url_username_char : public basic_tester + { + public: + basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_tester(locale) {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + if (text[start] == '-' || + text[start] == '.' || + text[start] == '_' || + text[start] == '~' || + text[start] == '%' || + text[start] == '!' || + text[start] == '$' || + text[start] == '&' || + text[start] == '\'' || + //text[start] == '(' || + //text[start] == ')' || + text[start] == '*' || + text[start] == '+' || + text[start] == ',' || + text[start] == ';' || + text[start] == '=' || + std::use_facet>(m_locale).is(std::ctype_base::alnum, text[start])) + { + interval.end = (interval.start = start) + 1; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + using url_username_char = basic_url_username_char; + using wurl_username_char = basic_url_username_char; +#ifdef _UNICODE + using turl_username_char = wurl_username_char; +#else + using turl_username_char = url_username_char; +#endif + + /// + /// Test for valid URL username SGML character + /// + class sgml_url_username_char : public basic_url_username_char + { + public: + sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char(locale) {} + + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + wchar_t buf[3]; + const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf); + const wchar_t* chr_end = chr + stdex::strlen(chr); + if ((chr[0] == L'-' || + chr[0] == L'.' || + chr[0] == L'_' || + chr[0] == L'~' || + chr[0] == L'%' || + chr[0] == L'!' || + chr[0] == L'$' || + chr[0] == L'&' || + chr[0] == L'\'' || + //chr[0] == L'(' || + //chr[0] == L')' || + chr[0] == L'*' || + chr[0] == L'+' || + chr[0] == L',' || + chr[0] == L';' || + chr[0] == L'=') && chr[1] == 0 || + std::use_facet>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end) + { + interval.start = start; + return true; + } + } + + interval.start = (interval.end = start) + 1; + return false; + } + }; + + /// + /// Test for valid URL password character + /// + template + class basic_url_password_char : public basic_tester + { + public: + basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_tester(locale) {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + if (text[start] == '-' || + text[start] == '.' || + text[start] == '_' || + text[start] == '~' || + text[start] == '%' || + text[start] == '!' || + text[start] == '$' || + text[start] == '&' || + text[start] == '\'' || + text[start] == '(' || + text[start] == ')' || + text[start] == '*' || + text[start] == '+' || + text[start] == ',' || + text[start] == ';' || + text[start] == '=' || + text[start] == ':' || + std::use_facet>(m_locale).is(std::ctype_base::alnum, text[start])) + { + interval.end = (interval.start = start) + 1; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + using url_password_char = basic_url_password_char; + using wurl_password_char = basic_url_password_char; +#ifdef _UNICODE + using turl_password_char = wurl_password_char; +#else + using turl_password_char = url_password_char; +#endif + + /// + /// Test for valid URL password SGML character + /// + class sgml_url_password_char : public basic_url_password_char + { + public: + sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char(locale) {} + + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + wchar_t buf[3]; + const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf); + const wchar_t* chr_end = chr + stdex::strlen(chr); + if ((chr[0] == L'-' || + chr[0] == L'.' || + chr[0] == L'_' || + chr[0] == L'~' || + chr[0] == L'%' || + chr[0] == L'!' || + chr[0] == L'$' || + chr[0] == L'&' || + chr[0] == L'\'' || + chr[0] == L'(' || + chr[0] == L')' || + chr[0] == L'*' || + chr[0] == L'+' || + chr[0] == L',' || + chr[0] == L';' || + chr[0] == L'=' || + chr[0] == L':') && chr[1] == 0 || + std::use_facet>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end) + { + interval.start = start; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + /// + /// Test for valid URL path character + /// + template + class basic_url_path_char : public basic_tester + { + public: + basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_tester(locale) {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + if (text[start] == '/' || + text[start] == '-' || + text[start] == '.' || + text[start] == '_' || + text[start] == '~' || + text[start] == '%' || + text[start] == '!' || + text[start] == '$' || + text[start] == '&' || + text[start] == '\'' || + text[start] == '(' || + text[start] == ')' || + text[start] == '*' || + text[start] == '+' || + text[start] == ',' || + text[start] == ';' || + text[start] == '=' || + text[start] == ':' || + text[start] == '@' || + text[start] == '?' || + text[start] == '#' || + std::use_facet>(m_locale).is(std::ctype_base::alnum, text[start])) + { + interval.end = (interval.start = start) + 1; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + using url_path_char = basic_url_path_char; + using wurl_path_char = basic_url_path_char; +#ifdef _UNICODE + using turl_path_char = wurl_path_char; +#else + using turl_path_char = url_path_char; +#endif + + /// + /// Test for valid URL path SGML character + /// + class sgml_url_path_char : public basic_url_path_char + { + public: + sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char(locale) {} + + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start < end && text[start]) { + wchar_t buf[3]; + const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf); + const wchar_t* chr_end = chr + stdex::strlen(chr); + if ((chr[0] == L'/' || + chr[0] == L'-' || + chr[0] == L'.' || + chr[0] == L'_' || + chr[0] == L'~' || + chr[0] == L'%' || + chr[0] == L'!' || + chr[0] == L'$' || + chr[0] == L'&' || + chr[0] == L'\'' || + chr[0] == L'(' || + chr[0] == L')' || + chr[0] == L'*' || + chr[0] == L'+' || + chr[0] == L',' || + chr[0] == L';' || + chr[0] == L'=' || + chr[0] == L':' || + chr[0] == L'@' || + chr[0] == L'?' || + chr[0] == L'#') && chr[1] == 0 || + std::use_facet>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end) + { + interval.start = start; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + /// + /// Test for URL path + /// + template + class basic_url_path : public basic_tester + { + public: + basic_url_path( + _In_ const std::shared_ptr>& path_char, + _In_ const std::shared_ptr>& query_start, + _In_ const std::shared_ptr>& bookmark_start, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_path_char(path_char), + m_query_start(query_start), + m_bookmark_start(bookmark_start) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + + interval.end = start; + path.start = start; + query.start = 1; + query.end = 0; + bookmark.start = 1; + bookmark.end = 0; + + for (;;) { + if (interval.end >= end || !text[interval.end]) + break; + if (m_query_start->match(text, interval.end, end, flags)) { + path.end = interval.end; + query.start = interval.end = m_query_start->interval.end; + for (;;) { + if (interval.end >= end || !text[interval.end]) { + query.end = interval.end; + break; + } + if (m_bookmark_start->match(text, interval.end, end, flags)) { + query.end = interval.end; + bookmark.start = interval.end = m_bookmark_start->interval.end; + for (;;) { + if (interval.end >= end || !text[interval.end]) { + bookmark.end = interval.end; + break; + } + if (m_path_char->match(text, interval.end, end, flags)) + interval.end = m_path_char->interval.end; + else { + bookmark.end = interval.end; + break; + } + } + interval.start = start; + return true; + } + if (m_path_char->match(text, interval.end, end, flags)) + interval.end = m_path_char->interval.end; + else { + query.end = interval.end; + break; + } + } + interval.start = start; + return true; + } + if (m_bookmark_start->match(text, interval.end, end, flags)) { + path.end = interval.end; + bookmark.start = interval.end = m_bookmark_start->interval.end; + for (;;) { + if (interval.end >= end || !text[interval.end]) { + bookmark.end = interval.end; + break; + } + if (m_path_char->match(text, interval.end, end, flags)) + interval.end = m_path_char->interval.end; + else { + bookmark.end = interval.end; + break; + } + } + interval.start = start; + return true; + } + if (m_path_char->match(text, interval.end, end, flags)) + interval.end = m_path_char->interval.end; + else + break; + } + + if (start < interval.end) { + path.end = interval.end; + interval.start = start; + return true; + } + + path.start = 1; + path.end = 0; + bookmark.start = 1; + bookmark.end = 0; + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + path.start = 1; + path.end = 0; + query.start = 1; + query.end = 0; + bookmark.start = 1; + bookmark.end = 0; + basic_tester::invalidate(); + } + + public: + stdex::interval path; + stdex::interval query; + stdex::interval bookmark; + + protected: + std::shared_ptr> m_path_char; + std::shared_ptr> m_query_start; + std::shared_ptr> m_bookmark_start; + }; + + using url_path = basic_url_path; + using wurl_path = basic_url_path; +#ifdef _UNICODE + using turl_path = wurl_path; +#else + using turl_path = url_path; +#endif + using sgml_url_path = basic_url_path; + + /// + /// Test for URL + /// + template + class basic_url : public basic_tester + { + public: + basic_url( + _In_ const std::shared_ptr>& _http_scheme, + _In_ const std::shared_ptr>& _ftp_scheme, + _In_ const std::shared_ptr>& _mailto_scheme, + _In_ const std::shared_ptr>& _file_scheme, + _In_ const std::shared_ptr>& colon, + _In_ const std::shared_ptr>& slash, + _In_ const std::shared_ptr>& _username, + _In_ const std::shared_ptr>& _password, + _In_ const std::shared_ptr>& at, + _In_ const std::shared_ptr>& ip_lbracket, + _In_ const std::shared_ptr>& ip_rbracket, + _In_ const std::shared_ptr>& _ipv4_host, + _In_ const std::shared_ptr>& _ipv6_host, + _In_ const std::shared_ptr>& _dns_host, + _In_ const std::shared_ptr>& _port, + _In_ const std::shared_ptr>& _path, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + http_scheme(_http_scheme), + ftp_scheme(_ftp_scheme), + mailto_scheme(_mailto_scheme), + file_scheme(_file_scheme), + m_colon(colon), + m_slash(slash), + username(_username), + password(_password), + m_at(at), + m_ip_lbracket(ip_lbracket), + m_ip_rbracket(ip_rbracket), + ipv4_host(_ipv4_host), + ipv6_host(_ipv6_host), + dns_host(_dns_host), + port(_port), + path(_path) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + + interval.end = start; + + if (http_scheme->match(text, interval.end, end, flags) && + m_colon->match(text, http_scheme->interval.end, end, flags) && + m_slash->match(text, m_colon->interval.end, end, flags) && + m_slash->match(text, m_slash->interval.end, end, flags)) + { + // http:// + interval.end = m_slash->interval.end; + ftp_scheme->invalidate(); + mailto_scheme->invalidate(); + file_scheme->invalidate(); + } + else if (ftp_scheme->match(text, interval.end, end, flags) && + m_colon->match(text, ftp_scheme->interval.end, end, flags) && + m_slash->match(text, m_colon->interval.end, end, flags) && + m_slash->match(text, m_slash->interval.end, end, flags)) + { + // ftp:// + interval.end = m_slash->interval.end; + http_scheme->invalidate(); + mailto_scheme->invalidate(); + file_scheme->invalidate(); + } + else if (mailto_scheme->match(text, interval.end, end, flags) && + m_colon->match(text, mailto_scheme->interval.end, end, flags)) + { + // mailto: + interval.end = m_colon->interval.end; + http_scheme->invalidate(); + ftp_scheme->invalidate(); + file_scheme->invalidate(); + } + else if (file_scheme->match(text, interval.end, end, flags) && + m_colon->match(text, file_scheme->interval.end, end, flags) && + m_slash->match(text, m_colon->interval.end, end, flags) && + m_slash->match(text, m_slash->interval.end, end, flags)) + { + // file:// + interval.end = m_slash->interval.end; + http_scheme->invalidate(); + ftp_scheme->invalidate(); + mailto_scheme->invalidate(); + } + else { + // Default to http: + http_scheme->invalidate(); + ftp_scheme->invalidate(); + mailto_scheme->invalidate(); + file_scheme->invalidate(); + } + + if (ftp_scheme->interval) { + if (username->match(text, interval.end, end, flags)) { + if (m_colon->match(text, username->interval.end, end, flags) && + password->match(text, m_colon->interval.end, end, flags) && + m_at->match(text, password->interval.end, end, flags)) + { + // Username and password + interval.end = m_at->interval.end; + } + else if (m_at->match(text, interval.end, end, flags)) { + // Username only + interval.end = m_at->interval.end; + password->invalidate(); + } + else { + username->invalidate(); + password->invalidate(); + } + } + else { + username->invalidate(); + password->invalidate(); + } + + if (ipv4_host->match(text, interval.end, end, flags)) { + // Host is IPv4 + interval.end = ipv4_host->interval.end; + ipv6_host->invalidate(); + dns_host->invalidate(); + } + else if ( + m_ip_lbracket->match(text, interval.end, end, flags) && + ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) && + m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags)) + { + // Host is IPv6 + interval.end = m_ip_rbracket->interval.end; + ipv4_host->invalidate(); + dns_host->invalidate(); + } + else if (dns_host->match(text, interval.end, end, flags)) { + // Host is hostname + interval.end = dns_host->interval.end; + ipv4_host->invalidate(); + ipv6_host->invalidate(); + } + else { + invalidate(); + return false; + } + + if (m_colon->match(text, interval.end, end, flags) && + port->match(text, m_colon->interval.end, end, flags)) + { + // Port + interval.end = port->interval.end; + } + else + port->invalidate(); + + if (path->match(text, interval.end, end, flags)) { + // Path + interval.end = path->interval.end; + } + + interval.start = start; + return true; + } + + if (mailto_scheme->interval) { + if (username->match(text, interval.end, end, flags) && + m_at->match(text, username->interval.end, end, flags)) + { + // Username + interval.end = m_at->interval.end; + } + else { + invalidate(); + return false; + } + + if (m_ip_lbracket->match(text, interval.end, end, flags) && + ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) && + m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags)) + { + // Host is IPv4 + interval.end = m_ip_rbracket->interval.end; + ipv6_host->invalidate(); + dns_host->invalidate(); + } + else if ( + m_ip_lbracket->match(text, interval.end, end, flags) && + ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) && + m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags)) + { + // Host is IPv6 + interval.end = m_ip_rbracket->interval.end; + ipv4_host->invalidate(); + dns_host->invalidate(); + } + else if (dns_host->match(text, interval.end, end, flags)) { + // Host is hostname + interval.end = dns_host->interval.end; + ipv4_host->invalidate(); + ipv6_host->invalidate(); + } + else { + invalidate(); + return false; + } + + password->invalidate(); + port->invalidate(); + path->invalidate(); + interval.start = start; + return true; + } + + if (file_scheme->interval) { + if (path->match(text, interval.end, end, flags)) { + // Path + interval.end = path->interval.end; + } + + username->invalidate(); + password->invalidate(); + ipv4_host->invalidate(); + ipv6_host->invalidate(); + dns_host->invalidate(); + port->invalidate(); + interval.start = start; + return true; + } + + // "http://" found or defaulted to + + // If "http://" explicit, test for username&password. + if (http_scheme->interval && + username->match(text, interval.end, end, flags)) + { + if (m_colon->match(text, username->interval.end, end, flags) && + password->match(text, m_colon->interval.end, end, flags) && + m_at->match(text, password->interval.end, end, flags)) + { + // Username and password + interval.end = m_at->interval.end; + } + else if (m_at->match(text, username->interval.end, end, flags)) { + // Username only + interval.end = m_at->interval.end; + password->invalidate(); + } + else { + username->invalidate(); + password->invalidate(); + } + } + else { + username->invalidate(); + password->invalidate(); + } + + if (ipv4_host->match(text, interval.end, end, flags)) { + // Host is IPv4 + interval.end = ipv4_host->interval.end; + ipv6_host->invalidate(); + dns_host->invalidate(); + } + else if ( + m_ip_lbracket->match(text, interval.end, end, flags) && + ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) && + m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags)) + { + // Host is IPv6 + interval.end = m_ip_rbracket->interval.end; + ipv4_host->invalidate(); + dns_host->invalidate(); + } + else if (dns_host->match(text, interval.end, end, flags)) { + // Host is hostname + interval.end = dns_host->interval.end; + ipv4_host->invalidate(); + ipv6_host->invalidate(); + } + else { + invalidate(); + return false; + } + + if (m_colon->match(text, interval.end, end, flags) && + port->match(text, m_colon->interval.end, end, flags)) + { + // Port + interval.end = port->interval.end; + } + else + port->invalidate(); + + if (path->match(text, interval.end, end, flags)) { + // Path + interval.end = path->interval.end; + } + + interval.start = start; + return true; + } + + virtual void invalidate() + { + http_scheme->invalidate(); + ftp_scheme->invalidate(); + mailto_scheme->invalidate(); + file_scheme->invalidate(); + username->invalidate(); + password->invalidate(); + ipv4_host->invalidate(); + ipv6_host->invalidate(); + dns_host->invalidate(); + port->invalidate(); + path->invalidate(); + basic_tester::invalidate(); + } + + public: + std::shared_ptr> http_scheme; + std::shared_ptr> ftp_scheme; + std::shared_ptr> mailto_scheme; + std::shared_ptr> file_scheme; + std::shared_ptr> username; + std::shared_ptr> password; + std::shared_ptr> ipv4_host; + std::shared_ptr> ipv6_host; + std::shared_ptr> dns_host; + std::shared_ptr> port; + std::shared_ptr> path; + + protected: + std::shared_ptr> m_colon; + std::shared_ptr> m_slash; + std::shared_ptr> m_at; + std::shared_ptr> m_ip_lbracket; + std::shared_ptr> m_ip_rbracket; + }; + + using url = basic_url; + using wurl = basic_url; +#ifdef _UNICODE + using turl = wurl; +#else + using turl = url; +#endif + using sgml_url = basic_url; + + /// + /// Test for e-mail address + /// + template + class basic_email_address : public basic_tester + { + public: + basic_email_address( + _In_ const std::shared_ptr>& _username, + _In_ const std::shared_ptr>& at, + _In_ const std::shared_ptr>& ip_lbracket, + _In_ const std::shared_ptr>& ip_rbracket, + _In_ const std::shared_ptr>& _ipv4_host, + _In_ const std::shared_ptr>& _ipv6_host, + _In_ const std::shared_ptr>& _dns_host, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + username(_username), + m_at(at), + m_ip_lbracket(ip_lbracket), + m_ip_rbracket(ip_rbracket), + ipv4_host(_ipv4_host), + ipv6_host(_ipv6_host), + dns_host(_dns_host) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + + if (username->match(text, start, end, flags) && + m_at->match(text, username->interval.end, end, flags)) + { + // Username@ + if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) && + ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) && + m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags)) + { + // Host is IPv4 + interval.end = m_ip_rbracket->interval.end; + ipv6_host->invalidate(); + dns_host->invalidate(); + } + else if ( + m_ip_lbracket->match(text, m_at->interval.end, end, flags) && + ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) && + m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags)) + { + // Host is IPv6 + interval.end = m_ip_rbracket->interval.end; + ipv4_host->invalidate(); + dns_host->invalidate(); + } + else if (dns_host->match(text, m_at->interval.end, end, flags)) { + // Host is hostname + interval.end = dns_host->interval.end; + ipv4_host->invalidate(); + ipv6_host->invalidate(); + } + else + goto error; + interval.start = start; + return true; + } + + error: + username->invalidate(); + ipv4_host->invalidate(); + ipv6_host->invalidate(); + dns_host->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + username->invalidate(); + ipv4_host->invalidate(); + ipv6_host->invalidate(); + dns_host->invalidate(); + basic_tester::invalidate(); + } + + public: + std::shared_ptr> username; + std::shared_ptr> ipv4_host; + std::shared_ptr> ipv6_host; + std::shared_ptr> dns_host; + + protected: + std::shared_ptr> m_at; + std::shared_ptr> m_ip_lbracket; + std::shared_ptr> m_ip_rbracket; + }; + + using email_address = basic_email_address; + using wemail_address = basic_email_address; +#ifdef _UNICODE + using temail_address = wemail_address; +#else + using temail_address = email_address; +#endif + using sgml_email_address = basic_email_address; + + /// + /// Test for emoticon + /// + template + class basic_emoticon : public basic_tester + { + public: + basic_emoticon( + _In_ const std::shared_ptr>& _emoticon, + _In_ const std::shared_ptr>& _apex, + _In_ const std::shared_ptr>& _eyes, + _In_ const std::shared_ptr>& _nose, + _In_ const std::shared_ptr>& _mouth, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + emoticon(_emoticon), + apex(_apex), + eyes(_eyes), + nose(_nose), + mouth(_mouth) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + + if (emoticon && emoticon->match(text, start, end, flags)) { + if (apex) apex->invalidate(); + eyes->invalidate(); + if (nose) nose->invalidate(); + mouth->invalidate(); + interval.start = start; + interval.end = emoticon->interval.end; + return true; + } + + interval.end = start; + + if (apex && apex->match(text, interval.end, end, flags)) + interval.end = apex->interval.end; + + if (eyes->match(text, interval.end, end, flags)) { + if (nose && nose->match(text, eyes->interval.end, end, flags) && + mouth->match(text, nose->interval.end, end, flags)) + { + size_t + start_mouth = mouth->interval.start, + hit_offset = mouth->hit_offset; + // Mouth may repeat :-))))))) + for (interval.end = mouth->interval.end; mouth->match(text, interval.end, end, flags) && mouth->hit_offset == hit_offset; interval.end = mouth->interval.end); + mouth->interval.start = start_mouth; + mouth->interval.end = interval.end; + interval.start = start; + return true; + } + if (mouth->match(text, eyes->interval.end, end, flags)) { + size_t + start_mouth = mouth->interval.start, + hit_offset = mouth->hit_offset; + // Mouth may repeat :-))))))) + for (interval.end = mouth->interval.end; mouth->match(text, interval.end, end, flags) && mouth->hit_offset == hit_offset; interval.end = mouth->interval.end); + if (nose) nose->invalidate(); + mouth->interval.start = start_mouth; + mouth->interval.end = interval.end; + interval.start = start; + return true; + } + } + + if (emoticon) emoticon->invalidate(); + if (apex) apex->invalidate(); + eyes->invalidate(); + if (nose) nose->invalidate(); + mouth->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + if (emoticon) emoticon->invalidate(); + if (apex) apex->invalidate(); + eyes->invalidate(); + if (nose) nose->invalidate(); + mouth->invalidate(); + basic_tester::invalidate(); + } + + public: + std::shared_ptr> emoticon; ///< emoticon as a whole (e.g. 😀, 🤔, 😶) + std::shared_ptr> apex; ///< apex/eyebrows/halo (e.g. O, 0) + std::shared_ptr> eyes; ///< eyes (e.g. :, ;, >, |, B) + std::shared_ptr> nose; ///< nose (e.g. -, o) + std::shared_ptr> mouth; ///< mouth (e.g. ), ), (, (, |, P, D, p, d) + }; + + using emoticon = basic_emoticon; + using wemoticon = basic_emoticon; +#ifdef _UNICODE + using temoticon = wemoticon; +#else + using temoticon = emoticon; +#endif + using sgml_emoticon = basic_emoticon; + + /// + /// Test for date + /// + template + class basic_date : public basic_tester + { + public: + enum class format { + dmy = 0x1, + mdy = 0x2, + ymd = 0x4, + ym = 0x8, + my = 0x10, + dm = 0x20, + md = 0x40, + }; + + basic_date( + _In_ int format_mask, + _In_ const std::shared_ptr>& _day, + _In_ const std::shared_ptr>& _month, + _In_ const std::shared_ptr>& _year, + _In_ const std::shared_ptr>& separator, + _In_ const std::shared_ptr>& space, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + format(0), + m_format_mask(format_mask), + day(_day), + month(_month), + year(_year), + m_separator(separator), + m_space(space) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + + const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line. + if ((m_format_mask & format::dmy) != 0) { + if (day->match(text, start, end, flags)) { + for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (m_separator->match(text, interval.end, end, flags)) { + size_t hit_offset = m_separator->hit_offset; + for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (month->match(text, interval.end, end, flags)) { + for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (m_separator->match(text, interval.end, end, flags) && + m_separator->hit_offset == hit_offset) // Both separators must match. + { + for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (year->match(text, interval.end, end, flags) && + is_valid(day->value, month->value)) + { + interval.start = start; + interval.end = year->interval.end; + format = format::dmy; + return true; + } + } + } + } + } + } + + if ((m_format_mask & format::mdy) != 0) { + if (month->match(text, start, end, flags)) { + for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (m_separator->match(text, interval.end, end, flags)) { + size_t hit_offset = m_separator->hit_offset; + for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (day->match(text, interval.end, end, flags)) { + for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (m_separator->match(text, interval.end, end, flags) && + m_separator->hit_offset == hit_offset) // Both separators must match. + { + for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (year->match(text, interval.end, end, flags) && + is_valid(day->value, month->value)) + { + interval.start = start; + interval.end = year->interval.end; + format = format::mdy; + return true; + } + } + } + } + } + } + + if ((m_format_mask & format::ymd) != 0) { + if (year->match(text, start, end, flags)) { + for (interval.end = year->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (m_separator->match(text, interval.end, end, flags)) { + size_t hit_offset = m_separator->hit_offset; + for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (month->match(text, interval.end, end, flags)) { + for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (m_separator->match(text, interval.end, end, flags) && + m_separator->hit_offset == hit_offset) // Both separators must match. + { + for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (day->match(text, interval.end, end, flags) && + is_valid(day->value, month->value)) + { + interval.start = start; + interval.end = day->interval.end; + format = format::ymd; + return true; + } + } + } + } + } + } + + if ((m_format_mask & format::ym) != 0) { + if (year->match(text, start, end, flags)) { + for (interval.end = year->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (m_separator->match(text, interval.end, end, flags)) { + for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (month->match(text, interval.end, end, flags) && + is_valid((size_t)-1, month->value)) + { + if (day) day->invalidate(); + interval.start = start; + interval.end = month->interval.end; + format = format::ym; + return true; + } + } + } + } + + if ((m_format_mask & format::my) != 0) { + if (month->match(text, start, end, flags)) { + for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (m_separator->match(text, interval.end, end, flags)) { + for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (year->match(text, interval.end, end, flags) && + is_valid((size_t)-1, month->value)) + { + if (day) day->invalidate(); + interval.start = start; + interval.end = year->interval.end; + format = format::my; + return true; + } + } + } + } + + if ((m_format_mask & format::dm) != 0) { + if (day->match(text, start, end, flags)) { + for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (m_separator->match(text, interval.end, end, flags)) { + size_t hit_offset = m_separator->hit_offset; + for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (month->match(text, interval.end, end, flags) && + is_valid(day->value, month->value)) + { + if (year) year->invalidate(); + interval.start = start; + for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (m_separator->match(text, interval.end, end, flags) && + m_separator->hit_offset == hit_offset) // Both separators must match. + interval.end = m_separator->interval.end; + else + interval.end = month->interval.end; + format = format::dm; + return true; + } + } + } + } + + if ((m_format_mask & format::md) != 0) { + if (month->match(text, start, end, flags)) { + for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (m_separator->match(text, interval.end, end, flags)) { + size_t hit_offset = m_separator->hit_offset; + for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (day->match(text, interval.end, end, flags) && + is_valid(day->value, month->value)) + { + if (year) year->invalidate(); + interval.start = start; + for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end); + if (m_separator->match(text, interval.end, end, flags) && + m_separator->hit_offset == hit_offset) // Both separators must match. + interval.end = m_separator->interval.end; + else + interval.end = day->interval.end; + format = format::md; + return true; + } + } + } + } + + if (day) day->invalidate(); + if (month) month->invalidate(); + if (year) year->invalidate(); + format = 0; + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + if (day) day->invalidate(); + if (month) month->invalidate(); + if (year) year->invalidate(); + format = 0; + basic_tester::invalidate(); + } + + protected: + static inline bool is_valid(size_t day, size_t month) + { + if (month == (size_t)-1) { + // Default to January. This allows validating day only, as January has all 31 days. + month = 1; + } + if (day == (size_t)-1) { + // Default to 1st day in month. This allows validating month only, as each month has 1st day. + day = 1; + } + + switch (month) { + case 1: + case 3: + case 5: + case 7: + case 8: + case 10: + case 12: + return 1 <= day && day <= 31; + case 2: + return 1 <= day && day <= 29; + case 4: + case 6: + case 9: + case 11: + return 1 <= day && day <= 30; + default: + return false; + } + } + + public: + format format; + std::shared_ptr> day; + std::shared_ptr> month; + std::shared_ptr> year; + + protected: + int m_format_mask; + std::shared_ptr> m_separator; + std::shared_ptr> m_space; + }; + + using date = basic_date; + using wdate = basic_date; +#ifdef _UNICODE + using tdate = wdate; +#else + using tdate = date; +#endif + using sgml_date = basic_date; + + /// + /// Test for time + /// + template + class basic_time : public basic_tester + { + public: + basic_time( + _In_ const std::shared_ptr>& _hour, + _In_ const std::shared_ptr>& _minute, + _In_ const std::shared_ptr>& _second, + _In_ const std::shared_ptr>& _millisecond, + _In_ const std::shared_ptr>& separator, + _In_ const std::shared_ptr>& millisecond_separator, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + hour(_hour), + minute(_minute), + second(_second), + millisecond(_millisecond), + m_separator(separator), + m_millisecond_separator(millisecond_separator) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + + if (hour->match(text, start, end, flags) && + m_separator->match(text, hour->interval.end, end, flags) && + minute->match(text, m_separator->interval.end, end, flags) && + minute->value < 60) + { + // hh::mm + size_t hit_offset = m_separator->hit_offset; + if (m_separator->match(text, minute->interval.end, end, flags) && + m_separator->hit_offset == hit_offset && // Both separators must match. + second && second->match(text, m_separator->interval.end, end, flags) && + second->value < 60) + { + // hh::mm:ss + if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) && + millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) && + millisecond->value < 1000) + { + // hh::mm:ss.mmmm + interval.end = millisecond->interval.end; + } + else { + if (millisecond) millisecond->invalidate(); + interval.end = second->interval.end; + } + } + else { + if (second) second->invalidate(); + if (millisecond) millisecond->invalidate(); + interval.end = minute->interval.end; + } + interval.start = start; + return true; + } + + hour->invalidate(); + minute->invalidate(); + if (second) second->invalidate(); + if (millisecond) millisecond->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + hour->invalidate(); + minute->invalidate(); + if (second) second->invalidate(); + if (millisecond) millisecond->invalidate(); + basic_tester::invalidate(); + } + + public: + std::shared_ptr> hour; + std::shared_ptr> minute; + std::shared_ptr> second; + std::shared_ptr> millisecond; + + protected: + std::shared_ptr> m_separator; + std::shared_ptr> m_millisecond_separator; + }; + + using time = basic_time; + using wtime = basic_time; +#ifdef _UNICODE + using ttime = wtime; +#else + using ttime = time; +#endif + using sgml_time = basic_time; + + /// + /// Test for angle in d°mm'ss.dddd form + /// + template + class basic_angle : public basic_tester + { + public: + basic_angle( + _In_ const std::shared_ptr>& _degree, + _In_ const std::shared_ptr>& _degree_separator, + _In_ const std::shared_ptr>& _minute, + _In_ const std::shared_ptr>& _minute_separator, + _In_ const std::shared_ptr>& _second, + _In_ const std::shared_ptr>& _second_separator, + _In_ const std::shared_ptr>& _decimal, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + degree(_degree), + degree_separator(_degree_separator), + minute(_minute), + minute_separator(_minute_separator), + second(_second), + second_separator(_second_separator), + decimal(_decimal) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + + interval.end = start; + + if (degree->match(text, interval.end, end, flags) && + degree_separator->match(text, degree->interval.end, end, flags)) + { + // Degrees + interval.end = degree_separator->interval.end; + } + else { + degree->invalidate(); + degree_separator->invalidate(); + } + + if (minute->match(text, interval.end, end, flags) && + minute->value < 60 && + minute_separator->match(text, minute->interval.end, end, flags)) + { + // Minutes + interval.end = minute_separator->interval.end; + } + else { + minute->invalidate(); + minute_separator->invalidate(); + } + + if (second && second->match(text, interval.end, end, flags) && + second->value < 60) + { + // Seconds + interval.end = second->interval.end; + if (second_separator && second_separator->match(text, interval.end, end, flags)) + interval.end = second_separator->interval.end; + else + if (second_separator) second_separator->invalidate(); + } + else { + if (second) second->invalidate(); + if (second_separator) second_separator->invalidate(); + } + + if (degree->interval.start < degree->interval.end || + minute->interval.start < minute->interval.end || + second && second->interval.start < second->interval.end) + { + if (decimal && decimal->match(text, interval.end, end, flags)) { + // Decimals + interval.end = decimal->interval.end; + } + else if (decimal) + decimal->invalidate(); + interval.start = start; + return true; + } + if (decimal) decimal->invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + degree->invalidate(); + degree_separator->invalidate(); + minute->invalidate(); + minute_separator->invalidate(); + if (second) second->invalidate(); + if (second_separator) second_separator->invalidate(); + if (decimal) decimal->invalidate(); + basic_tester::invalidate(); + } + + public: + std::shared_ptr> degree; + std::shared_ptr> degree_separator; + std::shared_ptr> minute; + std::shared_ptr> minute_separator; + std::shared_ptr> second; + std::shared_ptr> second_separator; + std::shared_ptr> decimal; + }; + + using angle = basic_angle; + using wangle = basic_angle; +#ifdef _UNICODE + using RRegElKot = wangle; +#else + using RRegElKot = angle; +#endif + using sgml_angle = basic_angle; + + /// + /// Test for phone number + /// + template + class basic_phone_number : public basic_tester + { + public: + basic_phone_number( + _In_ const std::shared_ptr>& digit, + _In_ const std::shared_ptr>& plus_sign, + _In_ const std::shared_ptr>& lparenthesis, + _In_ const std::shared_ptr>& rparenthesis, + _In_ const std::shared_ptr>& separator, + _In_ const std::shared_ptr>& space, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_digit(digit), + m_plus_sign(plus_sign), + m_lparenthesis(lparenthesis), + m_rparenthesis(rparenthesis), + m_separator(separator), + m_space(space) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + + size_t safe_digit_end = start, safe_value_size = 0; + bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false; + const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line. + + interval.end = start; + value.clear(); + m_lparenthesis->invalidate(); + m_rparenthesis->invalidate(); + + if (m_plus_sign && m_plus_sign->match(text, interval.end, end, flags)) { + value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end); + safe_value_size = value.size(); + interval.end = m_plus_sign->interval.end; + } + + for (;;) { + assert(text || interval.end >= end); + if (interval.end >= end || !text[interval.end]) + break; + if (m_digit->match(text, interval.end, end, flags)) { + // Digit + value.append(text + m_digit->interval.start, text + m_digit->interval.end); + interval.end = m_digit->interval.end; + if (!in_parentheses) { + safe_digit_end = interval.end; + safe_value_size = value.size(); + has_digits = true; + } + after_digit = true; + after_parentheses = false; + } + else if ( + m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet + m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left + m_lparenthesis->match(text, interval.end, end, flags)) + { + // Left parenthesis + value.Prilepi(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size()); + interval.end = m_lparenthesis->interval.end; + in_parentheses = true; + after_digit = false; + after_parentheses = false; + } + else if ( + in_parentheses && // After left parenthesis + m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet + m_rparenthesis->match(text, interval.end, end, flags) && + m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match + { + // Right parenthesis + value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end); + interval.end = m_rparenthesis->interval.end; + safe_digit_end = interval.end; + safe_value_size = value.size(); + in_parentheses = false; + after_digit = false; + after_parentheses = true; + } + else if ( + after_digit && + !in_parentheses && // No separators inside parentheses + !after_parentheses && // No separators following right parenthesis + m_separator && m_separator->match(text, interval.end, end, flags)) + { + // Separator + interval.end = m_separator->interval.end; + after_digit = false; + after_parentheses = false; + } + else if ( + (after_digit || after_parentheses) && + m_space && m_space->match(text, interval.end, end, space_match_flags)) + { + // Space + interval.end = m_space->interval.end; + after_digit = false; + after_parentheses = false; + } + else + break; + } + if (has_digits) { + value.erase(safe_value_size); + interval.start = start; + interval.end = safe_digit_end; + return true; + } + value.clear(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + value.clear(); + basic_tester::invalidate(); + } + + public: + std::basic_string value; ///< Normalized phone number + + protected: + std::shared_ptr> m_digit; + std::shared_ptr> m_plus_sign; + std::shared_ptr> m_lparenthesis; + std::shared_ptr> m_rparenthesis; + std::shared_ptr> m_separator; + std::shared_ptr> m_space; + }; + + using phone_number = basic_phone_number; + using wphone_number = basic_phone_number; +#ifdef _UNICODE + using tphone_number = wphone_number; +#else + using tphone_number = phone_number; +#endif + using sgml_phone_number = basic_phone_number; + + /// + /// Test for chemical formula + /// + template + class basic_chemical_formula : public basic_tester + { + public: + basic_chemical_formula( + _In_ const std::shared_ptr>& element, + _In_ const std::shared_ptr>& digit, + _In_ const std::shared_ptr>& sign, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_element(element), + m_digit(digit), + m_sign(sign), + has_digits(false), + has_charge(false) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + + has_digits = false; + has_charge = false; + interval.end = start; + + const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive. + for (;;) { + if (m_element->match(text, interval.end, end, element_match_flags)) { + interval.end = m_element->interval.end; + while (m_digit->match(text, interval.end, end, flags)) { + interval.end = m_digit->interval.end; + has_digits = true; + } + } + else if (start < interval.end) { + if (m_sign->match(text, interval.end, end, flags)) { + interval.end = m_sign->interval.end; + has_charge = true; + } + interval.start = start; + return true; + } + else { + interval.start = (interval.end = start) + 1; + return false; + } + } + } + + virtual void invalidate() + { + has_digits = false; + has_charge = false; + basic_tester::invalidate(); + } + + public: + bool has_digits; + bool has_charge; + + protected: + std::shared_ptr> m_element; + std::shared_ptr> m_digit; + std::shared_ptr> m_sign; + }; + + using chemical_formula = basic_chemical_formula; + using wchemical_formula = basic_chemical_formula; +#ifdef _UNICODE + using tchemical_formula = wchemical_formula; +#else + using tchemical_formula = chemical_formula; +#endif + using sgml_chemical_formula = basic_chemical_formula; + + /// + /// Test for HTTP line break (RFC2616: CRLF | LF) + /// + class http_line_break : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + + assert(text || interval.end >= end); + if (interval.end < end && text[interval.end]) { + if (text[interval.end] == '\r') { + interval.end++; + if (interval.end < end && text[interval.end] == '\n') { + interval.start = start; + interval.end++; + return true; + } + } + else if (text[interval.end] == '\n') { + interval.start = start; + interval.end++; + return true; + } + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + /// + /// Test for HTTP space (RFC2616: LWS) + /// + class http_space : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + if (m_line_break.match(text, interval.end, end, flags)) { + interval.end = m_line_break.interval.end; + if (interval.end < end && text[interval.end] && isspace(text[interval.end])) { + interval.start = start; + interval.end++; + while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++; + return true; + } + } + else if (interval.end < end && text[interval.end] && isspace(text[interval.end])) { + interval.start = start; + interval.end++; + while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + http_line_break m_line_break; + }; + + /// + /// Test for HTTP text character (RFC2616: TEXT) + /// + class http_text_char : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + + assert(text || interval.end >= end); + if (m_space.match(text, interval.end, end, flags)) { + interval.start = start; + interval.end = m_space.interval.end; + return true; + } + else if (interval.end < end && text[interval.end] && text[interval.end] >= 0x20) { + interval.start = start; + interval.end++; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + protected: + http_space m_space; + }; + + /// + /// Test for HTTP token (RFC2616: token - tolerates non-ASCII) + /// + class http_token : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + for (;;) { + if (interval.end < end && text[interval.end]) { + if ((unsigned int)text[interval.end] < 0x20 || + (unsigned int)text[interval.end] == 0x7f || + text[interval.end] == '(' || + text[interval.end] == ')' || + text[interval.end] == '<' || + text[interval.end] == '>' || + text[interval.end] == '@' || + text[interval.end] == ',' || + text[interval.end] == ';' || + text[interval.end] == ':' || + text[interval.end] == '\\' || + text[interval.end] == '\"' || + text[interval.end] == '/' || + text[interval.end] == '[' || + text[interval.end] == ']' || + text[interval.end] == '?' || + text[interval.end] == '=' || + text[interval.end] == '{' || + text[interval.end] == '}' || + isspace(text[interval.end])) + break; + else + interval.end++; + } + else + break; + } + if (start < interval.end) { + interval.start = start; + return true; + } + else { + interval.start = (interval.end = start) + 1; + return false; + } + } + }; + + /// + /// Test for HTTP quoted string (RFC2616: quoted-string) + /// + class http_quoted_string : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + if (interval.end < end && text[interval.end] != '"') + goto error; + interval.end++; + content.start = interval.end; + for (;;) { + assert(text || interval.end >= end); + if (interval.end < end && text[interval.end]) { + if (text[interval.end] == '"') { + content.end = interval.end; + interval.end++; + break; + } + else if (text[interval.end] == '\\') { + interval.end++; + if (interval.end < end && text[interval.end]) { + interval.end++; + } + else + goto error; + } + else if (m_chr.match(text, interval.end, end, flags)) + interval.end++; + else + goto error; + } + else + goto error; + } + interval.start = start; + return true; + + error: + content.start = 1; + content.end = 0; + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + content.start = 1; + content.end = 0; + tester::invalidate(); + } + + public: + stdex::interval content; ///< String content (without quotes) + + protected: + http_text_char m_chr; + }; + + /// + /// Test for HTTP value (RFC2616: value) + /// + class http_value : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + if (string.match(text, interval.end, end, flags)) { + token.invalidate(); + interval.end = string.interval.end; + interval.start = start; + return true; + } + else if (token.match(text, interval.end, end, flags)) { + string.invalidate(); + interval.end = token.interval.end; + interval.start = start; + return true; + } + else { + interval.start = (interval.end = start) + 1; + return false; + } + } + + virtual void invalidate() + { + string.invalidate(); + token.invalidate(); + tester::invalidate(); + } + + public: + http_quoted_string string; ///< Value when matched as quoted string + http_token token; ///< Value when matched as token + }; + + /// + /// Test for HTTP parameter (RFC2616: parameter) + /// + class http_parameter : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + if (name.match(text, interval.end, end, flags)) + interval.end = name.interval.end; + else + goto error; + while (m_space.match(text, interval.end, end, flags)) + interval.end = m_space.interval.end; + assert(text || interval.end >= end); + if (interval.end < end && text[interval.end] == '=') + interval.end++; + else + while (m_space.match(text, interval.end, end, flags)) + interval.end = m_space.interval.end; + if (value.match(text, interval.end, end, flags)) + interval.end = value.interval.end; + else + goto error; + interval.start = start; + return true; + + error: + name.invalidate(); + value.invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + name.invalidate(); + value.invalidate(); + tester::invalidate(); + } + + public: + http_token name; ///< Parameter name + http_value value; ///< Parameter value + + protected: + http_space m_space; + }; + + /// + /// Test for HTTP any type + /// + class http_any_type : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (start + 2 < end && + text[start] == '*' && + text[start + 1] == '/' && + text[start + 2] == '*') + { + interval.end = (interval.start = start) + 3; + return true; + } + else if (start < end && text[start] == '*') { + interval.end = (interval.start = start) + 1; + return true; + } + else { + interval.start = (interval.end = start) + 1; + return false; + } + } + }; + + /// + /// Test for HTTP media range (RFC2616: media-range) + /// + class http_media_range : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + if (type.match(text, interval.end, end, flags)) + interval.end = type.interval.end; + else + goto error; + while (m_space.match(text, interval.end, end, flags)) + interval.end = m_space.interval.end; + if (interval.end < end && text[interval.end] == '/') + interval.end++; + else + goto error; + while (m_space.match(text, interval.end, end, flags)) + interval.end = m_space.interval.end; + if (subtype.match(text, interval.end, end, flags)) + interval.end = subtype.interval.end; + else + goto error; + interval.start = start; + return true; + + error: + type.invalidate(); + subtype.invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + type.invalidate(); + subtype.invalidate(); + tester::invalidate(); + } + + public: + http_token type; + http_token subtype; + + protected: + http_space m_space; + }; + + /// + /// Test for HTTP media type (RFC2616: media-type) + /// + class http_media_type : public http_media_range + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + if (!http_media_range::match(text, start, end, flags)) + goto error; + params.clear(); + for (;;) { + if (interval.end < end && text[interval.end]) { + if (m_space.match(text, interval.end, end, flags)) + interval.end = m_space.interval.end; + else if (text[interval.end] == ';') { + interval.end++; + while (m_space.match(text, interval.end, end, flags)) + interval.end = m_space.interval.end; + http_parameter param; + if (param.match(text, interval.end, end, flags)) { + interval.end = param.interval.end; + params.push_back(std::move(param)); + } + else + break; + } + else + break; + } + else + break; + } + interval.end = params.empty() ? subtype.interval.end : params.back().interval.end; + return true; + + error: + http_media_range::invalidate(); + params.clear(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + params.clear(); + http_media_range::invalidate(); + } + + public: + std::list params; + }; + + /// + /// Test for HTTP URL server + /// + class http_url_server : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + for (;;) { + if (interval.end < end && text[interval.end]) { + if ((unsigned int)text[interval.end] < 0x20 || + (unsigned int)text[interval.end] == 0x7f || + text[interval.end] == ':' || + text[interval.end] == '/' || + isspace(text[interval.end])) + break; + else + interval.end++; + } + else + break; + } + if (start < interval.end) { + interval.start = start; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + /// + /// Test for HTTP URL port + /// + class http_url_port : public tester + { + public: + http_url_port(_In_ const std::locale& locale = std::locale()) : + tester(locale), + value(0) + {} + + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + value = 0; + interval.end = start; + for (;;) { + if (interval.end < end && text[interval.end]) { + if ('0' <= text[interval.end] && text[interval.end] <= '9') { + size_t _value = (size_t)value * 10 + text[interval.end] - '0'; + if (_value > (uint16_t)-1) { + value = 0; + interval.start = (interval.end = start) + 1; + return false; + } + value = (uint16_t)_value; + interval.end++; + } + else + break; + } + else + break; + } + if (start < interval.end) { + interval.start = start; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + value = 0; + tester::invalidate(); + } + + public: + uint16_t value; + }; + + /// + /// Test for HTTP URL path segment + /// + class http_url_path_segment : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + for (;;) { + if (interval.end < end && text[interval.end]) { + if ((unsigned int)text[interval.end] < 0x20 || + (unsigned int)text[interval.end] == 0x7f || + text[interval.end] == '?' || + text[interval.end] == '/' || + isspace(text[interval.end])) + break; + else + interval.end++; + } + else + break; + } + interval.start = start; + return true; + } + }; + + /// + /// Test for HTTP URL path segment + /// + class http_url_path : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + http_url_path_segment s; + interval.end = start; + segments.clear(); + assert(text || interval.end >= end); + if (interval.end < end && text[interval.end] != '/') + goto error; + interval.end++; + s.match(text, interval.end, end, flags); + segments.push_back(s); + interval.end = s.interval.end; + for (;;) { + if (interval.end < end && text[interval.end]) { + if (text[interval.end] == '/') { + interval.end++; + s.match(text, interval.end, end, flags); + segments.push_back(s); + interval.end = s.interval.end; + } + else + break; + } + else + break; + } + interval.start = start; + return true; + + error: + segments.clear(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + segments.clear(); + tester::invalidate(); + } + + public: + std::vector segments; ///< Path segments + }; + + /// + /// Test for HTTP URL parameter + /// + class http_url_parameter : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + name.start = interval.end; + for (;;) { + if (interval.end < end && text[interval.end]) { + if ((unsigned int)text[interval.end] < 0x20 || + (unsigned int)text[interval.end] == 0x7f || + text[interval.end] == '&' || + text[interval.end] == '=' || + isspace(text[interval.end])) + break; + else + interval.end++; + } + else + break; + } + if (start < interval.end) + name.end = interval.end; + else + goto error; + if (text[interval.end] == '=') { + interval.end++; + value.start = interval.end; + for (;;) { + if (interval.end < end && text[interval.end]) { + if ((unsigned int)text[interval.end] < 0x20 || + (unsigned int)text[interval.end] == 0x7f || + text[interval.end] == '&' || + isspace(text[interval.end])) + break; + else + interval.end++; + } + else + break; + } + value.end = interval.end; + } + else { + value.start = 1; + value.end = 0; + } + interval.start = start; + return true; + + error: + name.start = 1; + name.end = 0; + value.start = 1; + value.end = 0; + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + name.start = 1; + name.end = 0; + value.start = 1; + value.end = 0; + tester::invalidate(); + } + + public: + stdex::interval name; + stdex::interval value; + }; + + /// + /// Test for HTTP URL + /// + class http_url : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + + if (interval.end + 7 <= end && stdex::strnicmp(text + interval.end, 7, "http://", (size_t)-1, m_locale) == 0) { + interval.end += 7; + if (server.match(text, interval.end, end, flags)) + interval.end = server.interval.end; + else + goto error; + if (interval.end < end && text[interval.end] == ':') { + interval.end++; + if (port.match(text, interval.end, end, flags)) + interval.end = port.interval.end; + } + else { + port.invalidate(); + port.value = 80; + } + } + else { + server.invalidate(); + port.invalidate(); + port.value = 80; + } + + if (path.match(text, interval.end, end, flags)) + interval.end = path.interval.end; + else + goto error; + + params.clear(); + + if (interval.end < end && text[interval.end] == '?') { + interval.end++; + for (;;) { + if (interval.end < end && text[interval.end]) { + if ((unsigned int)text[interval.end] < 0x20 || + (unsigned int)text[interval.end] == 0x7f || + isspace(text[interval.end])) + break; + else if (text[interval.end] == '&') + interval.end++; + else { + http_url_parameter param; + if (param.match(text, interval.end, end, flags)) { + interval.end = param.interval.end; + params.push_back(std::move(param)); + } + else + break; + } + } + else + break; + } + } + + interval.start = start; + return true; + + error: + server.invalidate(); + port.invalidate(); + path.invalidate(); + params.clear(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + server.invalidate(); + port.invalidate(); + path.invalidate(); + params.clear(); + tester::invalidate(); + } + + public: + http_url_server server; + http_url_port port; + http_url_path path; + std::list params; + }; + + /// + /// Test for HTTP language (RFC1766) + /// + class http_language : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + components.clear(); + for (;;) { + if (interval.end < end && text[interval.end]) { + stdex::interval k; + k.end = interval.end; + for (;;) { + if (k.end < end && text[k.end]) { + if (isalpha(text[k.end])) + k.end++; + else + break; + } + else + break; + } + if (interval.end < k.end) { + k.start = interval.end; + interval.end = k.end; + components.push_back(k); + } + else + break; + if (interval.end < end && text[interval.end] == '-') + interval.end++; + else + break; + } + else + break; + } + if (!components.empty()) { + interval.start = start; + interval.end = components.back().end; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + components.clear(); + tester::invalidate(); + } + + public: + std::vector> components; + }; + + /// + /// Test for HTTP weight factor + /// + class http_weight : public tester + { + public: + http_weight(_In_ const std::locale& locale = std::locale()) : + tester(locale), + value(1.0f) + {} + + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1; + interval.end = start; + for (;;) { + if (interval.end < end && text[interval.end]) { + if ('0' <= text[interval.end] && text[interval.end] <= '9') { + celi_del = celi_del * 10 + text[interval.end] - '0'; + interval.end++; + } + else if (text[interval.end] == '.') { + interval.end++; + for (;;) { + if (interval.end < end && text[interval.end]) { + if ('0' <= text[interval.end] && text[interval.end] <= '9') { + decimalni_del = decimalni_del * 10 + text[interval.end] - '0'; + decimalni_del_n *= 10; + interval.end++; + } + else + break; + } + else + break; + } + break; + } + else + break; + } + else + break; + } + if (start < interval.end) { + value = (float)((double)celi_del + (double)decimalni_del / decimalni_del_n); + interval.start = start; + return true; + } + value = 1.0f; + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + value = 1.0f; + tester::invalidate(); + } + + public: + float value; ///< Calculated value of the weight factor + }; + + /// + /// Test for HTTP asterisk + /// + class http_asterisk : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || end <= start); + if (start < end && text[start] == '*') { + interval.end = (interval.start = start) + 1; + return true; + } + interval.start = (interval.end = start) + 1; + return false; + } + }; + + /// + /// Test for HTTP weighted value + /// + template + class http_weighted_value : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + size_t konec_vrednosti; + interval.end = start; + if (asterisk.match(text, interval.end, end, flags)) { + interval.end = konec_vrednosti = asterisk.interval.end; + value.invalidate(); + } + else if (value.match(text, interval.end, end, flags)) { + interval.end = konec_vrednosti = value.interval.end; + asterisk.invalidate(); + } + else { + asterisk.invalidate(); + value.invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++; + if (interval.end < end && text[interval.end] == ';') { + interval.end++; + while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++; + if (interval.end < end && (text[interval.end] == 'q' || text[interval.end] == 'Q')) { + interval.end++; + while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++; + if (interval.end < end && text[interval.end] == '=') { + interval.end++; + while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++; + if (factor.match(text, interval.end, end, flags)) + interval.end = factor.interval.end; + } + } + } + if (!factor.interval) { + factor.invalidate(); + interval.end = konec_vrednosti; + } + interval.start = start; + return true; + } + + virtual void invalidate() + { + asterisk.invalidate(); + value.invalidate(); + factor.invalidate(); + tester::invalidate(); + } + + public: + T_asterisk asterisk; + T value; + http_weight factor; + }; + + /// + /// Test for HTTP cookie parameter (RFC2109) + /// + class http_cookie_parameter : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + if (interval.end < end && text[interval.end] == '$') + interval.end++; + else + goto error; + if (name.match(text, interval.end, end, flags)) + interval.end = name.interval.end; + else + goto error; + while (m_space.match(text, interval.end, end, flags)) + interval.end = m_space.interval.end; + if (interval.end < end && text[interval.end] == '=') + interval.end++; + else + goto error; + while (m_space.match(text, interval.end, end, flags)) + interval.end = m_space.interval.end; + if (value.match(text, interval.end, end, flags)) + interval.end = value.interval.end; + else + goto error; + interval.start = start; + return true; + + error: + name.invalidate(); + value.invalidate(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + name.invalidate(); + value.invalidate(); + tester::invalidate(); + } + + public: + http_token name; + http_value value; + + protected: + http_space m_space; + }; + + /// + /// Test for HTTP cookie (RFC2109) + /// + class http_cookie : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + if (name.match(text, interval.end, end, flags)) + interval.end = name.interval.end; + else + goto error; + while (m_space.match(text, interval.end, end, flags)) + interval.end = m_space.interval.end; + if (interval.end < end && text[interval.end] == '=') + interval.end++; + else + goto error; + while (m_space.match(text, interval.end, end, flags)) + interval.end = m_space.interval.end; + if (value.match(text, interval.end, end, flags)) + interval.end = value.interval.end; + else + goto error; + params.clear(); + for (;;) { + if (interval.end < end && text[interval.end]) { + if (m_space.match(text, interval.end, end, flags)) + interval.end = m_space.interval.end; + else if (text[interval.end] == ';') { + interval.end++; + while (m_space.match(text, interval.end, end, flags)) + interval.end = m_space.interval.end; + http_cookie_parameter param; + if (param.match(text, interval.end, end, flags)) { + interval.end = param.interval.end; + params.push_back(std::move(param)); + } + else + break; + } + else + break; + } + else + break; + } + interval.start = start; + interval.end = params.empty() ? value.interval.end : params.back().interval.end; + return true; + + error: + name.invalidate(); + value.invalidate(); + params.clear(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + name.invalidate(); + value.invalidate(); + params.clear(); + tester::invalidate(); + } + + public: + http_token name; ///< Cookie name + http_value value; ///< Cookie value + std::list params; ///< List of cookie parameters + + protected: + http_space m_space; + }; + + /// + /// Test for HTTP agent + /// + class http_agent : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + type.start = interval.end; + for (;;) { + if (interval.end < end && text[interval.end]) { + if (text[interval.end] == '/') { + type.end = interval.end; + interval.end++; + version.start = interval.end; + for (;;) { + if (interval.end < end && text[interval.end]) { + if (isspace(text[interval.end])) { + version.end = interval.end; + break; + } + else + interval.end++; + } + else { + version.end = interval.end; + break; + } + } + break; + } + else if (isspace(text[interval.end])) { + type.end = interval.end; + break; + } + else + interval.end++; + } + else { + type.end = interval.end; + break; + } + } + if (start < interval.end) { + interval.start = start; + return true; + } + type.start = 1; + type.end = 0; + version.start = 1; + version.end = 0; + interval.start = 1; + interval.end = 0; + return false; + } + + virtual void invalidate() + { + type.start = 1; + type.end = 0; + version.start = 1; + version.end = 0; + tester::invalidate(); + } + + public: + stdex::interval type; + stdex::interval version; + }; + + /// + /// Test for HTTP protocol + /// + class http_protocol : public tester + { + public: + http_protocol(_In_ const std::locale& locale = std::locale()) : + tester(locale), + version(0x009) + {} + + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + type.start = interval.end; + for (;;) { + if (interval.end < end && text[interval.end]) { + if (text[interval.end] == '/') { + type.end = interval.end; + interval.end++; + break; + } + else if (isspace(text[interval.end])) + goto error; + else + interval.end++; + } + else { + type.end = interval.end; + goto error; + } + } + version_maj.start = interval.end; + for (;;) { + if (interval.end < end && text[interval.end]) { + if (text[interval.end] == '.') { + version_maj.end = interval.end; + interval.end++; + version_min.start = interval.end; + for (;;) { + if (interval.end < end && text[interval.end]) { + if (isspace(text[interval.end])) { + version_min.end = interval.end; + version = + (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 + + (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10); + break; + } + else + interval.end++; + } + else + goto error; + } + break; + } + else if (isspace(text[interval.end])) { + version_maj.end = interval.end; + version_min.start = 1; + version_min.end = 0; + version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100; + break; + } + else + interval.end++; + } + else + goto error; + } + interval.start = start; + return true; + + error: + type.start = 1; + type.end = 0; + version_maj.start = 1; + version_maj.end = 0; + version_min.start = 1; + version_min.end = 0; + version = 0x009; + interval.start = 1; + interval.end = 0; + return false; + } + + virtual void invalidate() + { + type.start = 1; + type.end = 0; + version_maj.start = 1; + version_maj.end = 0; + version_min.start = 1; + version_min.end = 0; + version = 0x009; + tester::invalidate(); + } + + public: + stdex::interval type; + stdex::interval version_maj; + stdex::interval version_min; + uint16_t version; ///< HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1... + }; + + /// + /// Test for HTTP request + /// + class http_request : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + + for (;;) { + if (m_line_break.match(text, interval.end, end, flags)) + goto error; + else if (interval.end < end && text[interval.end]) { + if (isspace(text[interval.end])) + interval.end++; + else + break; + } + else + goto error; + } + verb.start = interval.end; + for (;;) { + if (m_line_break.match(text, interval.end, end, flags)) + goto error; + else if (interval.end < end && text[interval.end]) { + if (isspace(text[interval.end])) { + verb.end = interval.end; + interval.end++; + break; + } + else + interval.end++; + } + else + goto error; + } + + for (;;) { + if (m_line_break.match(text, interval.end, end, flags)) + goto error; + else if (interval.end < end && text[interval.end]) { + if (isspace(text[interval.end])) + interval.end++; + else + break; + } + else + goto error; + } + if (url.match(text, interval.end, end, flags)) + interval.end = url.interval.end; + else + goto error; + + protocol.invalidate(); + for (;;) { + if (m_line_break.match(text, interval.end, end, flags)) { + interval.end = m_line_break.interval.end; + goto end; + } + else if (interval.end < end && text[interval.end]) { + if (isspace(text[interval.end])) + interval.end++; + else + break; + } + else + goto end; + } + for (;;) { + if (m_line_break.match(text, interval.end, end, flags)) { + interval.end = m_line_break.interval.end; + goto end; + } + else if (protocol.match(text, interval.end, end, flags)) { + interval.end = protocol.interval.end; + break; + } + else + goto end; + } + + for (;;) { + if (m_line_break.match(text, interval.end, end, flags)) { + interval.end = m_line_break.interval.end; + break; + } + else if (interval.end < end && text[interval.end]) + interval.end++; + else + goto end; + } + + end: + interval.start = start; + return true; + + error: + verb.start = 1; + verb.end = 0; + url.invalidate(); + protocol.invalidate(); + interval.start = 1; + interval.end = 0; + return false; + } + + virtual void invalidate() + { + verb.start = 1; + verb.end = 0; + url.invalidate(); + protocol.invalidate(); + tester::invalidate(); + } + + public: + stdex::interval verb; + http_url url; + http_protocol protocol; + + protected: + http_line_break m_line_break; + }; + + /// + /// Test for HTTP header + /// + class http_header : public tester + { + public: + virtual bool match( + _In_reads_or_z_(end) const char* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + + if (m_line_break.match(text, interval.end, end, flags) || + interval.end < end && text[interval.end] && isspace(text[interval.end])) + goto error; + name.start = interval.end; + for (;;) { + if (m_line_break.match(text, interval.end, end, flags)) + goto error; + else if (interval.end < end && text[interval.end]) { + if (isspace(text[interval.end])) { + name.end = interval.end; + interval.end++; + for (;;) { + if (m_line_break.match(text, interval.end, end, flags)) + goto error; + else if (interval.end < end && text[interval.end]) { + if (isspace(text[interval.end])) + interval.end++; + else + break; + } + else + goto error; + } + if (interval.end < end && text[interval.end] == ':') { + interval.end++; + break; + } + else + goto error; + break; + } + else if (text[interval.end] == ':') { + name.end = interval.end; + interval.end++; + break; + } + else + interval.end++; + } + else + goto error; + } + value.start = (size_t)-1; + value.end = 0; + for (;;) { + if (m_line_break.match(text, interval.end, end, flags)) { + interval.end = m_line_break.interval.end; + if (!m_line_break.match(text, interval.end, end, flags) && + interval.end < end && text[interval.end] && isspace(text[interval.end])) + interval.end++; + else + break; + } + else if (interval.end < end && text[interval.end]) { + if (isspace(text[interval.end])) + interval.end++; + else { + if (value.start == (size_t)-1) value.start = interval.end; + value.end = ++interval.end; + } + } + else + break; + } + interval.start = start; + return true; + + error: + name.start = 1; + name.end = 0; + value.start = 1; + value.end = 0; + interval.start = 1; + interval.end = 0; + return false; + } + + virtual void invalidate() + { + name.start = 1; + name.end = 0; + value.start = 1; + value.end = 0; + tester::invalidate(); + } + + public: + stdex::interval name; // interval imena glave + stdex::interval value; // interval vrednosti glave + + protected: + http_line_break m_line_break; + }; + + /// + /// Collection of HTTP headers + /// + template + class http_header_collection : public T + { + public: + void insert( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + while (start < end) { + while (start < end && text[start] && isspace(text[start])) start++; + if (start < end && text[start] == ',') { + start++; + while (start < end&& text[start] && isspace(text[start])) start++; + } + T::key_type el; + if (el.match(text, start, end, flags)) { + start = el.interval.end; + T::insert(std::move(el)); + } + else + break; + } + } + }; + + template + struct http_factor_more { + constexpr bool operator()(const T& a, const T& b) const noexcept + { + return a.factor.value > b.factor.value; + } + }; + + /// + /// Collection of weighted HTTP headers + /// + template , class _Alloc = std::allocator> + using http_weighted_header_collection = http_header_collection>; + + /// + /// Test for JSON string + /// + template + class basic_json_string : public basic_tester + { + public: + basic_json_string( + _In_ const std::shared_ptr>& quote, + _In_ const std::shared_ptr>& chr, + _In_ const std::shared_ptr>& escape, + _In_ const std::shared_ptr>& sol, + _In_ const std::shared_ptr>& bs, + _In_ const std::shared_ptr>& ff, + _In_ const std::shared_ptr>& lf, + _In_ const std::shared_ptr>& cr, + _In_ const std::shared_ptr>& htab, + _In_ const std::shared_ptr>& uni, + _In_ const std::shared_ptr>& hex, + _In_ const std::locale& locale = std::locale()) : + basic_tester(locale), + m_quote(quote), + m_chr(chr), + m_escape(escape), + m_sol(sol), + m_bs(bs), + m_ff(ff), + m_lf(lf), + m_cr(cr), + m_htab(htab), + m_uni(uni), + m_hex(hex) + {} + + virtual bool match( + _In_reads_or_z_(end) const T* text, + _In_ size_t start = 0, + _In_ size_t end = (size_t)-1, + _In_ int flags = match_default) + { + assert(text || start >= end); + interval.end = start; + if (m_quote->match(text, interval.end, end, flags)) { + interval.end = m_quote->interval.end; + value.clear(); + for (;;) { + if (m_quote->match(text, interval.end, end, flags)) { + interval.start = start; + interval.end = m_quote->interval.end; + return true; + } + if (m_escape->match(text, interval.end, end, flags)) { + if (m_quote->match(text, m_escape->interval.end, end, flags)) { + value += '"'; interval.end = m_quote->interval.end; + continue; + } + if (m_sol->match(text, m_escape->interval.end, end, flags)) { + value += '/'; interval.end = m_sol->interval.end; + continue; + } + if (m_bs->match(text, m_escape->interval.end, end, flags)) { + value += '\b'; interval.end = m_bs->interval.end; + continue; + } + if (m_ff->match(text, m_escape->interval.end, end, flags)) { + value += '\f'; interval.end = m_ff->interval.end; + continue; + } + if (m_lf->match(text, m_escape->interval.end, end, flags)) { + value += '\n'; interval.end = m_lf->interval.end; + continue; + } + if (m_cr->match(text, m_escape->interval.end, end, flags)) { + value += '\r'; interval.end = m_cr->interval.end; + continue; + } + if (m_htab->match(text, m_escape->interval.end, end, flags)) { + value += '\t'; interval.end = m_htab->interval.end; + continue; + } + if ( + m_uni->match(text, m_escape->interval.end, end, flags) && + m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) && + m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */) + { + assert(m_hex->value <= 0xffff); + if (sizeof(T) == 1) { + if (m_hex->value > 0x7ff) { + value += (T)(0xe0 | (m_hex->value >> 12) & 0x0f); + value += (T)(0x80 | (m_hex->value >> 6) & 0x3f); + value += (T)(0x80 | m_hex->value & 0x3f); + } + else if (m_hex->value > 0x7f) { + value += (T)(0xc0 | (m_hex->value >> 6) & 0x1f); + value += (T)(0x80 | m_hex->value & 0x3f); + } + else + value += (T)(m_hex->value & 0x7f); + } + else + value += (T)m_hex->value; + interval.end = m_hex->interval.end; + continue; + } + if (m_escape->match(text, m_escape->interval.end, end, flags)) { + value += '\\'; interval.end = m_escape->interval.end; + continue; + } + } + if (m_chr->match(text, interval.end, end, flags)) { + value.Prilepi(text + m_chr->interval.start, m_chr->interval.size()); + interval.end = m_chr->interval.end; + continue; + } + break; + } + } + value.clear(); + interval.start = (interval.end = start) + 1; + return false; + } + + virtual void invalidate() + { + value.clear(); + basic_tester::invalidate(); + } + + public: + std::basic_string value; + + protected: + std::shared_ptr> m_quote; + std::shared_ptr> m_chr; + std::shared_ptr> m_escape; + std::shared_ptr> m_sol; + std::shared_ptr> m_bs; + std::shared_ptr> m_ff; + std::shared_ptr> m_lf; + std::shared_ptr> m_cr; + std::shared_ptr> m_htab; + std::shared_ptr> m_uni; + std::shared_ptr> m_hex; + }; + + using json_string = basic_json_string; + using wjson_string = basic_json_string; +#ifdef _UNICODE + using tjson_string = wjson_string; +#else + using tjson_string = json_string; +#endif + } +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif