diff --git a/UnitTests/pch.hpp b/UnitTests/pch.hpp index 830b74348..d5885d3d0 100644 --- a/UnitTests/pch.hpp +++ b/UnitTests/pch.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/include/stdex/html.hpp b/include/stdex/html.hpp new file mode 100644 index 000000000..45b0a7f34 --- /dev/null +++ b/include/stdex/html.hpp @@ -0,0 +1,2501 @@ +/* + SPDX-License-Identifier: MIT + Copyright © 2016-2023 Amebis +*/ + +#pragma once + +#include "compat.hpp" +#include "exception.hpp" +#include "interval.hpp" +#include "mapping.hpp" +#include "parser.hpp" +#include "progress.hpp" +#include "sgml.hpp" +#include "string.hpp" +#include "system.hpp" +#include "unicode.hpp" +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#undef small +#endif + +namespace stdex +{ + namespace html + { + /// + /// Appends HTML escaped string + /// + /// \param[in,out] dst String to append to + /// \param[in] src Source string + /// \param[in] num_chars Code unit limit in string `src` + /// + template, class _Alloc = std::allocator> + inline void escape( + _Inout_ std::basic_string& dst, + _In_reads_or_z_opt_(num_chars) const char* src, _In_ size_t num_chars = SIZE_MAX) + { + _Assume_(src || !num_chars); + for (size_t i = 0; i < num_chars && src[i]; ++i) { + switch (src[i]) { + case '&': dst += "&"; break; + case ';': dst += ";"; break; + case '\"': dst += """; break; + case '\'': dst += "'"; break; + case '<': dst += "<"; break; + case '>': dst += ">"; break; + case 0x00a0: dst += " "; break; // No-break space must be escaped as SGML entity, otherwise browsers treat it as a normal space. 
+ default: dst += src[i]; break; + } + } + } + + /// + /// Appends HTML escaped string + /// + /// \param[in,out] dst String to append to + /// \param[in] src Source string + /// \param[in] num_chars Code unit limit in string `src` + /// + template, class _Alloc = std::allocator> + inline void escape( + _Inout_ std::basic_string& dst, + _In_reads_or_z_opt_(num_chars) const wchar_t* src, _In_ size_t num_chars = SIZE_MAX) + { + _Assume_(src || !num_chars); + for (size_t i = 0; i < num_chars && src[i]; ++i) { + switch (src[i]) { + case L'&': dst += L"&"; break; + case L';': dst += L";"; break; + case L'\"': dst += L"""; break; + case L'\'': dst += L"'"; break; + case L'<': dst += L"<"; break; + case L'>': dst += L">"; break; + case L'\u00a0': dst += L" "; break; // No-break space must be escaped as SGML entity, otherwise browsers treat it as a normal space. + default: dst += src[i]; break; + } + } + } + + /// + /// Appends HTML escaped character + /// + /// \param[in,out] dst String to append to + /// \param[in] chr Source character + /// + template, class _Alloc = std::allocator> + inline void escape_min(_Inout_ std::basic_string& dst, _In_ char chr) + { + switch (chr) { + case '&': dst += "&"; break; + case '<': dst += "<"; break; + case '>': dst += ">"; break; + case 0x00a0: dst += " "; break; // No-break space must be escaped as SGML entity, otherwise browsers treat it as a normal space. + default: dst += chr; break; + } + } + + /// + /// Appends HTML escaped character + /// + /// \param[in,out] dst String to append to + /// \param[in] chr Source character + /// + template, class _Alloc = std::allocator> + inline void escape_min(_Inout_ std::basic_string& dst, _In_ wchar_t chr) + { + switch (chr) { + case L'&': dst += L"&"; break; + case L'<': dst += L"<"; break; + case L'>': dst += L">"; break; + case L'\u00a0': dst += L" "; break; // No-break space must be escaped as SGML entity, otherwise browsers treat it as a normal space. 
+ default: dst += chr; break; + } + } + + /// + /// Appends HTML escaped string + /// + /// \param[in,out] dst String to append to + /// \param[in] src Source string + /// \param[in] num_chars Code unit limit in string `src` + /// + template, class _Alloc = std::allocator> + inline void escape_min( + _Inout_ std::basic_string& dst, + _In_reads_or_z_opt_(num_chars) const char* src, _In_ size_t num_chars = SIZE_MAX) + { + _Assume_(src || !num_chars); + for (size_t i = 0; i < num_chars && src[i]; ++i) { + switch (src[i]) { + case '&': dst += "&"; break; + case '<': dst += "<"; break; + case '>': dst += ">"; break; + case 0x00a0: dst += " "; break; // No-break space must be escaped as SGML entity, otherwise browsers treat it as a normal space. + default: dst += src[i]; break; + } + } + } + + /// + /// Appends HTML escaped string + /// + /// \param[in,out] dst String to append to + /// \param[in] src Source string + /// \param[in] num_chars Code unit limit in string `src` + /// + template, class _Alloc = std::allocator> + inline void escape_min( + _Inout_ std::basic_string& dst, + _In_reads_or_z_opt_(num_chars) const wchar_t* src, _In_ size_t num_chars = SIZE_MAX) + { + _Assume_(src || !num_chars); + for (size_t i = 0; i < num_chars && src[i]; ++i) { + switch (src[i]) { + case L'&': dst += L"&"; break; + case L'<': dst += L"<"; break; + case L'>': dst += L">"; break; + case L'\u00a0': dst += L" "; break; // No-break space must be escaped as SGML entity, otherwise browsers treat it as a normal space. 
+ default: dst += src[i]; break; + } + } + } + + /// + /// Appends unescaped URL string + /// + /// \param[in,out] dst String to append to + /// \param[in] src Source string + /// \param[in] num_chars Code unit limit in string `src` + /// + template, class _Alloc = std::allocator> + inline void url_unescape( + _Inout_ std::basic_string& dst, + _In_reads_or_z_opt_(num_chars) const char* src, _In_ size_t num_chars = SIZE_MAX) + { + _Assume_(src || !num_chars); + for (size_t i = 0; i < num_chars && src[i];) { + switch (src[i]) { + case '+': + dst += ' '; i++; + break; + + case '%': { + i++; + + uint8_t chr; + if ('0' <= src[i] && src[i] <= '9') chr = (src[i++] - '0') << 4; + else if ('A' <= src[i] && src[i] <= 'F') chr = (src[i++] - 'A' + 10) << 4; + else if ('a' <= src[i] && src[i] <= 'f') chr = (src[i++] - 'a' + 10) << 4; + else { dst += '%'; continue; } + if ('0' <= src[i] && src[i] <= '9') chr |= (src[i++] - '0'); + else if ('A' <= src[i] && src[i] <= 'F') chr |= (src[i++] - 'A' + 10); + else if ('a' <= src[i] && src[i] <= 'f') chr |= (src[i++] - 'a' + 10); + else { dst += '%'; dst += src[i - 1]; continue; } + + dst += static_cast(chr); + break; + } + + default: + dst += src[i++]; + } + } + } + + /// + /// Appends escaped URL string + /// + /// \param[in,out] dst String to append to + /// \param[in] src Source string + /// \param[in] num_chars Code unit limit in string `src` + /// + template, class _Alloc = std::allocator> + inline void url_escape( + _Inout_ std::basic_string& dst, + _In_reads_or_z_opt_(num_chars) const char* src, _In_ size_t num_chars = SIZE_MAX) + { + _Assume_(src || !num_chars); + for (size_t i = 0; i < num_chars && src[i]; ++i) { + switch (src[i]) { + case ' ': dst += "+"; break; + case '<': dst += "%3C"; break; + case '>': dst += "%3E"; break; + case '#': dst += "%23"; break; + case '%': dst += "%25"; break; + case '{': dst += "%7B"; break; + case '}': dst += "%7D"; break; + case '|': dst += "%7C"; break; + case '\\': dst += "%5C"; break; + 
case '^': dst += "%5E"; break; + case '~': dst += "%7E"; break; + case '[': dst += "%5B"; break; + case ']': dst += "%5D"; break; + case '`': dst += "%60"; break; + case ';': dst += "%3B"; break; + case '/': dst += "%2F"; break; + case '?': dst += "%3F"; break; + case ':': dst += "%3A"; break; + case '@': dst += "%40"; break; + case '=': dst += "%3D"; break; + case '&': dst += "%26"; break; + case '$': dst += "%24"; break; + default: + if (0x20 < static_cast(src[i]) && static_cast(src[i]) < 0x7f) + dst += src[i]; + else { + dst += '%'; + uint8_t n = (static_cast(src[i]) & 0xf0) >> 4; + dst += n < 10 ? static_cast('0' + n) : static_cast('A' + n - 10); + n = ((uint8_t)src[i] & 0x0f); + dst += n < 10 ? static_cast('0' + n) : static_cast('A' + n - 10); + } + } + } + } + + /// + /// Appends unescaped CSS string + /// + /// \param[in,out] dst String to append to + /// \param[in] src Source string + /// \param[in] num_chars Code unit limit in string `src` + /// + template, class _Alloc = std::allocator<_Elem>> + inline void css_unescape( + _Inout_ std::basic_string<_Elem, _Traits, _Alloc>& dst, + _In_reads_or_z_opt_(num_chars) const _Elem* src, _In_ size_t num_chars = SIZE_MAX) + { + _Assume_(src || !num_chars); + for (size_t i = 0; i < num_chars && src[i];) { + if (src[i] != '\\') + dst += src[i++]; + else if (i + 1 < num_chars) { + i++; + + switch (src[i]) { + // Classic escapes + case 'n': dst += '\n'; i++; break; + case 'r': dst += '\r'; i++; break; + case 't': dst += '\t'; i++; break; + + // `\` at the end of the line + case '\n': i++; break; + + // `\nnnn` escape + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': case 'a': + case 'B': case 'b': + case 'C': case 'c': + case 'D': case 'd': + case 'E': case 'e': + case 'F': case 'f': { + wchar_t chr = 0; + size_t end = std::min(num_chars, i + 6); + + for (; i < end; ++i) { + if ('0' <= src[i] && src[i] <= '9') chr = chr * 0x10 + src[i] - 
'0'; + else if ('A' <= src[i] && src[i] <= 'F') chr = chr * 0x10 + src[i] - 'A' + 10; + else if ('a' <= src[i] && src[i] <= 'f') chr = chr * 0x10 + src[i] - 'a' + 10; + else break; + } + + dst += static_cast<_Elem>(chr); + + if (i < end && src[i] == ' ') { + // Skip space after `\nnnn`. + i++; + } + break; + } + + default: dst += src[i++]; + } + } + } + } + + /// + /// Appends escaped CSS string + /// + /// \param[in,out] dst String to append to + /// \param[in] src Source string + /// \param[in] num_chars Code unit limit in string `src` + /// + template, class _Alloc = std::allocator> + inline void css_escape( + _Inout_ std::basic_string& dst, + _In_reads_or_z_opt_(num_chars) const char* src, _In_ size_t num_chars = SIZE_MAX) + { + _Assume_(src || !num_chars); + for (size_t i = 0; i < num_chars && src[i]; ++i) { + switch (src[i]) { + case '\\': dst += "\\\\"; break; + case '\n': dst += "\\n"; break; + case '\r': dst += "\\r"; break; + case '\t': dst += "\\t"; break; + case '\"': dst += "\\\""; break; + case '\'': dst += "\\'"; break; + default: dst += src[i]; break; + } + } + } + + /// + /// Appends escaped CSS string + /// + /// \param[in,out] dst String to append to + /// \param[in] src Source string + /// \param[in] num_chars Code unit limit in string `src` + /// + template, class _Alloc = std::allocator> + inline void css_escape( + _Inout_ std::basic_string& dst, + _In_reads_or_z_opt_(num_chars) const wchar_t* src, _In_ size_t num_chars = SIZE_MAX) + { + _Assume_(src || !num_chars); + for (size_t i = 0; i < num_chars && src[i]; ++i) { + switch (src[i]) { + case L'\\': dst += L"\\\\"; break; + case L'\n': dst += L"\\n"; break; + case L'\r': dst += L"\\r"; break; + case L'\t': dst += L"\\t"; break; + case L'\"': dst += L"\\\""; break; + case L'\'': dst += L"\\'"; break; + default: dst += src[i]; break; + } + } + } + + /// + /// HTML element type + /// + enum class element_t { + empty = 0, + a, + abbr, + acronym, + address, + applet, + area, + b, + base, + 
basefont, + bdo, + bgsound, // Microsoft Specific + big, + blink, // Microsoft Specific + blockquote, + body, + br, + button, + caption, + center, + cite, + code, + col, + colgroup, + comment, // Microsoft Specific + dd, + del, + dfn, + dir, + div, + dl, + dt, + em, + embed, // Microsoft Specific + fieldset, + font, + form, + frame, + frameset, + h1, + h2, + h3, + h4, + h5, + h6, + head, + hr, + html, + i, + iframe, + img, + input, + ins, + isindex, + kbd, + label, + legend, + li, + link, + listing, // Microsoft Specific + map, + marquee, // Microsoft Specific + menu, + meta, + nextid, // Microsoft Specific + nobr, // Microsoft Specific + noembed, // Microsoft Specific + noframes, + noscript, + object, + ol, + optgroup, + option, + p, + param, + plaintext, // Microsoft Specific + pre, + q, + rt, // Microsoft Specific + ruby, // Microsoft Specific + s, + samp, + script, + select, + small, + span, + strike, + strong, + style, + sub, + sup, + table, + tbody, + td, + textarea, + tfoot, + th, + thead, + title, + tr, + tt, + u, + ul, + var, + wbr, // Microsoft Specific + xmp, // Microsoft Specific + + unknown = -1, + PCDATA = -2, + CDATA = -3, + }; + + /// + /// Expected pairing of and + /// + enum class element_span_t { + needs_end = 0, ///< May start and end in a single ; otherwise, needs explicit end (e.g. `...`) + end_optional, ///< End is optional. May not contain the same type child elements. (e.g. `

`) + immediate, ///< Never spans. Only or forms. (e.g. `
`) + }; + + /// + /// Describes attributes associated with a HTML element + /// + struct element_traits + { + /// + /// Returns expected element span in HTML code + /// + /// \param[in] code Element code + /// + static inline element_span_t span(_In_ element_t code) + { + static element_span_t lookup[] = { + element_span_t::needs_end, // a + element_span_t::needs_end, // abbr + element_span_t::needs_end, // acronym + element_span_t::needs_end, // address + element_span_t::needs_end, // applet + element_span_t::immediate, // area + element_span_t::needs_end, // b + element_span_t::immediate, // base + element_span_t::immediate, // basefont + element_span_t::needs_end, // bdo + element_span_t::immediate, // bgsound + element_span_t::needs_end, // big + element_span_t::needs_end, // blink + element_span_t::needs_end, // blockquote + element_span_t::end_optional, // body + element_span_t::immediate, // br + element_span_t::needs_end, // button + element_span_t::needs_end, // caption + element_span_t::needs_end, // center + element_span_t::needs_end, // cite + element_span_t::needs_end, // code + element_span_t::immediate, // col + element_span_t::end_optional, // colgroup + element_span_t::needs_end, // comment + element_span_t::end_optional, // dd + element_span_t::needs_end, // del + element_span_t::needs_end, // dfn + element_span_t::needs_end, // dir + element_span_t::needs_end, // div + element_span_t::needs_end, // dl + element_span_t::end_optional, // dt + element_span_t::needs_end, // em + element_span_t::immediate, // embed + element_span_t::needs_end, // fieldset + element_span_t::needs_end, // font + element_span_t::needs_end, // form + element_span_t::immediate, // frame + element_span_t::needs_end, // frameset + element_span_t::needs_end, // h1 + element_span_t::needs_end, // h2 + element_span_t::needs_end, // h3 + element_span_t::needs_end, // h4 + element_span_t::needs_end, // h5 + element_span_t::needs_end, // h6 + element_span_t::end_optional, // head 
+ element_span_t::immediate, // hr + element_span_t::end_optional, // html + element_span_t::needs_end, // i + element_span_t::needs_end, // iframe + element_span_t::immediate, // img + element_span_t::immediate, // input + element_span_t::needs_end, // ins + element_span_t::immediate, // isindex + element_span_t::needs_end, // kbd + element_span_t::needs_end, // label + element_span_t::needs_end, // legend + element_span_t::end_optional, // li + element_span_t::immediate, // link + element_span_t::needs_end, // listing + element_span_t::needs_end, // map + element_span_t::needs_end, // marquee + element_span_t::needs_end, // menu + element_span_t::immediate, // meta + element_span_t::immediate, // nextid + element_span_t::needs_end, // nobr + element_span_t::needs_end, // noembed + element_span_t::needs_end, // noframes + element_span_t::needs_end, // noscript + element_span_t::needs_end, // object + element_span_t::needs_end, // ol + element_span_t::needs_end, // optgroup + element_span_t::end_optional, // option + element_span_t::end_optional, // p + element_span_t::immediate, // param + element_span_t::end_optional, // plaintext + element_span_t::needs_end, // pre + element_span_t::needs_end, // q + element_span_t::immediate, // rt + element_span_t::needs_end, // ruby + element_span_t::needs_end, // s + element_span_t::needs_end, // samp + element_span_t::needs_end, // script + element_span_t::needs_end, // select + element_span_t::needs_end, // small + element_span_t::needs_end, // span + element_span_t::needs_end, // strike + element_span_t::needs_end, // strong + element_span_t::needs_end, // style + element_span_t::needs_end, // sub + element_span_t::needs_end, // sup + element_span_t::needs_end, // table + element_span_t::end_optional, // tbody + element_span_t::end_optional, // td + element_span_t::needs_end, // textarea + element_span_t::end_optional, // tfoot + element_span_t::end_optional, // th + element_span_t::end_optional, // thead + 
element_span_t::needs_end, // title + element_span_t::end_optional, // tr + element_span_t::needs_end, // tt + element_span_t::needs_end, // u + element_span_t::needs_end, // ul + element_span_t::needs_end, // var + element_span_t::immediate, // wbr + element_span_t::needs_end, // xmp + }; + return element_t::a <= code && code <= element_t::xmp ? + lookup[static_cast(code) - static_cast(element_t::a)] : + element_span_t::needs_end; + } + + /// + /// Does element represent font styling? + /// + /// \param[in] code Element code + /// + static inline bool is_fontstyle(_In_ element_t code) + { + switch (code) { + case element_t::tt: + case element_t::i: + case element_t::b: + case element_t::u: + case element_t::s: + case element_t::strike: + case element_t::blink: + case element_t::big: + case element_t::small: + return true; + }; + return false; + } + + /// + /// Does element represent a phrase-of-speech? + /// + /// \param[in] code Element code + /// + static inline bool is_phrase(_In_ element_t code) + { + switch (code) { + case element_t::em: + case element_t::strong: + case element_t::dfn: + case element_t::code: + case element_t::samp: + case element_t::kbd: + case element_t::var: + case element_t::cite: + case element_t::abbr: + case element_t::acronym: + case element_t::xmp: + return true; + }; + return false; + } + + /// + /// Does element represent non-textual item in the document? 
+ /// + /// \param[in] code Element code + /// + static inline bool is_special(_In_ element_t code) + { + switch (code) { + case element_t::a: + case element_t::img: + case element_t::applet: + case element_t::object: + case element_t::embed: + case element_t::font: + case element_t::basefont: + case element_t::br: + case element_t::wbr: + case element_t::rt: + case element_t::script: + case element_t::map: + case element_t::q: + case element_t::sub: + case element_t::sup: + case element_t::ruby: + case element_t::span: + case element_t::bdo: + case element_t::iframe: + case element_t::nobr: + return true; + }; + return false; + } + + /// + /// Does element represent a form control? + /// + /// \param[in] code Element code + /// + static inline bool is_formctrl(_In_ element_t code) + { + switch (code) { + case element_t::input: + case element_t::select: + case element_t::textarea: + case element_t::label: + case element_t::button: + return true; + }; + return false; + } + + /// + /// Is element typically displayed inline with text? + /// + /// \param[in] code Element code + /// + static inline bool is_inline(_In_ element_t code) + { + return + code == element_t::PCDATA || + is_fontstyle(code) || + is_phrase(code) || + is_special(code) || + is_formctrl(code); + } + + /// + /// Does element represent a heading? + /// + /// \param[in] code Element code + /// + static inline bool is_heading(_In_ element_t code) + { + switch (code) { + case element_t::h1: + case element_t::h2: + case element_t::h3: + case element_t::h4: + case element_t::h5: + case element_t::h6: + return true; + }; + return false; + } + + /// + /// Does element represent a list of items? 
+ /// + /// \param[in] code Element code + /// + static inline bool is_list(_In_ element_t code) + { + switch (code) { + case element_t::ul: + case element_t::ol: + case element_t::dir: + case element_t::menu: + return true; + }; + return false; + } + + /// + /// Does element represent preformatted text, source code etc.? + /// + /// \param[in] code Element code + /// + static inline bool is_preformatted(_In_ element_t code) + { + switch (code) { + case element_t::pre: + case element_t::listing: + return true; + } + return false; + } + + /// + /// Is element typically displayed as a stand-alone section of text? + /// + /// \param[in] code Element code + /// + static inline bool is_block(_In_ element_t code) + { + if (is_heading(code) || + is_list(code) || + is_preformatted(code)) return true; + switch (code) { + case element_t::p: + case element_t::dl: + case element_t::div: + case element_t::center: + case element_t::marquee: + case element_t::noscript: + case element_t::noframes: + case element_t::noembed: + case element_t::blockquote: + case element_t::form: + case element_t::isindex: + case element_t::hr: + case element_t::table: + case element_t::fieldset: + case element_t::address: + return true; + }; + return false; + } + + /// + /// Does element typically represent text? + /// + /// \param[in] code Element code + /// + static inline bool is_flow(_In_ element_t code) + { + return is_block(code) || is_inline(code); + } + + /// + /// Is element part of the document head? + /// + /// \param[in] code Element code + /// + static inline bool is_head_content(_In_ element_t code) + { + switch (code) { + case element_t::title: + case element_t::isindex: + case element_t::base: + case element_t::nextid: + return true; + }; + return false; + } + + /// + /// May element be a part of document head? 
+ /// + /// \param[in] code Element code + /// + static inline bool is_head_misc(_In_ element_t code) + { + switch (code) { + case element_t::script: + case element_t::style: + case element_t::meta: + case element_t::link: + case element_t::object: + return true; + }; + return false; + } + + /// + /// May element be a part of

?
+			///
+			/// \param[in] code  Element code
+			///
+			static inline bool is_pre_exclusion(_In_ element_t code)
+			{
+				// A single membership test: the element is excluded when it matches any entry below.
+				// may_contain() consults this set to reject otherwise-inline children of `pre`.
+				return
+					code == element_t::img ||
+					code == element_t::object ||
+					code == element_t::applet ||
+					code == element_t::embed ||
+					code == element_t::big ||
+					code == element_t::small ||
+					code == element_t::sub ||
+					code == element_t::sup ||
+					code == element_t::ruby ||
+					code == element_t::font ||
+					code == element_t::basefont ||
+					code == element_t::nobr;
+			}
+
+			///
+			/// Is element one of the top-level document sections (`head`, `body` or `frameset`)?
+			///
+			/// may_contain() uses this test to decide what may nest directly inside `html`.
+			///
+			/// \param[in] code  Element code
+			///
+			/// \returns `true` for `head`, `body` and `frameset`; `false` otherwise
+			///
+			static inline bool is_html_content(_In_ element_t code)
+			{
+				return
+					code == element_t::head ||
+					code == element_t::body ||
+					code == element_t::frameset;
+			}
+
+			///
+			/// Does element represent a separate part of text?
+			///
+			/// An element is a group when it is one of the structural elements listed in the body,
+			/// or when is_block(), is_html_content() or is_head_content() accepts it.
+			///
+			/// \param[in] code  Element code
+			///
+			/// \returns `true` if the element is a group; `false` otherwise
+			///
+			static inline bool is_group(_In_ element_t code)
+			{
+				switch (code) {
+				// Table structure
+				case element_t::col:
+				case element_t::colgroup:
+				case element_t::td:
+				case element_t::th:
+				case element_t::tr:
+				// List parts
+				case element_t::dd:
+				case element_t::dir:
+				case element_t::dt:
+				// Frames and captions
+				case element_t::frame:
+				case element_t::iframe:
+				case element_t::legend:
+					return true;
+
+				default:
+					// Everything else is a group only if one of the broader categories claims it.
+					return
+						is_block(code) ||
+						is_html_content(code) ||
+						is_head_content(code);
+				}
+			}
+
+			///
+			/// Checks if one element may nest inside another
+			///
+			/// Children of type `unknown` or `comment` are accepted by every parent, and an `unknown`
+			/// parent accepts every child. A parent that has no rule in the body accepts nothing else.
+			///
+			/// \param[in] parent  Parent element code
+			/// \param[in] child   Child element code
+			///
+			/// \returns `true` if `child` may nest in `parent`; `false` otherwise
+			///
+			static inline bool may_contain(_In_ element_t parent, _In_ element_t child)
+			{
+				// Unrecognized elements and comments are tolerated anywhere.
+				if (child == element_t::unknown || child == element_t::comment)
+					return true;
+
+				// Font style, phrase and heading parents take inline content only.
+				if (is_fontstyle(parent) || is_phrase(parent) || is_heading(parent))
+					return is_inline(child);
+
+				switch (parent) {
+				// Parents that never have children
+				case element_t::area:
+				case element_t::base:
+				case element_t::basefont:
+				case element_t::br:
+				case element_t::col:
+				case element_t::frame:
+				case element_t::hr:
+				case element_t::img:
+				case element_t::input:
+				case element_t::isindex:
+				case element_t::link:
+				case element_t::meta:
+				case element_t::param:
+				case element_t::rt:
+				case element_t::wbr:
+					return false;
+
+				// Parents that take any inline content
+				case element_t::bdo:
+				case element_t::caption:
+				case element_t::dt:
+				case element_t::font:
+				case element_t::legend:
+				case element_t::p:
+				case element_t::q:
+				case element_t::ruby:
+				case element_t::span:
+				case element_t::sub:
+				case element_t::sup:
+					return is_inline(child);
+
+				// Parents that take any flow content
+				case element_t::blockquote:
+				case element_t::center:
+				case element_t::dd:
+				case element_t::del:
+				case element_t::div:
+				case element_t::iframe:
+				case element_t::ins:
+				case element_t::li:
+				case element_t::marquee:
+				case element_t::noscript:
+				case element_t::noembed:
+				case element_t::plaintext:
+				case element_t::td:
+				case element_t::th:
+					return is_flow(child);
+
+				// Parents that take flow content plus `param`
+				case element_t::applet:
+				case element_t::embed:
+				case element_t::object:
+					return is_flow(child) || child == element_t::param;
+
+				// Lists: items only
+				case element_t::dir:
+				case element_t::menu:
+				case element_t::ol:
+				case element_t::ul:
+					return child == element_t::li;
+
+				// Parents whose content is CDATA
+				case element_t::comment:
+				case element_t::listing:
+				case element_t::script:
+				case element_t::style:
+					return child == element_t::CDATA;
+
+				// Parents whose content is PCDATA
+				case element_t::option:
+				case element_t::textarea:
+				case element_t::title:
+					return child == element_t::PCDATA;
+
+				// Table sections: rows only
+				case element_t::tbody:
+				case element_t::tfoot:
+				case element_t::thead:
+					return child == element_t::tr;
+
+				// Inline content, but no nesting of the same element
+				case element_t::a:
+					return child != element_t::a && is_inline(child);
+				case element_t::label:
+					return child != element_t::label && is_inline(child);
+
+				case element_t::address:
+					return child == element_t::p || is_inline(child);
+
+				case element_t::body:
+					return is_flow(child) || child == element_t::ins || child == element_t::del;
+
+				case element_t::button:
+					// Flow content without form controls and without the elements rejected below.
+					switch (child) {
+					case element_t::a:
+					case element_t::form:
+					case element_t::isindex:
+					case element_t::fieldset:
+					case element_t::iframe:
+						return false;
+					default:
+						return is_flow(child) && !is_formctrl(child);
+					}
+
+				case element_t::colgroup:
+					return child == element_t::col;
+
+				case element_t::dl:
+					return child == element_t::dt || child == element_t::dd;
+
+				case element_t::fieldset:
+					return is_flow(child) || child == element_t::legend || child == element_t::PCDATA;
+
+				case element_t::form:
+					return child != element_t::form && is_flow(child);
+
+				case element_t::frameset:
+					return
+						child == element_t::frameset ||
+						child == element_t::frame ||
+						child == element_t::noframes;
+
+				case element_t::head:
+					return is_head_content(child) || is_head_misc(child);
+
+				case element_t::html:
+					return is_html_content(child);
+
+				case element_t::map:
+					return is_block(child) || child == element_t::area;
+
+				case element_t::nobr:
+					return is_inline(child) || child == element_t::wbr;
+
+				case element_t::noframes:
+					// Flow content or `body`, but never another `noframes`.
+					if (child == element_t::noframes)
+						return false;
+					return is_flow(child) || child == element_t::body;
+
+				case element_t::optgroup:
+					return child == element_t::option;
+
+				case element_t::pre:
+					return is_inline(child) && !is_pre_exclusion(child);
+
+				case element_t::select:
+					return child == element_t::optgroup || child == element_t::option;
+
+				case element_t::table:
+					switch (child) {
+					case element_t::caption:
+					case element_t::col:
+					case element_t::colgroup:
+					case element_t::thead:
+					case element_t::tfoot:
+					case element_t::tbody:
+						return true;
+					default:
+						return false;
+					}
+
+				case element_t::tr:
+					return child == element_t::td || child == element_t::th;
+
+				// An unrecognized parent accepts anything.
+				case element_t::unknown:
+					return true;
+
+				default:
+					break;
+				}
+
+				// No rule for this parent: nothing may nest inside it.
+				return false;
+			}
+
+			///
+			/// Checks if expected element attribute value is URI
+			///
+			/// \param[in] code       Element code
+			/// \param[in] attr_name  Attribute name
+			/// \param[in] num_chars  Code unit limit in `attr_name`
+			///
+			template 
+			static inline bool is_uri(_In_ element_t code, _In_reads_or_z_opt_(num_chars) const T* attr_name, _In_ size_t num_chars)
+			{
+				_Assume_(attr_name || !num_chars);
+				switch (code) {
+				case element_t::a:          return !stdex::strnicmp(attr_name, num_chars, "href", SIZE_MAX, stdex::std_locale_C);
+				case element_t::applet:     return !stdex::strnicmp(attr_name, num_chars, "code", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "codebase", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "src", SIZE_MAX, stdex::std_locale_C);
+				case element_t::area:       return !stdex::strnicmp(attr_name, num_chars, "href", SIZE_MAX, stdex::std_locale_C);
+				case element_t::base:       return !stdex::strnicmp(attr_name, num_chars, "href", SIZE_MAX, stdex::std_locale_C);
+				case element_t::bgsound:    return !stdex::strnicmp(attr_name, num_chars, "src", SIZE_MAX, stdex::std_locale_C);
+				case element_t::blockquote: return !stdex::strnicmp(attr_name, num_chars, "cite", SIZE_MAX, stdex::std_locale_C);
+				case element_t::body:       return !stdex::strnicmp(attr_name, num_chars, "background", SIZE_MAX, stdex::std_locale_C);
+				case element_t::comment:    return !stdex::strnicmp(attr_name, num_chars, "data", SIZE_MAX, stdex::std_locale_C);
+				case element_t::del:        return !stdex::strnicmp(attr_name, num_chars, "cite", SIZE_MAX, stdex::std_locale_C);
+				case element_t::embed:      return !stdex::strnicmp(attr_name, num_chars, "pluginspage", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "src", SIZE_MAX, stdex::std_locale_C);
+				case element_t::form:       return !stdex::strnicmp(attr_name, num_chars, "action", SIZE_MAX, stdex::std_locale_C);
+				case element_t::frame:      return !stdex::strnicmp(attr_name, num_chars, "longdesc", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "src", SIZE_MAX, stdex::std_locale_C);
+				case element_t::head:       return !stdex::strnicmp(attr_name, num_chars, "profile", SIZE_MAX, stdex::std_locale_C);
+				case element_t::iframe:     return !stdex::strnicmp(attr_name, num_chars, "longdesc", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "src", SIZE_MAX, stdex::std_locale_C);
+				case element_t::img:        return !stdex::strnicmp(attr_name, num_chars, "longdesc", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "lowsrc", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "src", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "usemap", SIZE_MAX, stdex::std_locale_C);
+				case element_t::input:      return !stdex::strnicmp(attr_name, num_chars, "lowsrc", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "src", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "usemap", SIZE_MAX, stdex::std_locale_C);
+				case element_t::ins:        return !stdex::strnicmp(attr_name, num_chars, "cite", SIZE_MAX, stdex::std_locale_C);
+				case element_t::link:       return !stdex::strnicmp(attr_name, num_chars, "href", SIZE_MAX, stdex::std_locale_C);
+				case element_t::object:     return !stdex::strnicmp(attr_name, num_chars, "basehref", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "classid", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "code", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "codebase", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "data", SIZE_MAX, stdex::std_locale_C) ||
+					!stdex::strnicmp(attr_name, num_chars, "usemap", SIZE_MAX, stdex::std_locale_C);
+				case element_t::q:          return !stdex::strnicmp(attr_name, num_chars, "cite", SIZE_MAX, stdex::std_locale_C);
+				case element_t::script:     return !stdex::strnicmp(attr_name, num_chars, "src", SIZE_MAX, stdex::std_locale_C);
+				case element_t::table:      return !stdex::strnicmp(attr_name, num_chars, "background", SIZE_MAX, stdex::std_locale_C);
+				case element_t::td:         return !stdex::strnicmp(attr_name, num_chars, "background", SIZE_MAX, stdex::std_locale_C);
+				case element_t::th:         return !stdex::strnicmp(attr_name, num_chars, "background", SIZE_MAX, stdex::std_locale_C);
+				}
+				return false;
+			}
+
+			///
+			/// Tells whether the value of an element attribute is text that may be localized
+			///
+			/// The `title` attribute qualifies on every element; all other attributes qualify on specific elements only.
+			///
+			/// \param[in] code       Element code
+			/// \param[in] attr_name  Attribute name
+			/// \param[in] num_chars  Code unit limit in `attr_name`
+			///
+			/// \returns `true` when the attribute value is localizable; `false` otherwise
+			///
+			template <class T>
+			static inline bool is_localizable(element_t code, const T* attr_name, size_t num_chars)
+			{
+				_Assume_(attr_name || !num_chars);
+
+				// Compares the attribute name against an expected name using the "C" locale.
+				const auto named = [&](const char* expected) {
+					return stdex::strnicmp(attr_name, num_chars, expected, SIZE_MAX, stdex::std_locale_C) == 0;
+				};
+
+				if (named("title"))
+					return true;
+
+				switch (code) {
+				case element_t::applet:
+				case element_t::area:
+				case element_t::img:
+				case element_t::input:
+				case element_t::object:
+					return named("alt");
+
+				case element_t::table:
+					return named("summary");
+
+				case element_t::td:
+				case element_t::th:
+					return named("abbr");
+				}
+				return false;
+			}
+		};
+
+		class sequence; // Forward declaration; the class itself is defined right after the `sequence_store` alias
+		using sequence_store = std::vector>; ///< Container type used by the document to store its parsed sequences
+
+		///
+		/// Common base class of all sequences found in HTML source
+		///
+		class sequence
+		{
+		public:
+			stdex::parser::html_sequence_t type; ///< Sequence type. Kept as an enum for performance reasons (avoids `dynamic_cast`)
+			stdex::interval<size_t> interval;    ///< Position of the sequence in source
+			sequence* parent;                    ///< Parent sequence (`nullptr` when there is none)
+
+			///
+			/// Constructs a sequence
+			///
+			/// \param[in] _type    Sequence type
+			/// \param[in] start    Start of the sequence in source
+			/// \param[in] end      End of the sequence in source
+			/// \param[in] _parent  Parent sequence
+			///
+			sequence(
+				_In_ stdex::parser::html_sequence_t _type = stdex::parser::html_sequence_t::unknown,
+				_In_ size_t start = 0,
+				size_t end = 0,
+				_In_opt_ sequence* _parent = nullptr) :
+				type{ _type },
+				interval(start, end),
+				parent{ _parent }
+			{}
+
+			virtual ~sequence() = default; // virtual destructor makes the class polymorphic
+		};
+
+		///
+		/// HTML element `<.../>`
+		///
+		class element : public sequence
+		{
+		public:
+			///
+			/// Constructs an element from a parsed tag
+			///
+			/// \param[in,out] tag     Parsed tag; its name and attribute positions are moved into the element
+			/// \param[in]     src     Source the positions in `tag` refer to; used to look up the element code by name
+			/// \param[in]     parent  Parent sequence
+			///
+			template <class T>
+			inline element(_Inout_ stdex::parser::basic_html_tag<T>&& tag, _In_z_ const T* src, _In_opt_ sequence* parent = nullptr) :
+				sequence(tag.type, tag.interval.start, tag.interval.end, parent),
+				code(element_code(src + tag.name.start, tag.name.size())),
+				name(std::move(tag.name)),
+				attributes(std::move(tag.attributes))
+			{}
+
+			///
+			/// Looks up element code by element name
+			///
+			/// The name is lower-cased using the "C" locale and searched for in a sorted table of known element names.
+			///
+			/// \param[in] name       Element name
+			/// \param[in] num_chars  Code unit limit in `name`
+			///
+			/// \returns Element code or `element_t::unknown` when the name is not in the table
+			///
+			template <class T>
+			static element_t element_code(_In_reads_z_(num_chars) const T* name, size_t num_chars)
+			{
+				static const struct {
+					const char* name;
+					element_t code;
+				} mapping[] = {
+					{ "a",          element_t::a,          },
+					{ "abbr",       element_t::abbr,       },
+					{ "acronym",    element_t::acronym,    },
+					{ "address",    element_t::address,    },
+					{ "applet",     element_t::applet,     },
+					{ "area",       element_t::area,       },
+					{ "b",          element_t::b,          },
+					{ "base",       element_t::base,       },
+					{ "basefont",   element_t::basefont,   },
+					{ "bdo",        element_t::bdo,        },
+					{ "bgsound",    element_t::bgsound,    },
+					{ "big",        element_t::big,        },
+					{ "blink",      element_t::blink,      },
+					{ "blockquote", element_t::blockquote, },
+					{ "body",       element_t::body,       },
+					{ "br",         element_t::br,         },
+					{ "button",     element_t::button,     },
+					{ "caption",    element_t::caption,    },
+					{ "center",     element_t::center,     },
+					{ "cite",       element_t::cite,       },
+					{ "code",       element_t::code,       },
+					{ "col",        element_t::col,        },
+					{ "colgroup",   element_t::colgroup,   },
+					{ "comment",    element_t::comment,    },
+					{ "dd",         element_t::dd,         },
+					{ "del",        element_t::del,        },
+					{ "dfn",        element_t::dfn,        },
+					{ "dir",        element_t::dir,        },
+					{ "div",        element_t::div,        },
+					{ "dl",         element_t::dl,         },
+					{ "dt",         element_t::dt,         },
+					{ "em",         element_t::em,         },
+					{ "embed",      element_t::embed,      },
+					{ "fieldset",   element_t::fieldset,   },
+					{ "font",       element_t::font,       },
+					{ "form",       element_t::form,       },
+					{ "frame",      element_t::frame,      },
+					{ "frameset",   element_t::frameset,   },
+					{ "h1",         element_t::h1,         },
+					{ "h2",         element_t::h2,         },
+					{ "h3",         element_t::h3,         },
+					{ "h4",         element_t::h4,         },
+					{ "h5",         element_t::h5,         },
+					{ "h6",         element_t::h6,         },
+					{ "head",       element_t::head,       },
+					{ "hr",         element_t::hr,         },
+					{ "html",       element_t::html,       },
+					{ "i",          element_t::i,          },
+					{ "iframe",     element_t::iframe,     },
+					{ "img",        element_t::img,        },
+					{ "input",      element_t::input,      },
+					{ "ins",        element_t::ins,        },
+					{ "isindex",    element_t::isindex,    },
+					{ "kbd",        element_t::kbd,        },
+					{ "label",      element_t::label,      },
+					{ "legend",     element_t::legend,     },
+					{ "li",         element_t::li,         },
+					{ "link",       element_t::link,       },
+					{ "listing",    element_t::listing,    },
+					{ "map",        element_t::map,        },
+					{ "marquee",    element_t::marquee,    },
+					{ "menu",       element_t::menu,       },
+					{ "meta",       element_t::meta,       },
+					{ "nextid",     element_t::nextid,     },
+					{ "nobr",       element_t::nobr,       },
+					{ "noembed",    element_t::noembed,    },
+					{ "noframes",   element_t::noframes,   },
+					{ "noscript",   element_t::noscript,   },
+					{ "object",     element_t::object,     },
+					{ "ol",         element_t::ol,         },
+					{ "optgroup",   element_t::optgroup,   },
+					{ "option",     element_t::option,     },
+					{ "p",          element_t::p,          },
+					{ "param",      element_t::param,      },
+					{ "plaintext",  element_t::plaintext,  },
+					{ "pre",        element_t::pre,        },
+					{ "q",          element_t::q,          },
+					{ "rt",         element_t::rt,         },
+					{ "ruby",       element_t::ruby,       },
+					{ "s",          element_t::s,          },
+					{ "samp",       element_t::samp,       },
+					{ "script",     element_t::script,     },
+					{ "select",     element_t::select,     },
+					{ "small",      element_t::small,      },
+					{ "span",       element_t::span,       },
+					{ "strike",     element_t::strike,     },
+					{ "strong",     element_t::strong,     },
+					{ "style",      element_t::style,      },
+					{ "sub",        element_t::sub,        },
+					{ "sup",        element_t::sup,        },
+					{ "table",      element_t::table,      },
+					{ "tbody",      element_t::tbody,      },
+					{ "td",         element_t::td,         },
+					{ "textarea",   element_t::textarea,   },
+					{ "tfoot",      element_t::tfoot,      },
+					{ "th",         element_t::th,         },
+					{ "thead",      element_t::thead,      },
+					{ "title",      element_t::title,      },
+					{ "tr",         element_t::tr,         },
+					{ "tt",         element_t::tt,         },
+					{ "u",          element_t::u,          },
+					{ "ul",         element_t::ul,         },
+					{ "var",        element_t::var,        },
+					{ "wbr",        element_t::wbr,        },
+					{ "xmp",        element_t::xmp,        },
+				};
+#ifdef _DEBUG
+				// The mapping table MUST be sorted and all names in lowercase.
+				for (size_t i = 1; i < _countof(mapping); i++)
+					_Assume_(stdex::strcmp(mapping[i - 1].name, mapping[i].name) <= 0);
+				const auto& ctype = std::use_facet<std::ctype<char>>(stdex::std_locale_C);
+				for (size_t i = 0; i < _countof(mapping); i++) {
+					for (size_t j = 0; mapping[i].name[j]; j++)
+						_Assume_(ctype.is(ctype.lower | ctype.digit, mapping[i].name[j]));
+				}
+#endif
+				const auto& ctypeT = std::use_facet<std::ctype<T>>(stdex::std_locale_C);
+
+				// Binary search over the half-open range [i, j) of the sorted table.
+				for (size_t i = 0, j = _countof(mapping); i < j; ) {
+					size_t m = (i + j) / 2;
+
+					// r < 0: table entry sorts before `name`; r > 0: table entry sorts after `name`; r == 0: match.
+					int r = 0;
+					for (size_t i1 = 0, i2 = 0;;) {
+						if (!mapping[m].name[i1]) {
+							// Table entry ended: a match only when `name` ended too.
+							r = i2 >= num_chars || !name[i2] ? 0 : -1;
+							break;
+						}
+						if (i2 >= num_chars || !name[i2]) {
+							// `name` is a proper prefix of the table entry.
+							r = 1;
+							break;
+						}
+
+						// Compare in T. Narrowing the input to `char` would fold wide code units
+						// onto ASCII (e.g. L'\u0161' onto 'a') and produce false matches.
+						const T expected = static_cast<T>(mapping[m].name[i1]);
+						const T chr = ctypeT.tolower(name[i2++]);
+						if (expected > chr) {
+							r = 1;
+							break;
+						}
+						if (expected < chr) {
+							r = -1;
+							break;
+						}
+						i1++;
+					}
+
+					if (r < 0)
+						i = m + 1;
+					else if (r > 0)
+						j = m;
+					else
+						return mapping[m].code;
+				}
+				return element_t::unknown;
+			}
+
+		public:
+			element_t code;                                        ///< Element code
+			stdex::interval<size_t> name;                          ///< Element name position in source
+			std::vector<stdex::parser::html_attribute> attributes; ///< Element attribute positions in source
+		};
+
+		class element_end; // Forward declaration; the class is defined after `element_start`
+
+		///
+		/// Starting tag of an HTML element `<...>`
+		///
+		class element_start : public element
+		{
+		public:
+			///
+			/// Constructs a starting tag from a parsed tag
+			///
+			/// \param[in,out] tag     Parsed tag; forwarded to the `element` constructor
+			/// \param[in]     src     Source the positions in `tag` refer to
+			/// \param[in]     parent  Parent sequence
+			/// \param[in]     _end    Sequence that ends this element
+			///
+			template <class T>
+			element_start(
+				_Inout_ stdex::parser::basic_html_tag<T>&& tag,
+				_In_z_ const T* src,
+				_In_opt_ sequence* parent = nullptr,
+				_In_opt_ sequence* _end = nullptr) :
+				element(std::move(tag), src, parent),
+				end{ _end }
+			{}
+
+			sequence* end; ///< Corresponding ending tag of type `element_end`; when the element is ended by the start of another element, this points to that other element's start.
+		};
+
+		///
+		/// Ending tag of an HTML element `</...>`
+		///
+		class element_end : public sequence
+		{
+		public:
+			///
+			/// Constructs an ending tag from a parsed tag
+			///
+			/// \param[in,out] tag     Parsed tag; its name position is moved into this object
+			/// \param[in]     src     Source the positions in `tag` refer to; used to look up the element code by name
+			/// \param[in]     parent  Parent sequence
+			/// \param[in]     _start  Corresponding starting tag
+			///
+			template <class T>
+			element_end(
+				_Inout_ stdex::parser::basic_html_tag<T>&& tag,
+				_In_z_ const T* src,
+				_In_opt_ sequence* parent = nullptr,
+				_In_opt_ element_start* _start = nullptr) :
+				sequence(tag.type, tag.interval.start, tag.interval.end, parent),
+				code(element::element_code(src + tag.name.start, tag.name.size())),
+				name(std::move(tag.name)),
+				start{ _start }
+			{}
+
+			element_t code;               ///< Element code
+			stdex::interval<size_t> name; ///< Element name position in source
+			element_start* start;         ///< Corresponding starting tag
+		};
+
+		///
+		/// HTML declaration
+		///
+		class declaration : public sequence
+		{
+		public:
+			///
+			/// Constructs a declaration from a parsed tag
+			///
+			/// \param[in,out] tag     Parsed tag; its name and attribute positions are moved into the declaration
+			/// \param[in]     parent  Parent sequence
+			///
+			template <class T>
+			declaration(
+				_Inout_ stdex::parser::basic_html_tag<T>&& tag,
+				_In_opt_ sequence* parent = nullptr) :
+				sequence(tag.type, tag.interval.start, tag.interval.end, parent),
+				name(std::move(tag.name)),
+				attributes(std::move(tag.attributes))
+			{}
+
+			stdex::interval<size_t> name;                          ///< Declaration name position in source
+			std::vector<stdex::parser::html_attribute> attributes; ///< Declaration attribute positions in source
+		};
+
+		///
+		/// HTML comment
+		///
+		class comment : public sequence
+		{
+		public:
+			///
+			/// Constructs a comment from a parsed tag
+			///
+			/// \param[in,out] tag     Parsed tag; its `name` interval is taken as the comment content position
+			/// \param[in]     parent  Parent sequence
+			///
+			template <class T>
+			comment(
+				_Inout_ stdex::parser::basic_html_tag<T>&& tag,
+				_In_opt_ sequence* parent = nullptr) :
+				sequence(tag.type, tag.interval.start, tag.interval.end, parent),
+				content(std::move(tag.name))
+			{}
+
+			stdex::interval<size_t> content; ///< Comment content position in source
+		};
+
+		///
+		/// HTML instruction
+		///
+		class instruction : public sequence
+		{
+		public:
+			///
+			/// Constructs an instruction from a parsed tag
+			///
+			/// \param[in,out] tag     Parsed tag; its `name` interval is taken as the instruction content position
+			/// \param[in]     parent  Parent sequence
+			///
+			template <class T>
+			instruction(
+				_Inout_ stdex::parser::basic_html_tag<T>&& tag,
+				_In_opt_ sequence* parent = nullptr) :
+				sequence(tag.type, tag.interval.start, tag.interval.end, parent),
+				content(std::move(tag.name))
+			{}
+
+			stdex::interval<size_t> content; ///< Instruction content position in source
+		};
+
+		///
+		/// HTML entity: position of the entity name in source together with the value it stands for
+		///
+		template<class _Elem, class _Traits = std::char_traits<_Elem>, class _Alloc = std::allocator<_Elem>>
+		struct entity
+		{
+			stdex::interval<size_t> name;                    ///< Position of the entity name in source
+			std::basic_string<_Elem, _Traits, _Alloc> value; ///< Value of the entity
+		};
+
+		///
+		/// HTML parser (forward declaration; the `document` class below declares it a friend)
+		///
+		template, class _Alloc = std::allocator<_Elem>>
+		class parser;
+
+		///
+		/// HTML document
+		///
+		template, class _Alloc = std::allocator<_Elem>>
+		class document
+		{
+		public:
+			document() :
+				m_num_parsed(0),
+				m_charset(stdex::charset_id::system),
+
+				// Declaration parsing data
+				m_num_valid_conditions(0),
+				m_num_invalid_conditions(0),
+				m_is_cdata(false),
+				m_is_rcdata(false),
+
+				// Element parsing data
+				m_is_special_element(false)
+			{}
+
+			///
+			/// Empties document: drops the source, the parsed sequences and entities, and resets the parsing state
+			///
+			void clear()
+			{
+				// Source and parsing progress
+				m_source.clear();
+				m_num_parsed = 0;
+				m_charset = stdex::charset_id::system;
+
+				// Declaration parsing data
+				m_num_valid_conditions = 0;
+				m_num_invalid_conditions = 0;
+				m_is_cdata = false;
+				m_is_rcdata = false;
+				m_entities.clear();
+
+				// Element parsing data
+				m_element_stack.clear();
+				m_sequences.clear();
+				m_is_special_element = false;
+			}
+
+			///
+			/// Parses HTML source code by chunks
+			///
+			/// The chunk is appended to the stored source and parsing resumes where the previous call stopped.
+			/// Characters that do not complete a recognized sequence are skipped, but not declared as parsed,
+			/// since they might be a part of an unfinished tag; finalize() turns the remainder into text.
+			///
+			/// \param[in] source     HTML source code chunk
+			/// \param[in] num_chars  Code unit limit in `source`
+			///
+			/// \throws std::invalid_argument  When the tag parser reports a tag type this method does not handle
+			/// \throws std::runtime_error     When a condition or an entity declaration refers to an undefined entity
+			///
+			void append(_In_reads_or_z_opt_(num_chars) const _Elem* source, _In_ size_t num_chars = SIZE_MAX)
+			{
+				_Assume_(source || !num_chars);
+				m_source.append(source, stdex::strnlen(source, num_chars));
+
+				// From here on work on the accumulated source: all positions are offsets into m_source.
+				source = m_source.data();
+				num_chars = m_source.size();
+
+				for (size_t i = m_num_parsed; i < num_chars;) {
+					if (m_is_cdata || m_is_rcdata) {
+						// Inside CDATA/RCDATA everything up to the condition end is taken verbatim.
+						if (m_condition_end.match(source, i, num_chars)) {
+							m_sequences.push_back(std::unique_ptr<sequence>(new sequence(
+								m_is_cdata ? stdex::parser::html_sequence_t::CDATA : stdex::parser::html_sequence_t::PCDATA,
+								m_num_parsed, i,
+								active_element())));
+							m_is_cdata = m_is_rcdata = false;
+
+							// The start of this section was counted as a valid condition too.
+							// Balance the counter, or a later stray condition end would be consumed.
+							if (m_num_valid_conditions)
+								m_num_valid_conditions--;
+
+							i = m_num_parsed = m_condition_end.interval.end;
+							continue;
+						}
+						goto next_char;
+					}
+
+					if (m_num_invalid_conditions) {
+						// Inside an ignored section: content is dropped, only nesting is tracked.
+						if (m_condition_end.match(source, i, num_chars)) {
+							m_num_invalid_conditions--;
+							i = m_num_parsed = m_condition_end.interval.end;
+							continue;
+						}
+						if (m_condition_start.match(source, i, num_chars)) {
+							// Nested condition: not evaluated, but counted so its end does not close the ignored section.
+							m_num_invalid_conditions++;
+							i = m_num_parsed = m_condition_start.interval.end;
+							continue;
+						}
+						goto next_char;
+					}
+
+					if (m_num_valid_conditions && m_condition_end.match(source, i, num_chars)) {
+						if (m_num_parsed < i)
+							m_sequences.push_back(std::unique_ptr<sequence>(new sequence(stdex::parser::html_sequence_t::text, m_num_parsed, i, active_element())));
+
+						m_num_valid_conditions--;
+						i = m_num_parsed = m_condition_end.interval.end;
+						continue;
+					}
+
+					if (m_condition_start.match(source, i, num_chars)) {
+						auto condition_src(replace_entities(source + m_condition_start.condition.start, m_condition_start.condition.size()));
+						if (!stdex::strcmp(condition_src.c_str(), "CDATA"))
+							m_is_cdata = true;
+						else if (!stdex::strcmp(condition_src.c_str(), "RCDATA"))
+							m_is_rcdata = true;
+
+						// No ignored section is open here: those are handled (and nested starts counted) above.
+						if (!stdex::strcmp(condition_src.c_str(), "IGNORE"))
+							m_num_invalid_conditions++;
+						else
+							m_num_valid_conditions++;
+
+						i = m_num_parsed = m_condition_start.interval.end;
+						continue;
+					}
+
+					if (m_is_special_element) {
+						// Inside a special element only its own ending tag is recognized; everything else is text.
+						auto parent = active_element();
+						_Assume_(parent);
+						if (m_tag.match(source, i, num_chars) &&
+							m_tag.type == stdex::parser::html_sequence_t::element_end &&
+							element::element_code(source + m_tag.name.start, m_tag.name.size()) == parent->code)
+						{
+							if (m_num_parsed < i)
+								m_sequences.push_back(std::unique_ptr<sequence>(new sequence(stdex::parser::html_sequence_t::text, m_num_parsed, i, parent)));
+							i = m_num_parsed = m_tag.interval.end;
+							std::unique_ptr<element_end> e(new element_end(std::move(m_tag), source, parent->parent, parent));
+							parent->end = e.get();
+							m_sequences.push_back(std::move(e));
+							m_element_stack.pop_back();
+							m_is_special_element = false;
+							continue;
+						}
+						goto next_char;
+					}
+
+					if (m_tag.match(source, i, num_chars)) {
+						// Anything skipped since the last parsed sequence is text.
+						if (m_num_parsed < i)
+							m_sequences.push_back(std::unique_ptr<sequence>(new sequence(stdex::parser::html_sequence_t::text, m_num_parsed, i, active_element())));
+						i = m_num_parsed = m_tag.interval.end;
+
+						switch (m_tag.type) {
+						case stdex::parser::html_sequence_t::element:
+						case stdex::parser::html_sequence_t::element_start: {
+							std::unique_ptr<element> e(
+								m_tag.type == stdex::parser::html_sequence_t::element ? new element(std::move(m_tag), source) :
+								m_tag.type == stdex::parser::html_sequence_t::element_start ? new element_start(std::move(m_tag), source) :
+								nullptr);
+
+							// Does this tag end any of the started elements?
+							for (size_t j = m_element_stack.size(); j--; ) {
+								auto starting_tag = m_element_stack[j];
+								_Assume_(starting_tag && starting_tag->type == stdex::parser::html_sequence_t::element_start);
+								if (element_traits::may_contain(starting_tag->code, e->code)) {
+									e->parent = starting_tag;
+									break;
+								}
+								e->parent = starting_tag->parent;
+								starting_tag->end = e.get();
+								m_element_stack.resize(j);
+							}
+
+							if (e->type == stdex::parser::html_sequence_t::element_start) {
+								auto e_start = static_cast<element_start*>(e.get());
+								if (element_traits::span(e->code) == element_span_t::immediate)
+									e_start->end = e.get();
+								else {
+									m_element_stack.push_back(e_start);
+									switch (e->code) {
+									case element_t::code:
+									case element_t::comment:
+									case element_t::script:
+									case element_t::style:
+										m_is_special_element = true;
+										break;
+									}
+								}
+							}
+
+							if (e->code == element_t::meta && m_charset == stdex::charset_id::system) {
+								bool is_content_type = false;
+								stdex::parser::html_attribute* content_attr = nullptr;
+								for (auto& attr : e->attributes) {
+									if (!stdex::strnicmp(source + attr.name.start, attr.name.size(), "http-equiv", SIZE_MAX, stdex::std_locale_C) &&
+										!stdex::strnicmp(source + attr.value.start, attr.value.size(), "content-type", SIZE_MAX, stdex::std_locale_C))
+										is_content_type = true;
+									else if (!stdex::strnicmp(source + attr.name.start, attr.name.size(), "content", SIZE_MAX, stdex::std_locale_C))
+										content_attr = &attr;
+								}
+								if (is_content_type && content_attr) {
+									// <meta http-equiv="Content-Type" content="..."> found.
+									stdex::parser::basic_mime_type<_Elem> content;
+									if (content.match(source, content_attr->value.start, content_attr->value.end) &&
+										content.charset)
+									{
+										std::string str;
+										str.reserve(content.charset.size());
+										for (size_t j = content.charset.start; j < content.charset.end; ++j)
+											str.push_back(static_cast<char>(source[j]));
+										m_charset = stdex::charset_from_name(str.c_str());
+									}
+								}
+							}
+
+							m_sequences.push_back(std::move(e));
+							break;
+						}
+						case stdex::parser::html_sequence_t::element_end: {
+							std::unique_ptr<element_end> e(new element_end(std::move(m_tag), source, active_element()));
+
+							// Find the started element this tag ends; elements started after it are dropped from the stack.
+							for (size_t j = m_element_stack.size(); j--; ) {
+								auto starting_tag = m_element_stack[j];
+								_Assume_(starting_tag && starting_tag->type == stdex::parser::html_sequence_t::element_start);
+								if (starting_tag->code == e->code ||
+									(starting_tag->code == element_t::unknown && e->code == element_t::unknown && !stdex::strnicmp(source + starting_tag->name.start, starting_tag->name.size(), source + e->name.start, e->name.size(), stdex::std_locale_C)))
+								{
+									e->start = starting_tag;
+									e->parent = starting_tag->parent;
+									starting_tag->end = e.get();
+									m_element_stack.resize(j);
+									break;
+								}
+							}
+
+							m_sequences.push_back(std::move(e));
+							break;
+						}
+						case stdex::parser::html_sequence_t::declaration:
+							if (m_tag.attributes.size() > 3 &&
+								!stdex::strnicmp(source + m_tag.attributes[0].name.start, m_tag.attributes[0].name.size(), "entity", SIZE_MAX, stdex::std_locale_C))
+							{
+								if (!stdex::strncmp(source + m_tag.attributes[1].name.start, m_tag.attributes[1].name.size(), "%", SIZE_MAX) &&
+									stdex::strncmp(source + m_tag.attributes[3].name.start, m_tag.attributes[3].name.size(), "SYSTEM", SIZE_MAX) &&
+									stdex::strncmp(source + m_tag.attributes[3].name.start, m_tag.attributes[3].name.size(), "PUBLIC", SIZE_MAX))
+								{
+									std::unique_ptr<entity<_Elem, _Traits, _Alloc>> e(new entity<_Elem, _Traits, _Alloc>());
+									e->name = m_tag.attributes[2].name;
+									e->value = replace_entities(source + m_tag.attributes[3].name.start, m_tag.attributes[3].name.size());
+									m_entities.push_back(std::move(e));
+								}
+
+								// TODO: Parse & entities and entities in SYSTEM and PUBLIC external files.
+							}
+							m_sequences.push_back(std::unique_ptr<sequence>(new declaration(std::move(m_tag), active_element())));
+							break;
+						case stdex::parser::html_sequence_t::comment:
+							m_sequences.push_back(std::unique_ptr<sequence>(new comment(std::move(m_tag), active_element())));
+							break;
+						case stdex::parser::html_sequence_t::instruction:
+							m_sequences.push_back(std::unique_ptr<sequence>(new instruction(std::move(m_tag), active_element())));
+							break;
+						default:
+							throw std::invalid_argument("unknown tag type");
+						}
+
+						continue;
+					}
+
+				next_char:
+					if (m_any_char.match(source, i, num_chars)) {
+						// Skip any character, but don't declare it as parsed yet. It might be a part of unfinished tag.
+						i = m_any_char.interval.end;
+					}
+					else
+						break;
+				}
+			}
+
+			///
+			/// Finalizes document when no more appending is planned
+			///
+			/// Source that has not been parsed yet is recorded as a text sequence and the stack of started elements is emptied.
+			///
+			void finalize()
+			{
+				const size_t source_end = m_source.size();
+				if (m_num_parsed < source_end) {
+					std::unique_ptr<sequence> trailing_text(new sequence(stdex::parser::html_sequence_t::text, m_num_parsed, source_end, active_element()));
+					m_sequences.push_back(std::move(trailing_text));
+				}
+				m_num_parsed = source_end;
+				m_element_stack.clear();
+			}
+
+			///
+			/// Parses HTML document source code
+			///
+			/// Replaces the current content: the document is emptied, `source` is parsed and the document is finalized.
+			///
+			/// \param[in] source     HTML source code
+			/// \param[in] num_chars  Code unit limit in `source`
+			///
+			inline void assign(_In_reads_or_z_opt_(num_chars) const _Elem* source, _In_ size_t num_chars = SIZE_MAX)
+			{
+				// Start from scratch...
+				clear();
+
+				// ...parse the source as a single chunk...
+				append(source, num_chars);
+
+				// ...and flush the unparsed remainder.
+				finalize();
+			}
+
+			///
+			/// Returns document HTML source code
+			///
+			/// \returns Reference to the source accumulated by the document
+			///
+			inline const std::basic_string<_Elem, _Traits, _Alloc>& source() const
+			{
+				return m_source;
+			}
+
+			friend class parser<_Elem, _Traits, _Alloc>;
+
+		protected:
+			///
+			/// Returns starting tag of currently active element
+			///
+			/// \returns The most recently started element still on the element stack, or `nullptr` if no element is known to be started
+			///
+			inline element_start* active_element() const
+			{
+				if (m_element_stack.empty())
+					return nullptr;
+				return m_element_stack.back();
+			}
+
+			///
+			/// Replaces entities with their content
+			///
+			/// Every `%name;` reference in the input is substituted with the value of the entity of that name; all other characters are copied.
+			///
+			/// \param[in] input      Text to process
+			/// \param[in] num_chars  Code unit limit in `input`
+			///
+			/// \returns Text with entity references replaced
+			///
+			/// \throws std::runtime_error  When `%` does not start a reference to a known entity
+			///
+			std::basic_string<_Elem, _Traits, _Alloc> replace_entities(_In_reads_or_z_opt_(num_chars) const _Elem* input, _In_ size_t num_chars) const
+			{
+				_Assume_(input || !num_chars);
+				const _Elem* const source = m_source.data();
+				std::basic_string<_Elem, _Traits, _Alloc> output;
+				size_t i = 0;
+				while (i < num_chars && input[i]) {
+					if (input[i] != '%') {
+						// Ordinary character: copy as is.
+						output += input[i++];
+						continue;
+					}
+
+					// Entity names are kept as positions in document source: look for one that is followed by ';' in the input.
+					bool replaced = false;
+					for (const auto& e : m_entities) {
+						const size_t entity_size = e->name.size();
+						if (i + entity_size + 1 >= num_chars)
+							continue;
+						if (stdex::strncmp(input + i + 1, source + e->name.start, entity_size) != 0)
+							continue;
+						if (input[i + entity_size + 1] != ';')
+							continue;
+
+						output += e->value;
+						i += entity_size + 2; // skip '%', the name and ';'
+						replaced = true;
+						break;
+					}
+					if (!replaced)
+						throw std::runtime_error("undefined entity");
+				}
+				return output;
+			}
+
+		protected:
+			std::basic_string<_Elem, _Traits, _Alloc> m_source; ///< Document HTML source code
+			size_t m_num_parsed;                                ///< Number of characters already parsed
+			stdex::charset_id m_charset;                        ///< Document charset
+
+			// Declaration parsing data
+			size_t m_num_valid_conditions;   ///< Number of started valid conditions
+			size_t m_num_invalid_conditions; ///< Number of started invalid conditions
+			bool m_is_cdata;                 ///< Inside of CDATA?
+			bool m_is_rcdata;                ///< Inside of RCDATA?
+			stdex::parser::basic_html_declaration_condition_start<_Elem> m_condition_start;
+			stdex::parser::basic_html_declaration_condition_end<_Elem> m_condition_end;
+			stdex::parser::basic_any_cu<_Elem> m_any_char;
+			std::vector>> m_entities; ///< Array of entities
+
+			// Element parsing data
+			stdex::parser::basic_html_tag<_Elem> m_tag;
+			sequence_store m_sequences;                         ///< Store of sequences
+			std::vector m_element_stack;        ///< LIFO stack of started elements
+			bool m_is_special_element;                          ///< Inside of a special element (