diff --git a/include/stdex/html.hpp b/include/stdex/html.hpp
index 0ed5db605..38fde6277 100644
--- a/include/stdex/html.hpp
+++ b/include/stdex/html.hpp
@@ -50,8 +50,9 @@ namespace stdex
 			_Inout_ std::basic_string<char, TR, AX>& dst,
 			_In_reads_or_z_opt_(num_chars) const char* src, _In_ size_t num_chars)
 		{
-			stdex_assert(src || !num_chars);
-			for (size_t i = 0; i < num_chars && src[i]; ++i) {
+			num_chars = stdex::strnlen(src, num_chars);
+			dst.reserve(dst.size() + num_chars + (num_chars >> 2));
+			for (size_t i = 0; i < num_chars; ++i) {
 				switch (src[i]) {
 				case '&': dst += "&amp;"; break;
 				case ';': dst += "&semi;"; break;
@@ -77,8 +78,9 @@ namespace stdex
 			_Inout_ std::basic_string<wchar_t, TR, AX>& dst,
 			_In_reads_or_z_opt_(num_chars) const wchar_t* src, _In_ size_t num_chars)
 		{
-			stdex_assert(src || !num_chars);
-			for (size_t i = 0; i < num_chars && src[i]; ++i) {
+			num_chars = stdex::strnlen(src, num_chars);
+			dst.reserve(dst.size() + num_chars + (num_chars >> 2));
+			for (size_t i = 0; i < num_chars; ++i) {
 				switch (src[i]) {
 				case L'&': dst += L"&amp;"; break;
 				case L';': dst += L"&semi;"; break;
@@ -92,6 +94,34 @@ namespace stdex
 			}
 		}
 
+		///
+		/// Appends HTML escaped string
+		///
+		/// \param[in,out] dst        String to append to
+		/// \param[in]     src        Source string
+		/// \param[in]     num_chars  Code unit limit in string `src`
+		///
+		template<class TR = std::char_traits<char16_t>, class AX = std::allocator<char16_t>>
+		void escape(
+			_Inout_ std::basic_string<char16_t, TR, AX>& dst,
+			_In_reads_or_z_opt_(num_chars) const char16_t* src, _In_ size_t num_chars)
+		{
+			num_chars = stdex::strnlen(src, num_chars);
+			dst.reserve(dst.size() + num_chars + (num_chars >> 2));
+			for (size_t i = 0; i < num_chars; ++i) {
+				switch (src[i]) {
+				case u'&': dst += u"&amp;"; break;
+				case u';': dst += u"&semi;"; break;
+				case u'\"': dst += u"&quot;"; break;
+				case u'\'': dst += u"&#x27;"; break;
+				case u'<': dst += u"&lt;"; break;
+				case u'>': dst += u"&gt;"; break;
+				case u'\u00a0': dst += u"&nbsp;"; break; // No-break space must be escaped as SGML entity, otherwise browsers treat it as a normal space.
+				default: dst += src[i]; break;
+				}
+			}
+		}
+
 		///
 		/// Appends HTML escaped string
 		///
@@ -156,6 +186,24 @@ namespace stdex
 			}
 		}
 
+		///
+		/// Appends HTML escaped character
+		///
+		/// \param[in,out] dst  String to append to
+		/// \param[in]     chr  Source character
+		///
+		template<class TR = std::char_traits<char16_t>, class AX = std::allocator<char16_t>>
+		void escape_min(_Inout_ std::basic_string<char16_t, TR, AX>& dst, _In_ char16_t chr)
+		{
+			switch (chr) {
+			case u'&': dst += u"&amp;"; break;
+			case u'<': dst += u"&lt;"; break;
+			case u'>': dst += u"&gt;"; break;
+			case u'\u00a0': dst += u"&nbsp;"; break; // No-break space must be escaped as SGML entity, otherwise browsers treat it as a normal space.
+			default: dst += chr; break;
+			}
+		}
+
 		///
 		/// Appends HTML escaped string
 		///
@@ -168,8 +216,9 @@ namespace stdex
 			_Inout_ std::basic_string<char, TR, AX>& dst,
 			_In_reads_or_z_opt_(num_chars) const char* src, _In_ size_t num_chars)
 		{
-			stdex_assert(src || !num_chars);
-			for (size_t i = 0; i < num_chars && src[i]; ++i) {
+			num_chars = stdex::strnlen(src, num_chars);
+			dst.reserve(dst.size() + num_chars + (num_chars >> 2));
+			for (size_t i = 0; i < num_chars; ++i) {
 				switch (src[i]) {
 				case '&': dst += "&amp;"; break;
 				case '<': dst += "&lt;"; break;
@@ -192,8 +241,9 @@ namespace stdex
 			_Inout_ std::basic_string<wchar_t, TR, AX>& dst,
 			_In_reads_or_z_opt_(num_chars) const wchar_t* src, _In_ size_t num_chars)
 		{
-			stdex_assert(src || !num_chars);
-			for (size_t i = 0; i < num_chars && src[i]; ++i) {
+			num_chars = stdex::strnlen(src, num_chars);
+			dst.reserve(dst.size() + num_chars + (num_chars >> 2));
+			for (size_t i = 0; i < num_chars; ++i) {
 				switch (src[i]) {
 				case L'&': dst += L"&amp;"; break;
 				case L'<': dst += L"&lt;"; break;
@@ -204,6 +254,31 @@ namespace stdex
 			}
 		}
 
+		///
+		/// Appends HTML escaped string
+		///
+		/// \param[in,out] dst        String to append to
+		/// \param[in]     src        Source string
+		/// \param[in]     num_chars  Code unit limit in string `src`
+		///
+		template<class TR = std::char_traits<char16_t>, class AX = std::allocator<char16_t>>
+		void escape_min(
+			_Inout_ std::basic_string<char16_t, TR, AX>& dst,
+			_In_reads_or_z_opt_(num_chars) const char16_t* src, _In_ size_t num_chars)
+		{
+			num_chars = stdex::strnlen(src, num_chars);
+			dst.reserve(dst.size() + num_chars + (num_chars >> 2));
+			for (size_t i = 0; i < num_chars; ++i) {
+				switch (src[i]) {
+				case u'&': dst += u"&amp;"; break;
+				case u'<': dst += u"&lt;"; break;
+				case u'>': dst += u"&gt;"; break;
+				case u'\u00a0': dst += u"&nbsp;"; break; // No-break space must be escaped as SGML entity, otherwise browsers treat it as a normal space.
+				default: dst += src[i]; break;
+				}
+			}
+		}
+
 		///
 		/// Appends HTML escaped string
 		///
@@ -427,7 +502,7 @@ namespace stdex
 					case 'D': case 'd':
 					case 'E': case 'e':
 					case 'F': case 'f': {
-						wchar_t chr = 0;
+						char32_t chr = 0;
 						size_t end = std::min(num_chars, i + 6);
 
 						for (; i < end; ++i) {
@@ -487,46 +562,21 @@ namespace stdex
 		/// \param[in]     src        Source string
 		/// \param[in]     num_chars  Code unit limit in string `src`
 		///
-		template<class TR = std::char_traits<char>, class AX = std::allocator<char>>
+		template<class T, class TR = std::char_traits<T>, class AX = std::allocator<T>>
 		void css_escape(
-			_Inout_ std::basic_string<char, TR, AX>& dst,
-			_In_reads_or_z_opt_(num_chars) const char* src, _In_ size_t num_chars)
+			_Inout_ std::basic_string<T, TR, AX>& dst,
+			_In_reads_or_z_opt_(num_chars) const T* src, _In_ size_t num_chars)
 		{
-			stdex_assert(src || !num_chars);
-			for (size_t i = 0; i < num_chars && src[i]; ++i) {
+			num_chars = stdex::strnlen(src, num_chars);
+			dst.reserve(dst.size() + num_chars + (num_chars >> 3));
+			for (size_t i = 0; i < num_chars; ++i) {
 				switch (src[i]) {
-				case '\\': dst += "\\\\"; break;
-				case '\n': dst += "\\n"; break;
-				case '\r': dst += "\\r"; break;
-				case '\t': dst += "\\t"; break;
-				case '\"': dst += "\\\""; break;
-				case '\'': dst += "\\'"; break;
-				default: dst += src[i]; break;
-				}
-			}
-		}
-
-		///
-		/// Appends escaped CSS string
-		///
-		/// \param[in,out] dst        String to append to
-		/// \param[in]     src        Source string
-		/// \param[in]     num_chars  Code unit limit in string `src`
-		///
-		template<class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
-		void css_escape(
-			_Inout_ std::basic_string<wchar_t, TR, AX>& dst,
-			_In_reads_or_z_opt_(num_chars) const wchar_t* src, _In_ size_t num_chars)
-		{
-			stdex_assert(src || !num_chars);
-			for (size_t i = 0; i < num_chars && src[i]; ++i) {
-				switch (src[i]) {
-				case L'\\': dst += L"\\\\"; break;
-				case L'\n': dst += L"\\n"; break;
-				case L'\r': dst += L"\\r"; break;
-				case L'\t': dst += L"\\t"; break;
-				case L'\"': dst += L"\\\""; break;
-				case L'\'': dst += L"\\'"; break;
+				case '\\': dst += '\\'; dst += '\\'; break;
+				case '\n': dst += '\\'; dst += 'n'; break;
+				case '\r': dst += '\\'; dst += 'r'; break;
+				case '\t': dst += '\\'; dst += 't'; break;
+				case '\"': dst += '\\'; dst += '"'; break;
+				case '\'': dst += '\\'; dst += '\''; break;
 				default: dst += src[i]; break;
 				}
 			}
@@ -2015,27 +2065,47 @@ namespace stdex
 			///
 			/// \returns Number of code units appended
 			///
-			template<class TR = std::char_traits<char>, class AX = std::allocator<char>>
-			size_t append_tag(_Inout_ std::basic_string<char, TR, AX>& str) const
+			template<class T, class TR = std::char_traits<T>, class AX = std::allocator<T>>
+			size_t append_tag(_Inout_ std::basic_string<T, TR, AX>& str) const
 			{
 				size_t n = str.size();
-				// Use %X instead of %p to omit leading zeros and save space.
-				stdex::appendf(str, "%c%zX%c", stdex::locale_C, token_tag_start, reinterpret_cast<uintptr_t>(this), token_tag_end);
-				return str.size() - n;
-			}
+				str.reserve(str.size() + 2 + sizeof(this)*2);
+				str += token_tag_start;
 
-			///
-			/// Appends token tag to the source code
-			///
-			/// \param[in,out] str  Source code
-			///
-			/// \returns Number of code units appended
-			///
-			template<class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
-			size_t append_tag(_Inout_ std::basic_string<wchar_t, TR, AX>& str) const
-			{
-				// Use %X instead of %p to omit leading zeros and save space.
-				return stdex::appendf(str, L"%c%zX%c", stdex::locale_C, static_cast<wchar_t>(token_tag_start), reinterpret_cast<uintptr_t>(this), static_cast<wchar_t>(token_tag_end));
+				// Sure snprintf looks cleaner, but we don't have it for char16_t.
+				static const char digits[] =
+					"000102030405060708090A0B0C0D0E0F"
+					"101112131415161718191A1B1C1D1E1F"
+					"202122232425262728292A2B2C2D2E2F"
+					"303132333435363738393A3B3C3D3E3F"
+					"404142434445464748494A4B4C4D4E4F"
+					"505152535455565758595A5B5C5D5E5F"
+					"606162636465666768696A6B6C6D6E6F"
+					"707172737475767778797A7B7C7D7E7F"
+					"808182838485868788898A8B8C8D8E8F"
+					"909192939495969798999A9B9C9D9E9F"
+					"A0A1A2A3A4A5A6A7A8A9AAABACADAEAF"
+					"B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF"
+					"C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF"
+					"D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF"
+					"E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF"
+					"F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF";
+				char buf[16]; // Enough for 64-bit numbers: two hex digits per byte.
+				auto x = reinterpret_cast<uintptr_t>(this);
+				for (size_t i = _countof(buf); i;) {
+					size_t pos = (x & 0xFF) << 1;
+					buf[--i] = digits[pos + 1];
+					buf[--i] = digits[pos];
+					x >>= 8;
+					if (!x) {
+						for (i += pos < 32 ? 1 : 0; i < _countof(buf); ++i) // Drop only a leading '0' digit of the top byte; never drop a significant digit.
+							str += buf[i];
+						break;
+					}
+				}
+
+				str += token_tag_end;
+				return str.size() - n;
 			}
 
 			template