unicode: inject invalid character on encode failure

...rather than throw. This mimics Windows behaviour.

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2025-06-05 12:06:07 +02:00
parent f454ae9cab
commit 2bfe310d47
2 changed files with 37 additions and 22 deletions

View File

@ -63,6 +63,7 @@ namespace UnitTests
}
void unicode::charset_encoder()
{
{
stdex::charset_encoder<char, char> win1250_to_utf8(stdex::charset_id::windows1250, stdex::charset_id::utf8);
@ -86,6 +87,12 @@ namespace UnitTests
win1250_to_utf8.convert(nullptr, 0).c_str());
}
{
// Encoder constructed with '?' as its third (substitute character) argument:
// the 'š' in the UTF-16 input is expected to come out as '?' in the ASCII
// result instead of the conversion failing.
stdex::charset_encoder<char16_t, char> encode(stdex::charset_id::utf16, stdex::charset_id::ascii, '?');
Assert::AreEqual("Te?t.", encode.convert(u"Tešt.").c_str());
}
}
void unicode::normalize()
{
#ifdef _WIN32

View File

@ -1,4 +1,4 @@
/*
/*
SPDX-License-Identifier: MIT
Copyright © 2023-2025 Amebis
*/
@ -153,11 +153,13 @@ namespace stdex
{
protected:
charset_id m_from, m_to;
T_to m_invalid;
public:
charset_encoder(_In_ charset_id from, _In_ charset_id to) :
charset_encoder(_In_ charset_id from, _In_ charset_id to, _In_ T_to invalid = '?') :
m_from(from),
m_to(to)
m_to(to),
m_invalid(invalid)
{
#ifdef _WIN32
m_from_wincp = to_encoding(from);
@ -198,7 +200,6 @@ namespace stdex
#ifdef _WIN32
constexpr DWORD dwFlagsWCMB = 0;
constexpr LPCCH lpDefaultChar = NULL;
stdex_assert(src);
if (m_from_wincp == m_to_wincp) _Unlikely_{
@ -235,6 +236,7 @@ namespace stdex
#pragma warning(suppress: 4127) // Can't use precompiler #if on template arguments, using "if" makes MSVC warnings.
if constexpr (sizeof(T_from) == sizeof(wchar_t) && sizeof(T_to) == sizeof(char)) {
stdex_assert(count_src < INT_MAX || count_src == SIZE_MAX);
LPCCH lpDefaultChar = m_to_wincp == charset_id::utf8 || m_to_wincp == charset_id::utf7 ? NULL : &m_invalid;
// Try to convert to stack buffer first.
CHAR szStackBuffer[1024 / sizeof(CHAR)];
@ -261,6 +263,7 @@ namespace stdex
#pragma warning(suppress: 4127) // Can't use precompiler #if on template arguments, using "if" makes MSVC warnings.
if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(char)) {
stdex_assert(count_src < INT_MAX || count_src == SIZE_MAX);
LPCCH lpDefaultChar = m_to_wincp == charset_id::utf8 || m_to_wincp == charset_id::utf7 ? NULL : &m_invalid;
// Try to convert to stack buffer first.
DWORD dwResult;
@ -325,6 +328,11 @@ namespace stdex
break;
if (errno == E2BIG)
continue;
// EILSEQ: rather than falling through to the throw below, append one
// substitute character (m_invalid) to the output, step past a single
// T_from element of the input and go round the loop again.
// NOTE(review): exactly one T_from element is skipped per failure; for
// multi-unit source encodings this presumably emits one substitute per
// unit rather than per character - confirm this is intended.
if (errno == EILSEQ) {
dst.append(1, m_invalid);
++src; src_size -= sizeof(T_from);
continue;
}
throw std::system_error(errno, std::system_category(), "iconv failed");
}
#endif