unicode: inject invalid character on encode failure
...rather than throw. This mimics Windows behaviour.

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
parent
f454ae9cab
commit
2bfe310d47
@ -64,26 +64,33 @@ namespace UnitTests
|
|||||||
|
|
||||||
void unicode::charset_encoder()
|
void unicode::charset_encoder()
|
||||||
{
|
{
|
||||||
stdex::charset_encoder<char, char> win1250_to_utf8(stdex::charset_id::windows1250, stdex::charset_id::utf8);
|
{
|
||||||
|
stdex::charset_encoder<char, char> win1250_to_utf8(stdex::charset_id::windows1250, stdex::charset_id::utf8);
|
||||||
|
|
||||||
Assert::AreEqual(
|
Assert::AreEqual(
|
||||||
"This is a test.",
|
"This is a test.",
|
||||||
win1250_to_utf8.convert("This is a test.").c_str());
|
win1250_to_utf8.convert("This is a test.").c_str());
|
||||||
Assert::AreEqual(
|
Assert::AreEqual(
|
||||||
"Thíš i· a teşt.",
|
"Thíš i· a teşt.",
|
||||||
win1250_to_utf8.convert("Th\xed\x9a i\xb7 a te\xbat.").c_str());
|
win1250_to_utf8.convert("Th\xed\x9a i\xb7 a te\xbat.").c_str());
|
||||||
std::string src, dst;
|
std::string src, dst;
|
||||||
for (size_t i = 0; i < 1000; i++) {
|
for (size_t i = 0; i < 1000; i++) {
|
||||||
src += "V ko\x9eu\x9a\xe8ku zlobnega mizarja stopiclja fant in kli\xe8" "e 0123456789.\r\n";
|
src += "V ko\x9eu\x9a\xe8ku zlobnega mizarja stopiclja fant in kli\xe8" "e 0123456789.\r\n";
|
||||||
dst += "V kožuščku zlobnega mizarja stopiclja fant in kliče 0123456789.\r\n";
|
dst += "V kožuščku zlobnega mizarja stopiclja fant in kliče 0123456789.\r\n";
|
||||||
|
}
|
||||||
|
Assert::AreEqual(dst.c_str(), win1250_to_utf8.convert(src).c_str());
|
||||||
|
Assert::AreEqual(
|
||||||
|
"",
|
||||||
|
win1250_to_utf8.convert("test", 0).c_str());
|
||||||
|
Assert::AreEqual(
|
||||||
|
"",
|
||||||
|
win1250_to_utf8.convert(nullptr, 0).c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
stdex::charset_encoder<char16_t, char> encode(stdex::charset_id::utf16, stdex::charset_id::ascii, '?');
|
||||||
|
Assert::AreEqual("Te?t.", encode.convert(u"Tešt.").c_str());
|
||||||
}
|
}
|
||||||
Assert::AreEqual(dst.c_str(), win1250_to_utf8.convert(src).c_str());
|
|
||||||
Assert::AreEqual(
|
|
||||||
"",
|
|
||||||
win1250_to_utf8.convert("test", 0).c_str());
|
|
||||||
Assert::AreEqual(
|
|
||||||
"",
|
|
||||||
win1250_to_utf8.convert(nullptr, 0).c_str());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void unicode::normalize()
|
void unicode::normalize()
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*
|
/*
|
||||||
SPDX-License-Identifier: MIT
|
SPDX-License-Identifier: MIT
|
||||||
Copyright © 2023-2025 Amebis
|
Copyright © 2023-2025 Amebis
|
||||||
*/
|
*/
|
||||||
@ -153,11 +153,13 @@ namespace stdex
|
|||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
charset_id m_from, m_to;
|
charset_id m_from, m_to;
|
||||||
|
T_to m_invalid;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
charset_encoder(_In_ charset_id from, _In_ charset_id to) :
|
charset_encoder(_In_ charset_id from, _In_ charset_id to, _In_ T_to invalid = '?') :
|
||||||
m_from(from),
|
m_from(from),
|
||||||
m_to(to)
|
m_to(to),
|
||||||
|
m_invalid(invalid)
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
m_from_wincp = to_encoding(from);
|
m_from_wincp = to_encoding(from);
|
||||||
@ -198,7 +200,6 @@ namespace stdex
|
|||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
constexpr DWORD dwFlagsWCMB = 0;
|
constexpr DWORD dwFlagsWCMB = 0;
|
||||||
constexpr LPCCH lpDefaultChar = NULL;
|
|
||||||
|
|
||||||
stdex_assert(src);
|
stdex_assert(src);
|
||||||
if (m_from_wincp == m_to_wincp) _Unlikely_{
|
if (m_from_wincp == m_to_wincp) _Unlikely_{
|
||||||
@ -235,6 +236,7 @@ namespace stdex
|
|||||||
#pragma warning(suppress: 4127) // Can't use precompiler #if on template arguments, using "if" makes MSVC warnings.
|
#pragma warning(suppress: 4127) // Can't use precompiler #if on template arguments, using "if" makes MSVC warnings.
|
||||||
if constexpr (sizeof(T_from) == sizeof(wchar_t) && sizeof(T_to) == sizeof(char)) {
|
if constexpr (sizeof(T_from) == sizeof(wchar_t) && sizeof(T_to) == sizeof(char)) {
|
||||||
stdex_assert(count_src < INT_MAX || count_src == SIZE_MAX);
|
stdex_assert(count_src < INT_MAX || count_src == SIZE_MAX);
|
||||||
|
LPCCH lpDefaultChar = m_to_wincp == charset_id::utf8 || m_to_wincp == charset_id::utf7 ? NULL : &m_invalid;
|
||||||
|
|
||||||
// Try to convert to stack buffer first.
|
// Try to convert to stack buffer first.
|
||||||
CHAR szStackBuffer[1024 / sizeof(CHAR)];
|
CHAR szStackBuffer[1024 / sizeof(CHAR)];
|
||||||
@ -261,6 +263,7 @@ namespace stdex
|
|||||||
#pragma warning(suppress: 4127) // Can't use precompiler #if on template arguments, using "if" makes MSVC warnings.
|
#pragma warning(suppress: 4127) // Can't use precompiler #if on template arguments, using "if" makes MSVC warnings.
|
||||||
if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(char)) {
|
if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(char)) {
|
||||||
stdex_assert(count_src < INT_MAX || count_src == SIZE_MAX);
|
stdex_assert(count_src < INT_MAX || count_src == SIZE_MAX);
|
||||||
|
LPCCH lpDefaultChar = m_to_wincp == charset_id::utf8 || m_to_wincp == charset_id::utf7 ? NULL : &m_invalid;
|
||||||
|
|
||||||
// Try to convert to stack buffer first.
|
// Try to convert to stack buffer first.
|
||||||
DWORD dwResult;
|
DWORD dwResult;
|
||||||
@ -325,6 +328,11 @@ namespace stdex
|
|||||||
break;
|
break;
|
||||||
if (errno == E2BIG)
|
if (errno == E2BIG)
|
||||||
continue;
|
continue;
|
||||||
|
if (errno == EILSEQ) {
|
||||||
|
dst.append(1, m_invalid);
|
||||||
|
++src; src_size -= sizeof(T_from);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
throw std::system_error(errno, std::system_category(), "iconv failed");
|
throw std::system_error(errno, std::system_category(), "iconv failed");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user