unicode: inject invalid character on encode failure

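...rather than throw. For example, encoding UTF-16 "Tešt." to ASCII with '?' as the invalid character now yields "Te?t.". This mimics Windows behaviour.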

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
2025-06-05 12:06:07 +02:00
parent f454ae9cab
commit 2bfe310d47
2 changed files with 37 additions and 22 deletions

View File

@@ -64,26 +64,33 @@ namespace UnitTests
// Unit test for stdex::charset_encoder: checks Windows-1250 -> UTF-8 conversion
// and UTF-16 -> ASCII conversion of a character that ASCII cannot represent.
//
// NOTE(review): this listing appears to be a rendered diff hunk without +/-
// markers, so several statements occur twice (presumably the pre- and
// post-change versions) and the braces do not balance as shown. Confirm
// against the actual file before editing.
void unicode::charset_encoder()
{
stdex::charset_encoder<char, char> win1250_to_utf8(stdex::charset_id::windows1250, stdex::charset_id::utf8);
{
stdex::charset_encoder<char, char> win1250_to_utf8(stdex::charset_id::windows1250, stdex::charset_id::utf8);
Assert::AreEqual(
"This is a test.",
win1250_to_utf8.convert("This is a test.").c_str());
Assert::AreEqual(
"Thíš i· a teşt.",
win1250_to_utf8.convert("Th\xed\x9a i\xb7 a te\xbat.").c_str());
std::string src, dst;
for (size_t i = 0; i < 1000; i++) {
src += "V ko\x9eu\x9a\xe8ku zlobnega mizarja stopiclja fant in kli\xe8" "e 0123456789.\r\n";
dst += "V kožuščku zlobnega mizarja stopiclja fant in kliče 0123456789.\r\n";
// Pure ASCII input is expected to come out unchanged.
Assert::AreEqual(
"This is a test.",
win1250_to_utf8.convert("This is a test.").c_str());
// Single-byte Windows-1250 characters (given as hex escapes) are expected to
// come out as the corresponding UTF-8 text.
Assert::AreEqual(
"Thíš i· a teşt.",
win1250_to_utf8.convert("Th\xed\x9a i\xb7 a te\xbat.").c_str());
// Build a 1000-line Windows-1250 input (src) and its expected UTF-8 form (dst).
std::string src, dst;
for (size_t i = 0; i < 1000; i++) {
// The literal is split after the \xe8 escape so that the following 'e' is not
// parsed as another hex digit of the escape sequence.
src += "V ko\x9eu\x9a\xe8ku zlobnega mizarja stopiclja fant in kli\xe8" "e 0123456789.\r\n";
dst += "V kožuščku zlobnega mizarja stopiclja fant in kliče 0123456789.\r\n";
}
Assert::AreEqual(dst.c_str(), win1250_to_utf8.convert(src).c_str());
// A zero character count is expected to give an empty result, both for a
// non-empty buffer and for a null pointer.
Assert::AreEqual(
"",
win1250_to_utf8.convert("test", 0).c_str());
Assert::AreEqual(
"",
win1250_to_utf8.convert(nullptr, 0).c_str());
}
{
// UTF-16 -> ASCII: the 'š' in the input has no ASCII representation, and the
// expected output has '?' in its place. The third constructor argument is
// presumably the character emitted for unencodable input - TODO confirm
// against the charset_encoder declaration.
stdex::charset_encoder<char16_t, char> encode(stdex::charset_id::utf16, stdex::charset_id::ascii, '?');
Assert::AreEqual("Te?t.", encode.convert(u"Tešt.").c_str());
}
Assert::AreEqual(dst.c_str(), win1250_to_utf8.convert(src).c_str());
Assert::AreEqual(
"",
win1250_to_utf8.convert("test", 0).c_str());
Assert::AreEqual(
"",
win1250_to_utf8.convert(nullptr, 0).c_str());
}
void unicode::normalize()