Windows takes care of the internal converter state in MultiByteToWideChar and WideCharToMultiByte and keeps these functions thread-safe. On other platforms, iconv requires the user to set up and keep the converter state for thread-safe conversions. Setting it up for every string conversion sounds time-consuming; therefore, the concept of a string converter (or converter state) has been extended to Windows too, allowing uniform client code. On Windows, using charset_encoder has no performance benefit, whereas on Linux and macOS there should be one. To be measured... Signed-off-by: Simon Rozman <simon@rozman.si>
89 lines
2.5 KiB
C++
89 lines
2.5 KiB
C++
/*
|
|
SPDX-License-Identifier: MIT
|
|
Copyright © 2023 Amebis
|
|
*/
|
|
|
|
#include "pch.h"
|
|
|
|
using namespace std;
|
|
#ifdef _WIN32
|
|
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
|
|
#endif
|
|
|
|
namespace UnitTests
|
|
{
|
|
TEST_CLASS(unicode)
|
|
{
|
|
public:
|
|
TEST_METHOD(str2wstr)
|
|
{
|
|
Assert::AreEqual(
|
|
L"This is a test.",
|
|
stdex::str2wstr("This is a test.", stdex::charset_id::utf8).c_str());
|
|
Assert::AreEqual(
|
|
L"Th\u00ed\u0161 i\u22c5 a te\u0073\u0304t. 😀😅",
|
|
stdex::str2wstr("Thíš i⋅ a tes̄t. 😀😅", stdex::charset_id::utf8).c_str());
|
|
string src;
|
|
wstring dst;
|
|
for (size_t i = 0; i < 2000; i++) {
|
|
src += "🐔Test🐮\r\n";
|
|
dst += L"🐔Test🐮\r\n";
|
|
}
|
|
Assert::AreEqual(dst.c_str(), stdex::str2wstr(src, stdex::charset_id::utf8).c_str());
|
|
Assert::AreEqual(
|
|
L"",
|
|
stdex::str2wstr("test", 0, stdex::charset_id::utf8).c_str());
|
|
Assert::AreEqual(
|
|
L"",
|
|
stdex::str2wstr(nullptr, 0, stdex::charset_id::utf8).c_str());
|
|
}
|
|
|
|
TEST_METHOD(wstr2str)
|
|
{
|
|
Assert::AreEqual(
|
|
"This is a test.",
|
|
stdex::wstr2str(L"This is a test.", stdex::charset_id::utf8).c_str());
|
|
Assert::AreEqual(
|
|
"Th\xc3\xad\xc5\xa1 i\xe2\x8b\x85 a tes\xcc\x84t. \xf0\x9f\x98\x80\xf0\x9f\x98\x85",
|
|
stdex::wstr2str(L"Thíš i⋅ a tes̄t. 😀😅", stdex::charset_id::utf8).c_str());
|
|
wstring src;
|
|
string dst;
|
|
for (size_t i = 0; i < 2000; i++) {
|
|
src += L"🐔Test🐮\r\n";
|
|
dst += "🐔Test🐮\r\n";
|
|
}
|
|
Assert::AreEqual(dst.c_str(), stdex::wstr2str(src, stdex::charset_id::utf8).c_str());
|
|
Assert::AreEqual(
|
|
"",
|
|
stdex::wstr2str(L"test", 0, stdex::charset_id::utf8).c_str());
|
|
Assert::AreEqual(
|
|
"",
|
|
stdex::wstr2str(nullptr, 0, stdex::charset_id::utf8).c_str());
|
|
}
|
|
|
|
TEST_METHOD(charset_encoder)
|
|
{
|
|
stdex::charset_encoder<char, char> win1250_to_utf8(stdex::charset_id::windows1250, stdex::charset_id::utf8);
|
|
|
|
Assert::AreEqual(
|
|
"This is a test.",
|
|
win1250_to_utf8.convert("This is a test.").c_str());
|
|
Assert::AreEqual(
|
|
"Thíš i· a teşt.",
|
|
win1250_to_utf8.convert("Th\xed\x9a i\xb7 a te\xbat.").c_str());
|
|
string src, dst;
|
|
for (size_t i = 0; i < 1000; i++) {
|
|
src += "V ko\x9eu\x9a\xe8ku zlobnega mizarja stopiclja fant in kli\xe8" "e 0123456789.\r\n";
|
|
dst += "V kožuščku zlobnega mizarja stopiclja fant in kliče 0123456789.\r\n";
|
|
}
|
|
Assert::AreEqual(dst.c_str(), win1250_to_utf8.convert(src).c_str());
|
|
Assert::AreEqual(
|
|
"",
|
|
win1250_to_utf8.convert("test", 0).c_str());
|
|
Assert::AreEqual(
|
|
"",
|
|
win1250_to_utf8.convert(nullptr, 0).c_str());
|
|
}
|
|
};
|
|
}
|