From 13703b17477026034fee0e456042311c5734bf4b Mon Sep 17 00:00:00 2001
From: Simon Rozman <simon@rozman.si>
Date: Thu, 14 Sep 2023 12:28:05 +0200
Subject: [PATCH] unicode: extend conversion with reusable charset_encoder

Windows takes care of internal converter state in MultiByteToWideChar
and WideCharToMultiByte and keeps them thread-safe. On other platforms,
iconv requires the caller to set up and keep converter state for
thread-safe conversions. Doing that for every string conversion sounds
time-consuming, so the concept of a string converter (or converter
state) has been extended to Windows too, allowing uniform client code.
On Windows, using charset_encoder has no performance benefit, whereas on
Linux and macOS there should be one. To be measured...

Signed-off-by: Simon Rozman <simon@rozman.si>
---
 UnitTests/main.cpp        |   1 +
 UnitTests/unicode.cpp     |  38 ++++
 include/stdex/unicode.hpp | 383 ++++++++++++++++++++++++++++++++------
 3 files changed, 360 insertions(+), 62 deletions(-)
diff --git a/UnitTests/main.cpp b/UnitTests/main.cpp
index 65b481d29..266de47ed 100644
--- a/UnitTests/main.cpp
+++ b/UnitTests/main.cpp
@@ -28,6 +28,7 @@ int main(int argc, const char * argv[])
 		UnitTests::stream::open_close();
 		UnitTests::unicode::str2wstr();
 		UnitTests::unicode::wstr2str();
+		UnitTests::unicode::charset_encoder();
 		std::cout << "PASS\n";
 		return 0;
 	}
diff --git a/UnitTests/unicode.cpp b/UnitTests/unicode.cpp
index 5976cd601..fcc0bc40a 100644
--- a/UnitTests/unicode.cpp
+++ b/UnitTests/unicode.cpp
@@ -23,6 +23,13 @@ namespace UnitTests
 			Assert::AreEqual(
 				L"Th\u00ed\u0161 i\u22c5 a te\u0073\u0304t. 😀😅",
 				stdex::str2wstr("Thíš i⋅ a tes̄t. 😀😅", stdex::charset_id::utf8).c_str());
+			string src;
+			wstring dst;
+			for (size_t i = 0; i < 2000; i++) {
+				src += "🐔Test🐮\r\n";
+				dst += L"🐔Test🐮\r\n";
+			}
+			Assert::AreEqual(dst.c_str(), stdex::str2wstr(src, stdex::charset_id::utf8).c_str());
 			Assert::AreEqual(
 				L"",
 				stdex::str2wstr("test", 0, stdex::charset_id::utf8).c_str());
@@ -39,6 +46,13 @@ namespace UnitTests
 			Assert::AreEqual(
 				"Th\xc3\xad\xc5\xa1 i\xe2\x8b\x85 a tes\xcc\x84t. \xf0\x9f\x98\x80\xf0\x9f\x98\x85",
 				stdex::wstr2str(L"Thíš i⋅ a tes̄t. 😀😅", stdex::charset_id::utf8).c_str());
+			wstring src;
+			string dst;
+			for (size_t i = 0; i < 2000; i++) {
+				src += L"🐔Test🐮\r\n";
+				dst += "🐔Test🐮\r\n";
+			}
+			Assert::AreEqual(dst.c_str(), stdex::wstr2str(src, stdex::charset_id::utf8).c_str());
 			Assert::AreEqual(
 				"",
 				stdex::wstr2str(L"test", 0, stdex::charset_id::utf8).c_str());
@@ -46,5 +60,29 @@ namespace UnitTests
 				"",
 				stdex::wstr2str(nullptr, 0, stdex::charset_id::utf8).c_str());
 		}
+
+		TEST_METHOD(charset_encoder)
+		{
+			stdex::charset_encoder<char, char> win1250_to_utf8(stdex::charset_id::windows1250, stdex::charset_id::utf8); // One encoder instance is reused by every assertion below.
+
+			Assert::AreEqual( // Plain ASCII text must come out unchanged.
+				"This is a test.",
+				win1250_to_utf8.convert("This is a test.").c_str());
+			Assert::AreEqual( // Non-ASCII Windows-1250 bytes must be converted to UTF-8.
+				"Thíš i· a teşt.",
+				win1250_to_utf8.convert("Th\xed\x9a i\xb7 a te\xbat.").c_str());
+			string src, dst; // Source (Windows-1250) and expected result (UTF-8) of a long, 1000-line text.
+			for (size_t i = 0; i < 1000; i++) {
+				src += "V ko\x9eu\x9a\xe8ku zlobnega mizarja stopiclja fant in kli\xe8" "e 0123456789.\r\n";
+				dst += "V kožuščku zlobnega mizarja stopiclja fant in kliče 0123456789.\r\n";
+			}
+			Assert::AreEqual(dst.c_str(), win1250_to_utf8.convert(src).c_str());
+			Assert::AreEqual( // A zero code unit limit must yield an empty string.
+				"",
+				win1250_to_utf8.convert("test", 0).c_str());
+			Assert::AreEqual( // A null source with a zero limit must yield an empty string.
+				"",
+				win1250_to_utf8.convert(nullptr, 0).c_str());
+		}
 	};
 }
diff --git a/include/stdex/unicode.hpp b/include/stdex/unicode.hpp
index a20584cd8..369592fb9 100644
--- a/include/stdex/unicode.hpp
+++ b/include/stdex/unicode.hpp
@@ -13,6 +13,7 @@
 #include <stdint.h>
 #ifndef _WIN32
 #include <iconv.h>
+#include <langinfo.h>
 #endif
 #include <memory>
 #include <string>
@@ -22,40 +23,179 @@ namespace stdex
 	enum class charset_id : uint16_t {
 #ifdef _WIN32
 		system = CP_ACP,
+		oem = CP_OEMCP, // Resolved with GetOEMCP() by charset_encoder::to_encoding()
 		utf8 = CP_UTF8,
 		utf16 = 1200 /*CP_WINUNICODE*/,
+		windows1250 = 1250, // On Windows, the ID value is the code page number itself
+		windows1251 = 1251,
+		windows1252 = 1252,
 #else
 		system = 0,
 		utf8,
 		utf16,
 		utf32,
+		windows1250, // Passed to iconv as "CP1250" by charset_encoder::to_encoding()
+		windows1251, // Passed to iconv as "CP1251"
+		windows1252, // Passed to iconv as "CP1252"
+
+		_max // Number of IDs above; sizes the encoding name table in charset_encoder::to_encoding()
 #endif
 	};
 
-#ifndef _WIN32
+#ifdef _WIN32
+	constexpr charset_id wchar_t_charset = charset_id::utf16; // Charset paired with wchar_t strings by the str2wstr/wstr2str helpers on Windows
+#else
+	constexpr charset_id wchar_t_charset = charset_id::utf32; // Charset paired with wchar_t strings by the same helpers elsewhere
+#endif
+
 	///
-	/// Unicode converter context
+	/// Encoding converter context
 	///
 	template <typename T_from, typename T_to>
-	class iconverter
+	class charset_encoder
 	{
 	public:
-		iconverter(_In_ charset_id from, _In_ charset_id to)
+		charset_encoder(_In_ charset_id from, _In_ charset_id to) // Prepares conversion from charset "from" to charset "to"
 		{
+#ifdef _WIN32
+			m_from = to_encoding(from); // Windows: only the resolved code page numbers are stored
+			m_to = to_encoding(to);
+#else
 			m_handle = iconv_open(to_encoding(to), to_encoding(from));
 			if (m_handle == (iconv_t)-1)
 				throw std::runtime_error("iconv_open failed");
+#endif
 		}
 
-		~iconverter()
+#ifndef _WIN32
+		~charset_encoder() // NOTE(review): no copy/move operations are deleted in the visible part of the class; a copied encoder would close the same iconv handle twice - confirm
 		{
 			iconv_close(m_handle);
 		}
+#endif
 
-		void convert(_Inout_ std::basic_string<T_to> &dst, _In_reads_or_z_opt_(count) const T_from* src, _In_ size_t count_src) const
+		///
+		/// Convert string and append the result to string; throws std::runtime_error when the conversion fails
+		///
+		/// \param[in,out] dst        String to append converted string to
+		/// \param[in]     src        String to convert
+		/// \param[in]     count_src  Maximum number of src code units to convert
+		///
+		template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
+		void strcat(
+			_Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to> &dst,
+			_In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
 		{
-			T_to buf[0x100];
+			assert(src || !count_src);
 			count_src = stdex::strnlen(src, count_src);
+			if (!count_src) _Unlikely_
+				return;
+#ifdef _WIN32
+			// DWORD, LPCCH and MB_PRECOMPOSED exist on Windows only: keep these constants inside the _WIN32 branch, or other platforms fail to compile.
+			constexpr DWORD dwFlagsMBWC = MB_PRECOMPOSED;
+			constexpr DWORD dwFlagsWCMB = 0;
+			constexpr LPCCH lpDefaultChar = NULL;
+			_Analysis_assume_(src);
+			if (m_from == m_to) _Unlikely_{
+				dst.append(reinterpret_cast<const T_to*>(src), count_src);
+				return;
+			}
+
+			if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(wchar_t)) {
+				assert(count_src < INT_MAX || count_src == SIZE_MAX);
+
+				// Try to convert to stack buffer first.
+				WCHAR szStackBuffer[1024 / sizeof(WCHAR)];
+#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
+				int cch = MultiByteToWideChar(static_cast<UINT>(m_from), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer));
+				if (cch) {
+					// Append from stack.
+					dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
+					return;
+				}
+				if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
+					// Query the required output size. Allocate buffer. Then convert again.
+					cch = MultiByteToWideChar(static_cast<UINT>(m_from), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
+					std::unique_ptr<WCHAR[]> szBuffer(new WCHAR[cch]);
+					cch = MultiByteToWideChar(static_cast<UINT>(m_from), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBuffer.get(), cch);
+					dst.append(reinterpret_cast<const T_to*>(szBuffer.get()), count_src != SIZE_MAX ? wcsnlen(szBuffer.get(), cch) : static_cast<size_t>(cch) - 1);
+					return;
+				}
+				throw std::runtime_error("MultiByteToWideChar failed");
+			}
+
+			if constexpr (sizeof(T_from) == sizeof(wchar_t) && sizeof(T_to) == sizeof(char)) {
+				assert(count_src < INT_MAX || count_src == SIZE_MAX);
+
+				// Try to convert to stack buffer first.
+				CHAR szStackBuffer[1024 / sizeof(CHAR)];
+#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpWideCharStr parameter wrong?
+				int cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL);
+				if (cch) {
+					// Copy from stack. Be careful not to include zero terminator.
+					dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
+					return;
+				}
+				if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
+					// Query the required output size. Allocate buffer. Then convert again.
+					cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL);
+					std::unique_ptr<CHAR[]> szBuffer(new CHAR[cch]);
+					cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szBuffer.get(), cch, lpDefaultChar, NULL);
+					dst.append(reinterpret_cast<const T_to*>(szBuffer.get()), count_src != SIZE_MAX ? strnlen(szBuffer.get(), cch) : static_cast<size_t>(cch) - 1);
+					return;
+				}
+				throw std::runtime_error("WideCharToMultiByte failed");
+			}
+
+			if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(char)) {
+				assert(count_src < INT_MAX || count_src == SIZE_MAX);
+
+				// Try to convert to stack buffer first.
+				WCHAR szStackBufferMBWC[512 / sizeof(WCHAR)];
+#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
+				int cch = MultiByteToWideChar(static_cast<UINT>(m_from), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBufferMBWC, _countof(szStackBufferMBWC));
+				if (cch) {
+					// Append from stack.
+					size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szStackBufferMBWC, cch) : static_cast<size_t>(cch) - 1;
+					assert(count_inter < INT_MAX);
+
+					// Try to convert to stack buffer first.
+					CHAR szStackBufferWCMB[512 / sizeof(CHAR)];
+#pragma warning(suppress: 6387) // Testing indicates szStackBufferMBWC may be NULL when count_inter is also 0. Is SAL of the lpWideCharStr parameter wrong?
+					cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szStackBufferWCMB, _countof(szStackBufferWCMB), lpDefaultChar, NULL);
+					if (cch) {
+						// Copy from stack. Be careful not to include zero terminator.
+						dst.append(reinterpret_cast<const T_to*>(szStackBufferWCMB), strnlen(szStackBufferWCMB, cch));
+						return;
+					}
+					if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
+						// Query the required output size. Allocate buffer. Then convert again.
+						cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
+						std::unique_ptr<CHAR[]> szBufferWCMB(new CHAR[cch]);
+						cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szBufferWCMB.get(), cch, lpDefaultChar, NULL);
+						dst.append(reinterpret_cast<const T_to*>(szBufferWCMB.get()), strnlen(szBufferWCMB.get(), cch));
+						return;
+					}
+					throw std::runtime_error("WideCharToMultiByte failed");
+				}
+				if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
+					// Query the required output size. Allocate buffer. Then convert again.
+					cch = MultiByteToWideChar(static_cast<UINT>(m_from), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
+					std::unique_ptr<WCHAR[]> szBufferMBWC(new WCHAR[cch]);
+					cch = MultiByteToWideChar(static_cast<UINT>(m_from), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBufferMBWC.get(), cch);
+					size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szBufferMBWC.get(), cch) : static_cast<size_t>(cch) - 1;
+
+					// Query the required output size. Allocate buffer. Then convert again.
+					cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
+					std::unique_ptr<CHAR[]> szBufferWCMB(new CHAR[cch]);
+					cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), szBufferWCMB.get(), cch, lpDefaultChar, NULL);
+					dst.append(reinterpret_cast<const T_to*>(szBufferWCMB.get()), strnlen(szBufferWCMB.get(), cch));
+					return;
+				}
+				throw std::runtime_error("MultiByteToWideChar failed");
+			}
+#else
+			T_to buf[1024 / sizeof(T_to)];
 			size_t src_size = stdex::mul(sizeof(T_from), count_src);
 			do {
 				T_to* output = &buf[0];
@@ -64,35 +204,171 @@ namespace stdex
 				iconv(m_handle, (char**)&src, &src_size, (char**)&output, &output_size);
 				if (errno)
 					throw std::runtime_error("iconv failed");
-				dst.insert(dst.end(), buf, (T_to*)((char*)buf + sizeof(buf) - output_size));
+				dst.append(buf, reinterpret_cast<T_to*>(reinterpret_cast<char*>(buf) + sizeof(buf) - output_size));
 			} while (src_size);
+#endif
+		}
+
+		///
+		/// Convert zero-terminated string and append the result to string
+		///
+		/// \param[in,out] dst        String to append converted string to
+		/// \param[in]     src        Zero-terminated string to convert
+		///
+		template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
+		inline void strcat(
+			_Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
+			_In_z_ const T_from* src)
+		{
+			strcat(dst, src, SIZE_MAX); // SIZE_MAX: no code unit limit
+		}
+
+		///
+		/// Convert std::basic_string and append the result to string
+		///
+		/// \param[in,out] dst        String to append converted string to
+		/// \param[in]     src        String to convert
+		///
+		template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>, class _Traits_from = std::char_traits<T_from>, class _Alloc_from = std::allocator<T_from>>
+		inline void strcat(
+			_Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
+			_In_ const std::basic_string<T_from, _Traits_from, _Alloc_from>& src)
+		{
+			strcat(dst, src.data(), src.size()); // Forward to the pointer overload with the string's size as the limit
+		}
+
+		///
+		/// Convert string, replacing the previous content of dst
+		///
+		/// \param[in,out] dst        String to write converted string to
+		/// \param[in]     src        String to convert
+		/// \param[in]     count_src  Maximum number of src code units to convert
+		///
+		template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
+		inline void strcpy(
+			_Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
+			_In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
+		{
+			dst.clear(); // Discard old content first; strcat() below only appends
+			strcat(dst, src, count_src);
+		}
+
+		///
+		/// Convert zero-terminated string, replacing the previous content of dst
+		///
+		/// \param[in,out] dst        String to write converted string to
+		/// \param[in]     src        Zero-terminated string to convert
+		///
+		template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
+		inline void strcpy(
+			_Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
+			_In_z_ const T_from* src)
+		{
+			strcpy(dst, src, SIZE_MAX); // SIZE_MAX: no code unit limit
+		}
+
+		///
+		/// Convert std::basic_string, replacing the previous content of dst
+		///
+		/// \param[in,out] dst        String to write converted string to
+		/// \param[in]     src        String to convert
+		///
+		template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>, class _Traits_from = std::char_traits<T_from>, class _Alloc_from = std::allocator<T_from>>
+		inline void strcpy(
+			_Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
+			_In_ const std::basic_string<T_from, _Traits_from, _Alloc_from>& src)
+		{
+			strcpy(dst, src.data(), src.size()); // Forward to the pointer overload with the string's size as the limit
+		}
+
+		///
+		/// Convert string and return the result as a new string
+		///
+		/// \param[in]     src        String to convert
+		/// \param[in]     count_src  Maximum number of src code units to convert
+		///
+		template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
+		inline std::basic_string<T_to, _Traits_to, _Alloc_to> convert(_In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
+		{
+			std::basic_string<T_to, _Traits_to, _Alloc_to> dst; // Starts empty, so appending yields exactly the converted text
+			strcat(dst, src, count_src);
+			return dst;
+		}
+
+		///
+		/// Convert zero-terminated string and return the result as a new string
+		///
+		/// \param[in]     src        Zero-terminated string to convert
+		///
+		template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
+		inline std::basic_string<T_to, _Traits_to, _Alloc_to> convert(_In_z_ const T_from* src)
+		{
+			return convert(src, SIZE_MAX); // SIZE_MAX: no code unit limit
+		}
+
+		///
+		/// Convert std::basic_string and return the result as a new string
+		///
+		/// \param[in]     src        String to convert
+		///
+		template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>, class _Traits_from = std::char_traits<T_from>, class _Alloc_from = std::allocator<T_from>>
+		inline std::basic_string<T_to, _Traits_to, _Alloc_to> convert(_In_ const std::basic_string<T_from, _Traits_from, _Alloc_from>& src)
+		{
+			return convert(src.data(), src.size()); // Forward to the pointer overload with the string's size as the limit
+		}
+
+		inline void clear() // Resets the converter state; does nothing on Windows, where no state is kept
+		{
+#ifndef _WIN32
+			iconv(m_handle, NULL, NULL, NULL, NULL); // All-NULL buffers ask iconv to return the descriptor to its initial conversion state
+#endif
+		}
+
+#ifdef _WIN32
+	protected:
+		static UINT to_encoding(_In_ charset_id charset)
+		{
+			// system and oem are placeholders for the active code pages; every other ID is used as the code page number.
+			if (charset == charset_id::system) return GetACP();
+			if (charset == charset_id::oem) return GetOEMCP();
+			return static_cast<UINT>(charset);
+		}
 
+	protected:
+		UINT m_from, m_to;
+#else
 	protected:
 		static const char* to_encoding(_In_ charset_id charset)
 		{
-			switch (charset) {
-				case charset_id::system:
-				case charset_id::utf8: return "UTF-8";
+			static const char* const encodings[static_cast<std::underlying_type_t<charset_id>>(charset_id::_max)] = {
+				"",         // system
+				"UTF-8",    // utf8
 #if BYTE_ORDER == BIG_ENDIAN
-				case charset_id::utf16: return "UTF-16BE";
-				case charset_id::utf32: return "UTF-32BE";
+				"UTF-16BE", // utf16
+				"UTF-32BE", // utf32
 #else
-				case charset_id::utf16: return "UTF-16LE";
-				case charset_id::utf32: return "UTF-32LE";
+				"UTF-16LE", // utf16
+				"UTF-32LE", // utf32
 #endif
-				default: throw std::invalid_argument("unsupported charset");
+				"CP1250",   // windows1250
+				"CP1251",   // windows1251
+				"CP1252",   // windows1252
-			}
+			}; // The array definition needs its terminating semicolon
+			return
+				charset == charset_id::system ? nl_langinfo(CODESET) : // CODESET is the nl_item naming the locale's charset; LC_CTYPE is a locale category, not an nl_item
+				encodings[static_cast<std::underlying_type_t<charset_id>>(charset)];
 		}
 
 	protected:
 		iconv_t m_handle;
-	};
 #endif
+	};
 
 	///
 	/// Convert string to Unicode (UTF-16 on Windows, UTF-32 elsewhere)) and append to string
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in,out] dst        String to append Unicode to
 	/// \param[in]     src        String
 	/// \param[in]     count_src  String character count limit
@@ -103,28 +379,7 @@ namespace stdex
 		_In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
 		_In_ charset_id charset = charset_id::system)
 	{
-		assert(src || !count_src);
-#ifdef _WIN32
-		assert(count_src < INT_MAX || count_src == SIZE_MAX);
-		constexpr DWORD dwFlags = MB_PRECOMPOSED;
-
-		// Try to convert to stack buffer first.
-		WCHAR szStackBuffer[1024/sizeof(WCHAR)];
-#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
-		int cch = MultiByteToWideChar(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer));
-		if (cch) {
-			// Append from stack.
-			dst.append(szStackBuffer, count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) : (size_t)cch - 1);
-		} else if (::GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
-			// Query the required output size. Allocate buffer. Then convert again.
-			cch = MultiByteToWideChar(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), NULL, 0);
-			std::unique_ptr<WCHAR[]> szBuffer(new WCHAR[cch]);
-			cch = MultiByteToWideChar(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), szBuffer.get(), cch);
-			dst.append(szBuffer.get(), count_src != SIZE_MAX ? wcsnlen(szBuffer.get(), cch) : (size_t)cch - 1);
-		}
-#else
-		iconverter<char, wchar_t>(charset, charset_id::utf32).convert(dst, src, count_src);
-#endif
+		charset_encoder<char, wchar_t>(charset, wchar_t_charset).strcat(dst, src, count_src);
 	}
 
 	_Deprecated_("Use stdex::strcat")
@@ -139,6 +394,8 @@ namespace stdex
 	///
 	/// Convert string to Unicode (UTF-16 on Windows) and append to string
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in,out] dst        String to append Unicode to
 	/// \param[in]     src        String
 	/// \param[in]     charset    Charset (stdex::charset_id::system - system default)
@@ -163,6 +420,8 @@ namespace stdex
 	///
 	/// Convert string to Unicode (UTF-16 on Windows)
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in,out] dst        String to write Unicode to
 	/// \param[in]     src        String
 	/// \param[in]     count_src  String character count limit
@@ -180,6 +439,8 @@ namespace stdex
 	///
 	/// Convert string to Unicode (UTF-16 on Windows)
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in,out] dst        String to write Unicode to
 	/// \param[in]     src        String
 	/// \param[in]     charset    Charset (stdex::charset_id::system - system default)
@@ -195,6 +456,8 @@ namespace stdex
 	///
 	/// Convert string to Unicode string (UTF-16 on Windows)
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in]  src        String. Must be zero-terminated.
 	/// \param[in]  charset    Charset (stdex::charset_id::system - system default)
 	///
@@ -212,6 +475,8 @@ namespace stdex
 	///
 	/// Convert string to Unicode string (UTF-16 on Windows)
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in]  src        String
 	/// \param[in]  count_src  String character count limit
 	/// \param[in]  charset    Charset (stdex::charset_id::system - system default)
@@ -230,6 +495,8 @@ namespace stdex
 	///
 	/// Convert string to Unicode string (UTF-16 on Windows)
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in]  src        String
 	/// \param[in]  charset    Charset (stdex::charset_id::system - system default)
 	///
@@ -245,6 +512,8 @@ namespace stdex
 	///
 	/// Convert Unicode string (UTF-16 on Windows, UTF-32 elsewhere) to SGML and append to string
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in,out] dst        String to append SGML to
 	/// \param[in]     src        Unicode string
 	/// \param[in]     count_src  Unicode string character count limit
@@ -255,29 +524,7 @@ namespace stdex
 		_In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
 		_In_ charset_id charset = charset_id::system)
 	{
-		assert(src || !count_src);
-#ifdef _WIN32
-		assert(count_src < INT_MAX || count_src == SIZE_MAX);
-		constexpr DWORD dwFlags = 0;
-		constexpr LPCCH lpDefaultChar = NULL;
-
-		// Try to convert to stack buffer first.
-		CHAR szStackBuffer[1024/sizeof(CHAR)];
-#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpWideCharStr parameter wrong?
-		int cch = WideCharToMultiByte(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL);
-		if (cch) {
-			// Copy from stack. Be careful not to include zero terminator.
-			dst.append(szStackBuffer, count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) : (size_t)cch - 1);
-		} else if (::GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
-			// Query the required output size. Allocate buffer. Then convert again.
-			cch = WideCharToMultiByte(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL);
-			std::unique_ptr<CHAR[]> szBuffer(new CHAR[cch]);
-			cch = WideCharToMultiByte(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), szBuffer.get(), cch, lpDefaultChar, NULL);
-			dst.append(szBuffer.get(), count_src != SIZE_MAX ? strnlen(szBuffer.get(), cch) : (size_t)cch - 1);
-		}
-#else
-		iconverter<wchar_t, char>(charset_id::utf32, charset).convert(dst, src, count_src);
-#endif
+		charset_encoder<wchar_t, char>(wchar_t_charset, charset).strcat(dst, src, count_src);
 	}
 
 	_Deprecated_("Use stdex::strcat")
@@ -292,6 +539,8 @@ namespace stdex
 	///
 	/// Convert Unicode string (UTF-16 on Windows) to SGML and append to string
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in,out] dst        String to append SGML to
 	/// \param[in]     src        Unicode string
 	/// \param[in]     charset    Charset (stdex::charset_id::system - system default)
@@ -316,6 +565,8 @@ namespace stdex
 	///
 	/// Convert Unicode string (UTF-16 on Windows) to SGML
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in,out] dst        String to write SGML to
 	/// \param[in]     src        Unicode string
 	/// \param[in]     count_src  Unicode string character count limit
@@ -333,6 +584,8 @@ namespace stdex
 	///
 	/// Convert Unicode string (UTF-16 on Windows) to SGML
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in,out] dst        String to write SGML to
 	/// \param[in]     src        Unicode string
 	/// \param[in]     charset    Charset (stdex::charset_id::system - system default)
@@ -348,6 +601,8 @@ namespace stdex
 	///
 	/// Convert Unicode string (UTF-16 on Windows) to string
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in]  src        Unicode string. Must be zero-terminated.
 	/// \param[in]  charset    Charset (stdex::charset_id::system - system default)
 	///
@@ -365,6 +620,8 @@ namespace stdex
 	///
 	/// Convert Unicode string (UTF-16 on Windows) to string
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in]  src        Unicode string
 	/// \param[in]  count_src  Unicode string character count limit
 	/// \param[in]  charset    Charset (stdex::charset_id::system - system default)
@@ -383,6 +640,8 @@ namespace stdex
 	///
 	/// Convert Unicode string (UTF-16 on Windows) to string
 	///
+	/// \note For better performance, consider a reusable charset_encoder.
+	///
 	/// \param[in]  src        Unicode string
 	/// \param[in]  charset    Charset (stdex::charset_id::system - system default)
 	///