unicode: upgrade and promote use of charset_encoder

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2023-09-22 13:02:27 +02:00
parent cb010bd72e
commit 921d235459
2 changed files with 76 additions and 19 deletions

View File

@ -8,6 +8,9 @@
using namespace std; using namespace std;
#ifdef _WIN32 #ifdef _WIN32
using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Microsoft::VisualStudio::CppUnitTestFramework;
#else
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif #endif
namespace UnitTests namespace UnitTests
@ -86,3 +89,7 @@ namespace UnitTests
} }
}; };
} }
#ifndef _WIN32
#pragma GCC diagnostic pop
#endif

View File

@ -54,12 +54,17 @@ namespace stdex
template <typename T_from, typename T_to> template <typename T_from, typename T_to>
class charset_encoder class charset_encoder
{ {
protected:
charset_id m_from, m_to;
public: public:
charset_encoder(_In_ charset_id from, _In_ charset_id to) charset_encoder(_In_ charset_id from, _In_ charset_id to) :
m_from(from),
m_to(to)
{ {
#ifdef _WIN32 #ifdef _WIN32
m_from = to_encoding(from); m_from_wincp = to_encoding(from);
m_to = to_encoding(to); m_to_wincp = to_encoding(to);
#else #else
m_handle = iconv_open(to_encoding(to), to_encoding(from)); m_handle = iconv_open(to_encoding(to), to_encoding(from));
if (m_handle == (iconv_t)-1) if (m_handle == (iconv_t)-1)
@ -74,6 +79,9 @@ namespace stdex
} }
#endif #endif
inline charset_id from_encoding() const { return m_from; }
inline charset_id to_encoding() const { return m_to; }
/// ///
/// Convert string and append to string /// Convert string and append to string
/// ///
@ -97,7 +105,7 @@ namespace stdex
constexpr LPCCH lpDefaultChar = NULL; constexpr LPCCH lpDefaultChar = NULL;
_Analysis_assume_(src); _Analysis_assume_(src);
if (m_from == m_to) _Unlikely_{ if (m_from_wincp == m_to_wincp) _Unlikely_{
dst.append(reinterpret_cast<const T_to*>(src), count_src); dst.append(reinterpret_cast<const T_to*>(src), count_src);
return; return;
} }
@ -108,7 +116,7 @@ namespace stdex
// Try to convert to stack buffer first. // Try to convert to stack buffer first.
WCHAR szStackBuffer[1024 / sizeof(WCHAR)]; WCHAR szStackBuffer[1024 / sizeof(WCHAR)];
#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong? #pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
int cch = MultiByteToWideChar(static_cast<UINT>(m_from), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer)); int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer));
if (cch) { if (cch) {
// Append from stack. // Append from stack.
dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1); dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
@ -116,9 +124,9 @@ namespace stdex
} }
if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
// Query the required output size. Allocate buffer. Then convert again. // Query the required output size. Allocate buffer. Then convert again.
cch = MultiByteToWideChar(static_cast<UINT>(m_from), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0); cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
std::unique_ptr<WCHAR[]> szBuffer(new WCHAR[cch]); std::unique_ptr<WCHAR[]> szBuffer(new WCHAR[cch]);
cch = MultiByteToWideChar(static_cast<UINT>(m_from), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBuffer.get(), cch); cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBuffer.get(), cch);
dst.append(reinterpret_cast<const T_to*>(szBuffer.get()), count_src != SIZE_MAX ? wcsnlen(szBuffer.get(), cch) : static_cast<size_t>(cch) - 1); dst.append(reinterpret_cast<const T_to*>(szBuffer.get()), count_src != SIZE_MAX ? wcsnlen(szBuffer.get(), cch) : static_cast<size_t>(cch) - 1);
return; return;
} }
@ -131,7 +139,7 @@ namespace stdex
// Try to convert to stack buffer first. // Try to convert to stack buffer first.
CHAR szStackBuffer[1024 / sizeof(CHAR)]; CHAR szStackBuffer[1024 / sizeof(CHAR)];
#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpWideCharStr parameter wrong? #pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpWideCharStr parameter wrong?
int cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL); int cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL);
if (cch) { if (cch) {
// Copy from stack. Be careful not to include zero terminator. // Copy from stack. Be careful not to include zero terminator.
dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1); dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
@ -139,9 +147,9 @@ namespace stdex
} }
if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
// Query the required output size. Allocate buffer. Then convert again. // Query the required output size. Allocate buffer. Then convert again.
cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL); cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL);
std::unique_ptr<CHAR[]> szBuffer(new CHAR[cch]); std::unique_ptr<CHAR[]> szBuffer(new CHAR[cch]);
cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szBuffer.get(), cch, lpDefaultChar, NULL); cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szBuffer.get(), cch, lpDefaultChar, NULL);
dst.append(reinterpret_cast<const T_to*>(szBuffer.get()), count_src != SIZE_MAX ? strnlen(szBuffer.get(), cch) : static_cast<size_t>(cch) - 1); dst.append(reinterpret_cast<const T_to*>(szBuffer.get()), count_src != SIZE_MAX ? strnlen(szBuffer.get(), cch) : static_cast<size_t>(cch) - 1);
return; return;
} }
@ -154,7 +162,7 @@ namespace stdex
// Try to convert to stack buffer first. // Try to convert to stack buffer first.
WCHAR szStackBufferMBWC[512 / sizeof(WCHAR)]; WCHAR szStackBufferMBWC[512 / sizeof(WCHAR)];
#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong? #pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
int cch = MultiByteToWideChar(static_cast<UINT>(m_from), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBufferMBWC, _countof(szStackBufferMBWC)); int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBufferMBWC, _countof(szStackBufferMBWC));
if (cch) { if (cch) {
// Append from stack. // Append from stack.
size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szStackBufferMBWC, cch) : static_cast<size_t>(cch) - 1; size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szStackBufferMBWC, cch) : static_cast<size_t>(cch) - 1;
@ -163,7 +171,7 @@ namespace stdex
// Try to convert to stack buffer first. // Try to convert to stack buffer first.
CHAR szStackBufferWCMB[512 / sizeof(CHAR)]; CHAR szStackBufferWCMB[512 / sizeof(CHAR)];
#pragma warning(suppress: 6387) // Testing indicates szStackBufferMBWC may be NULL when count_inter is also 0. Is SAL of the lpWideCharStr parameter wrong? #pragma warning(suppress: 6387) // Testing indicates szStackBufferMBWC may be NULL when count_inter is also 0. Is SAL of the lpWideCharStr parameter wrong?
cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szStackBufferWCMB, _countof(szStackBufferWCMB), lpDefaultChar, NULL); cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szStackBufferWCMB, _countof(szStackBufferWCMB), lpDefaultChar, NULL);
if (cch) { if (cch) {
// Copy from stack. Be careful not to include zero terminator. // Copy from stack. Be careful not to include zero terminator.
dst.append(reinterpret_cast<const T_to*>(szStackBufferWCMB), strnlen(szStackBufferWCMB, cch)); dst.append(reinterpret_cast<const T_to*>(szStackBufferWCMB), strnlen(szStackBufferWCMB, cch));
@ -171,9 +179,9 @@ namespace stdex
} }
if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
// Query the required output size. Allocate buffer. Then convert again. // Query the required output size. Allocate buffer. Then convert again.
cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL); cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
std::unique_ptr<CHAR[]> szBufferWCMB(new CHAR[cch]); std::unique_ptr<CHAR[]> szBufferWCMB(new CHAR[cch]);
cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szBufferWCMB.get(), cch, lpDefaultChar, NULL); cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szBufferWCMB.get(), cch, lpDefaultChar, NULL);
dst.append(reinterpret_cast<const T_to*>(szBufferWCMB.get()), strnlen(szBufferWCMB.get(), cch)); dst.append(reinterpret_cast<const T_to*>(szBufferWCMB.get()), strnlen(szBufferWCMB.get(), cch));
return; return;
} }
@ -181,15 +189,15 @@ namespace stdex
} }
if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
// Query the required output size. Allocate buffer. Then convert again. // Query the required output size. Allocate buffer. Then convert again.
cch = MultiByteToWideChar(static_cast<UINT>(m_from), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0); cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
std::unique_ptr<WCHAR[]> szBufferMBWC(new WCHAR[cch]); std::unique_ptr<WCHAR[]> szBufferMBWC(new WCHAR[cch]);
cch = MultiByteToWideChar(static_cast<UINT>(m_from), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBufferMBWC.get(), cch); cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBufferMBWC.get(), cch);
size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szBufferMBWC.get(), cch) : static_cast<size_t>(cch) - 1; size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szBufferMBWC.get(), cch) : static_cast<size_t>(cch) - 1;
// Query the required output size. Allocate buffer. Then convert again. // Query the required output size. Allocate buffer. Then convert again.
cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL); cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
std::unique_ptr<CHAR[]> szBufferWCMB(new CHAR[cch]); std::unique_ptr<CHAR[]> szBufferWCMB(new CHAR[cch]);
cch = WideCharToMultiByte(static_cast<UINT>(m_to), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), szBufferWCMB.get(), cch, lpDefaultChar, NULL); cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), szBufferWCMB.get(), cch, lpDefaultChar, NULL);
dst.append(reinterpret_cast<const T_to*>(szBufferWCMB.get()), strnlen(szBufferWCMB.get(), cch)); dst.append(reinterpret_cast<const T_to*>(szBufferWCMB.get()), strnlen(szBufferWCMB.get(), cch));
return; return;
} }
@ -366,7 +374,7 @@ namespace stdex
} }
protected: protected:
UINT m_from, m_to; UINT m_from_wincp, m_to_wincp;
#else #else
protected: protected:
static const char* to_encoding(_In_ charset_id charset) static const char* to_encoding(_In_ charset_id charset)
@ -405,6 +413,9 @@ namespace stdex
/// \param[in] count_src String character count limit /// \param[in] count_src String character count limit
/// \param[in] charset Charset (stdex::charset_id::system - system default) /// \param[in] charset Charset (stdex::charset_id::system - system default)
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline void strcat( inline void strcat(
_Inout_ std::wstring& dst, _Inout_ std::wstring& dst,
_In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src, _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
@ -431,6 +442,9 @@ namespace stdex
/// \param[in] src String /// \param[in] src String
/// \param[in] charset Charset (stdex::charset_id::system - system default) /// \param[in] charset Charset (stdex::charset_id::system - system default)
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline void strcat( inline void strcat(
_Inout_ std::wstring& dst, _Inout_ std::wstring& dst,
_In_ const std::string& src, _In_ const std::string& src,
@ -458,6 +472,9 @@ namespace stdex
/// \param[in] count_src String character count limit /// \param[in] count_src String character count limit
/// \param[in] charset Charset (stdex::charset_id::system - system default) /// \param[in] charset Charset (stdex::charset_id::system - system default)
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline void strcpy( inline void strcpy(
_Inout_ std::wstring& dst, _Inout_ std::wstring& dst,
_In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src, _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
@ -476,6 +493,9 @@ namespace stdex
/// \param[in] src String /// \param[in] src String
/// \param[in] charset Charset (stdex::charset_id::system - system default) /// \param[in] charset Charset (stdex::charset_id::system - system default)
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline void strcpy( inline void strcpy(
_Inout_ std::wstring& dst, _Inout_ std::wstring& dst,
_In_ const std::string& src, _In_ const std::string& src,
@ -494,6 +514,9 @@ namespace stdex
/// ///
/// \return Unicode string /// \return Unicode string
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline std::wstring str2wstr( inline std::wstring str2wstr(
_In_z_ const char* src, _In_z_ const char* src,
_In_ charset_id charset = charset_id::system) _In_ charset_id charset = charset_id::system)
@ -514,6 +537,9 @@ namespace stdex
/// ///
/// \return Unicode string /// \return Unicode string
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline std::wstring str2wstr( inline std::wstring str2wstr(
_In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src, _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
_In_ charset_id charset = charset_id::system) _In_ charset_id charset = charset_id::system)
@ -533,6 +559,9 @@ namespace stdex
/// ///
/// \return Unicode string /// \return Unicode string
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline std::wstring str2wstr( inline std::wstring str2wstr(
_In_ const std::string& src, _In_ const std::string& src,
_In_ charset_id charset = charset_id::system) _In_ charset_id charset = charset_id::system)
@ -550,6 +579,9 @@ namespace stdex
/// \param[in] count_src Unicode string character count limit /// \param[in] count_src Unicode string character count limit
/// \param[in] charset Charset (stdex::charset_id::system - system default) /// \param[in] charset Charset (stdex::charset_id::system - system default)
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline void strcat( inline void strcat(
_Inout_ std::string& dst, _Inout_ std::string& dst,
_In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src, _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
@ -576,6 +608,9 @@ namespace stdex
/// \param[in] src Unicode string /// \param[in] src Unicode string
/// \param[in] charset Charset (stdex::charset_id::system - system default) /// \param[in] charset Charset (stdex::charset_id::system - system default)
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline void strcat( inline void strcat(
_Inout_ std::string& dst, _Inout_ std::string& dst,
_In_ const std::wstring& src, _In_ const std::wstring& src,
@ -603,6 +638,9 @@ namespace stdex
/// \param[in] count_src Unicode string character count limit /// \param[in] count_src Unicode string character count limit
/// \param[in] charset Charset (stdex::charset_id::system - system default) /// \param[in] charset Charset (stdex::charset_id::system - system default)
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline void strcpy( inline void strcpy(
_Inout_ std::string& dst, _Inout_ std::string& dst,
_In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src, _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
@ -621,6 +659,9 @@ namespace stdex
/// \param[in] src Unicode string /// \param[in] src Unicode string
/// \param[in] charset Charset (stdex::charset_id::system - system default) /// \param[in] charset Charset (stdex::charset_id::system - system default)
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline void strcpy( inline void strcpy(
_Inout_ std::string& dst, _Inout_ std::string& dst,
_In_ const std::wstring& src, _In_ const std::wstring& src,
@ -639,6 +680,9 @@ namespace stdex
/// ///
/// \return String /// \return String
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline std::string wstr2str( inline std::string wstr2str(
_In_z_ const wchar_t* src, _In_z_ const wchar_t* src,
_In_ charset_id charset = charset_id::system) _In_ charset_id charset = charset_id::system)
@ -659,6 +703,9 @@ namespace stdex
/// ///
/// \return String /// \return String
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline std::string wstr2str( inline std::string wstr2str(
_In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src, _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
_In_ charset_id charset = charset_id::system) _In_ charset_id charset = charset_id::system)
@ -678,6 +725,9 @@ namespace stdex
/// ///
/// \return String /// \return String
/// ///
#ifndef _WIN32
_Deprecated_("For better performance, consider a reusable charset_encoder")
#endif
inline std::string wstr2str( inline std::string wstr2str(
_In_ const std::wstring& src, _In_ const std::wstring& src,
_In_ charset_id charset = charset_id::system) _In_ charset_id charset = charset_id::system)