unicode: add conversion between char* and wchar_t*

It is implemented for Windows only for the time being.

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2023-07-17 12:53:03 +02:00
parent 6cdcb08365
commit 434cf6d3e2

196
include/stdex/unicode.hpp Normal file
View File

@ -0,0 +1,196 @@
/*
SPDX-License-Identifier: MIT
Copyright © 2023 Amebis
*/
#pragma once
#include "sal.hpp"
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <string.h>
#include <wchar.h>
#ifdef _WIN32
#include <windows.h>
#endif
#include <memory>
#include <stdexcept>
#include <string>
namespace stdex
{
enum class charset_id {
default = 0,
};
///
/// Convert string to Unicode (UTF-16 on Windows) and append to string
///
/// \param[inout] dst String to append Unicode to
/// \param[in] src String
/// \param[in] count_src String character count limit
/// \param[in] charset Charset (stdex::charset_id::default - system default)
///
/// \return Unicode string
///
inline void str2wstr(
_Inout_ std::wstring& dst,
_In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
_In_ charset_id charset = charset_id::default)
{
assert(src || !count_src);
#ifdef _WIN32
assert(count_src < INT_MAX || count_src == SIZE_MAX);
constexpr DWORD dwFlags = MB_PRECOMPOSED;
// Try to convert to stack buffer first.
WCHAR szStackBuffer[1024/sizeof(WCHAR)];
int cch = MultiByteToWideChar(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer));
if (cch) {
// Append from stack.
dst.append(szStackBuffer, count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) : (size_t)cch - 1);
} else if (::GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
// Query the required output size. Allocate buffer. Then convert again.
cch = MultiByteToWideChar(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), NULL, 0);
std::unique_ptr<WCHAR[]> szBuffer(new WCHAR[cch]);
cch = MultiByteToWideChar(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), szBuffer.get(), cch);
dst.append(szBuffer.get(), count_src != SIZE_MAX ? wcsnlen(szBuffer.get(), cch) : (size_t)cch - 1);
}
#else
throw std::exception("not implemented");
#endif
}
///
/// Convert string to Unicode (UTF-16 on Windows) and append to string
///
/// \param[inout] dst String to append Unicode to
/// \param[in] src String
/// \param[in] charset Charset (stdex::charset_id::default - system default)
///
/// \return Unicode string
///
inline void str2wstr(
_Inout_ std::wstring& dst,
_In_ const std::string& src,
_In_ charset_id charset = charset_id::default)
{
str2wstr(dst, src.data(), src.size(), charset);
}
///
/// Convert string to Unicode string (UTF-16 on Windows)
///
/// \param[in] src String
/// \param[in] count_src String character count limit
/// \param[in] charset Charset (stdex::charset_id::default - system default)
///
/// \return Unicode string
///
inline std::wstring str2wstr(
_In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
_In_ charset_id charset = charset_id::default)
{
std::wstring dst;
str2wstr(dst, src, count_src, charset);
return dst;
}
///
/// Convert string to Unicode string (UTF-16 on Windows)
///
/// \param[in] src String
/// \param[in] charset Charset (stdex::charset_id::default - system default)
///
/// \return Unicode string
///
inline std::wstring str2wstr(
_In_ const std::string& src,
_In_ charset_id charset = charset_id::default)
{
return str2wstr(src.c_str(), src.size(), charset);
}
///
/// Convert Unicode string (UTF-16 on Windows) to SGML and append to string
///
/// \param[inout] dst String to append SGML to
/// \param[in] src Unicode string
/// \param[in] count_src Unicode string character count limit
/// \param[in] charset Charset (stdex::charset_id::default - system default)
///
inline void wstr2str(
_Inout_ std::string& dst,
_In_reads_or_z_opt_(count_src) const wchar_t* src,
_In_ size_t count_src,
_In_ charset_id charset = charset_id::default)
{
assert(src || !count_src);
#ifdef _WIN32
assert(count_src < INT_MAX || count_src == SIZE_MAX);
constexpr DWORD dwFlags = 0;
constexpr LPCCH lpDefaultChar = NULL;
// Try to convert to stack buffer first.
CHAR szStackBuffer[1024/sizeof(CHAR)];
int cch = WideCharToMultiByte(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL);
if (cch) {
// Copy from stack. Be careful not to include zero terminator.
dst.append(szStackBuffer, count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) : (size_t)cch - 1);
} else if (::GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
// Query the required output size. Allocate buffer. Then convert again.
cch = WideCharToMultiByte(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL);
std::unique_ptr<CHAR[]> szBuffer(new CHAR[cch]);
cch = WideCharToMultiByte(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), szBuffer.get(), cch, lpDefaultChar, NULL);
dst.append(szBuffer.get(), count_src != SIZE_MAX ? strnlen(szBuffer.get(), cch) : (size_t)cch - 1);
}
#else
throw std::exception("not implemented");
#endif
}
///
/// Convert Unicode string (UTF-16 on Windows) to SGML and append to string
///
/// \param[inout] dst String to append SGML to
/// \param[in] src Unicode string
/// \param[in] charset Charset (stdex::charset_id::default - system default)
///
inline void wstr2str(
_Inout_ std::string& dst,
_In_ const std::wstring& src,
_In_ charset_id charset = charset_id::default)
{
wstr2str(dst, src.c_str(), src.size(), charset);
}
///
/// Convert Unicode string (UTF-16 on Windows) to string
///
/// \param[in] src Unicode string
/// \param[in] count_src Unicode string character count limit
/// \param[in] charset Charset (stdex::charset_id::default - system default)
///
/// \return String
///
inline std::string wstr2str(
_In_reads_or_z_opt_(count_src) const wchar_t* src,
_In_ size_t count_src,
_In_ charset_id charset = charset_id::default)
{
std::string dst;
wstr2str(dst, src, count_src, charset);
return dst;
}
///
/// Convert Unicode string (UTF-16 on Windows) to string
///
/// \param[in] src Unicode string
/// \param[in] charset Charset (stdex::charset_id::default - system default)
///
/// \return String
///
inline std::string wstr2str(
_In_ const std::wstring& src,
_In_ charset_id charset = charset_id::default)
{
return wstr2str(src.c_str(), src.size(), charset);
}
}