22#pragma GCC diagnostic push
23#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
28 enum class charset_id : uint16_t {
// Namespace-scope charset constants: the charset used for wchar_t text and the system charset.
// NOTE(review): both names are defined more than once below; the definitions are presumably
// alternatives selected by preprocessor conditionals whose lines are not visible in this
// listing - confirm against the full header.
54 constexpr charset_id wchar_t_charset = charset_id::utf16;
56 constexpr charset_id system_charset = charset_id::utf16;
58 constexpr charset_id system_charset = charset_id::system;
61 constexpr charset_id wchar_t_charset = charset_id::utf32;
62 constexpr charset_id system_charset = charset_id::system;
/// Maps a charset name to a charset_id.
///
/// The name is looked up in a function-local static table of known names and aliases.
///
/// \param[in] name  Zero-terminated charset name (annotated _In_z_)
///
/// \returns charset_id::system when the name is not in the table.
/// NOTE(review): the statement executed when the lookup succeeds is not visible in this
/// listing; presumably it returns the mapped charset_id - confirm.
65 inline charset_id charset_from_name(_In_z_
const char* name)
// Comparator for the table: orders keys by stdex::stricmp() on the pointed-to text rather
// than by pointer value, using stdex::std_locale_C (presumably a case-insensitive compare
// in the "C" locale - confirm against stdex).
68 inline bool operator()(_In_z_
const char* a, _In_z_
const char* b)
const
70 return stdex::stricmp(a, b, stdex::std_locale_C) < 0;
// Name/alias table, keyed by C string and ordered with charset_less.
73 static const std::map<const char*, charset_id, charset_less> charsets = {
// UTF-7 names
74 {
"UNICODE-1-1-UTF-7", charset_id::utf7 },
75 {
"UTF-7", charset_id::utf7 },
76 {
"CSUNICODE11UTF7", charset_id::utf7 },
// UTF-8 names
78 {
"UTF-8", charset_id::utf8 },
79 {
"UTF8", charset_id::utf8 },
// UTF-16 names
81 {
"UTF-16", charset_id::utf16 },
// NOTE(review): the #else/#endif lines are not visible in this listing; presumably only the
// BE or LE alias matching the host byte order is compiled in - confirm.
82#if BYTE_ORDER == BIG_ENDIAN
83 {
"UTF-16BE", charset_id::utf16 },
85 {
"UTF-16LE", charset_id::utf16 },
// UTF-32 names (same byte-order conditional as for UTF-16)
88 {
"UTF-32", charset_id::utf32 },
89#if BYTE_ORDER == BIG_ENDIAN
90 {
"UTF-32BE", charset_id::utf32 },
92 {
"UTF-32LE", charset_id::utf32 },
// Windows-1250 names
95 {
"CP1250", charset_id::windows1250 },
96 {
"MS-EE", charset_id::windows1250 },
97 {
"WINDOWS-1250", charset_id::windows1250 },
// Windows-1251 names
99 {
"CP1251", charset_id::windows1251 },
100 {
"MS-CYRL", charset_id::windows1251 },
101 {
"WINDOWS-1251", charset_id::windows1251 },
// Windows-1252 names
103 {
"CP1252", charset_id::windows1252 },
104 {
"MS-ANSI", charset_id::windows1252 },
105 {
"WINDOWS-1252", charset_id::windows1252 },
// Look the name up; `el` is scoped to the if statement.
107 if (
auto el = charsets.find(name); el != charsets.end())
// Fallback for names that are not in the table.
109 return charset_id::system;
/// Encoding converter context (class head plus constructor/destructor fragments).
///
/// \tparam T_from  Character type of the source strings
/// \tparam T_to    Character type of the destination strings
///
/// NOTE(review): the class-name line, the constructor signature and the preprocessor lines
/// separating the two setups below are not visible in this listing.
115 template <
typename T_from,
typename T_to>
// Source and destination charsets of this converter.
119 charset_id m_from, m_to;
// Code-page based setup: translate both charsets with to_encoding() and keep the results.
127 m_from_wincp = to_encoding(from);
128 m_to_wincp = to_encoding(to);
// iconv based setup: open a conversion descriptor; iconv_open() takes the destination
// encoding first and the source encoding second.
130 m_handle = iconv_open(to_encoding(to), to_encoding(from));
// iconv_open() signals failure with (iconv_t)-1; report errno as std::system_error.
131 if (m_handle == (iconv_t)-1)
132 throw std::system_error(errno, std::system_category(),
"iconv_open failed");
// Close the iconv descriptor (presumably in the destructor - the enclosing signature is not
// visible in this listing).
139 iconv_close(m_handle);
/// Returns the source charset of this converter (the m_from member).
143 inline charset_id from_encoding()
const {
return m_from; }
/// Returns the destination charset of this converter (the m_to member).
144 inline charset_id to_encoding()
const {
return m_to; }
/// Converts a string and appends the result to `dst`.
///
/// \param[in,out] dst        String to append the converted text to
/// \param[in]     src        Source string; may be null only when count_src is 0
/// \param[in]     count_src  Character limit for `src`; the zero-terminated overload passes SIZE_MAX
///
/// \throws std::system_error with the Win32 error code ("MultiByteToWideChar failed",
/// "WideCharToMultiByte failed") or with errno ("iconv failed")
///
/// NOTE(review): the function-name line, braces, early returns, loop headers and the
/// preprocessor lines separating the code-page path from the iconv path are not visible in
/// this listing; the comments below describe only the statements that are shown.
153 template <
class _Traits_to = std::
char_traits<T_to>,
class _Alloc_to = std::allocator<T_to>>
155 _Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
156 _In_reads_or_z_opt_(count_src)
const T_from* src, _In_
size_t count_src)
// A null source is acceptable only together with a zero count.
158 _Assume_(src || !count_src);
// Replace the limit with the length reported by stdex::strnlen (presumably the offset of the
// first terminator within the limit - confirm against stdex).
159 count_src = stdex::strnlen<T_from>(src, count_src);
// Empty input (the statement taken here is not visible in this listing).
160 if (!count_src) _Unlikely_
// Fixed arguments shared by the MultiByteToWideChar/WideCharToMultiByte calls below.
164 constexpr DWORD dwFlagsMBWC = MB_PRECOMPOSED;
165 constexpr DWORD dwFlagsWCMB = 0;
166 constexpr LPCCH lpDefaultChar = NULL;
// Same source and destination code page: append the characters without conversion.
169 if (m_from_wincp == m_to_wincp) _Unlikely_{
170 dst.append(
reinterpret_cast<const T_to*
>(src), count_src);
// Case 1: char-sized source, wchar_t-sized destination.
174#pragma warning(suppress: 4127)
175 if constexpr (
sizeof(T_from) ==
sizeof(
char) &&
sizeof(T_to) ==
sizeof(
wchar_t)) {
176 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
// First attempt converts into a 1024-byte stack buffer.
179 WCHAR szStackBuffer[1024 /
sizeof(WCHAR)];
180#pragma warning(suppress: 6387)
181 int cch = MultiByteToWideChar(
static_cast<UINT
>(m_from_wincp), dwFlagsMBWC,
reinterpret_cast<LPCCH
>(src),
static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer));
// Append the output: bounded by wcsnlen when count_src is not SIZE_MAX, otherwise cch - 1
// (presumably dropping a terminator counted by the API - confirm).
184 dst.append(
reinterpret_cast<const T_to*
>(szStackBuffer), count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) :
static_cast<size_t>(cch) - 1);
// Stack buffer too small: query the required size (NULL, 0), allocate on the heap, convert again.
187 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
189 cch = MultiByteToWideChar(
static_cast<UINT
>(m_from_wincp), dwFlagsMBWC,
reinterpret_cast<LPCCH
>(src),
static_cast<int>(count_src), NULL, 0);
190 std::unique_ptr<WCHAR[]> szBuffer(
new WCHAR[cch]);
191 cch = MultiByteToWideChar(
static_cast<UINT
>(m_from_wincp), dwFlagsMBWC,
reinterpret_cast<LPCCH
>(src),
static_cast<int>(count_src), szBuffer.get(), cch);
192 dst.append(
reinterpret_cast<const T_to*
>(szBuffer.get()), count_src != SIZE_MAX ? wcsnlen(szBuffer.get(), cch) :
static_cast<size_t>(cch) - 1);
// Report the Win32 error code as std::system_error.
195 throw std::system_error(GetLastError(), std::system_category(),
"MultiByteToWideChar failed");
// Case 2: wchar_t-sized source, char-sized destination; same stack-then-heap pattern using
// WideCharToMultiByte with the destination code page.
198#pragma warning(suppress: 4127)
199 if constexpr (
sizeof(T_from) ==
sizeof(
wchar_t) &&
sizeof(T_to) ==
sizeof(
char)) {
200 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
// First attempt converts into a 1024-byte stack buffer.
203 CHAR szStackBuffer[1024 /
sizeof(CHAR)];
204#pragma warning(suppress: 6387)
205 int cch = WideCharToMultiByte(
static_cast<UINT
>(m_to_wincp), dwFlagsWCMB,
reinterpret_cast<LPCWCH
>(src),
static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL);
208 dst.append(
reinterpret_cast<const T_to*
>(szStackBuffer), count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) :
static_cast<size_t>(cch) - 1);
// Stack buffer too small: size query, heap buffer, second conversion.
211 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
213 cch = WideCharToMultiByte(
static_cast<UINT
>(m_to_wincp), dwFlagsWCMB,
reinterpret_cast<LPCWCH
>(src),
static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL);
214 std::unique_ptr<CHAR[]> szBuffer(
new CHAR[cch]);
215 cch = WideCharToMultiByte(
static_cast<UINT
>(m_to_wincp), dwFlagsWCMB,
reinterpret_cast<LPCWCH
>(src),
static_cast<int>(count_src), szBuffer.get(), cch, lpDefaultChar, NULL);
216 dst.append(
reinterpret_cast<const T_to*
>(szBuffer.get()), count_src != SIZE_MAX ? strnlen(szBuffer.get(), cch) :
static_cast<size_t>(cch) - 1);
// Report the Win32 error code as std::system_error.
219 throw std::system_error(GetLastError(), std::system_category(),
"WideCharToMultiByte failed");
// Case 3: char-sized source and destination; convert in two stages through an intermediate
// wide-character buffer.
222#pragma warning(suppress: 4127)
223 if constexpr (
sizeof(T_from) ==
sizeof(
char) &&
sizeof(T_to) ==
sizeof(
char)) {
224 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
// Stage 1: source code page to wide characters, into a 512-byte stack buffer.
227 WCHAR szStackBufferMBWC[512 /
sizeof(WCHAR)];
228#pragma warning(suppress: 6387)
229 int cch = MultiByteToWideChar(
static_cast<UINT
>(m_from_wincp), dwFlagsMBWC,
reinterpret_cast<LPCCH
>(src),
static_cast<int>(count_src), szStackBufferMBWC, _countof(szStackBufferMBWC));
// Length of the intermediate wide text that is fed into stage 2.
232 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szStackBufferMBWC, cch) :
static_cast<size_t>(cch) - 1;
233 _Assume_(count_inter < INT_MAX);
// Stage 2: wide characters to destination code page, into a 512-byte stack buffer.
236 CHAR szStackBufferWCMB[512 /
sizeof(CHAR)];
237#pragma warning(suppress: 6387)
238 cch = WideCharToMultiByte(
static_cast<UINT
>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC,
static_cast<int>(count_inter), szStackBufferWCMB, _countof(szStackBufferWCMB), lpDefaultChar, NULL);
241 dst.append(
reinterpret_cast<const T_to*
>(szStackBufferWCMB), strnlen(szStackBufferWCMB, cch));
// Stage 2 output did not fit on the stack: size query, heap buffer, second conversion.
244 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
246 cch = WideCharToMultiByte(
static_cast<UINT
>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC,
static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
247 std::unique_ptr<CHAR[]> szBufferWCMB(
new CHAR[cch]);
248 cch = WideCharToMultiByte(
static_cast<UINT
>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC,
static_cast<int>(count_inter), szBufferWCMB.get(), cch, lpDefaultChar, NULL);
249 dst.append(
reinterpret_cast<const T_to*
>(szBufferWCMB.get()), strnlen(szBufferWCMB.get(), cch));
// Report the Win32 error code as std::system_error.
252 throw std::system_error(GetLastError(), std::system_category(),
"WideCharToMultiByte failed");
// Stage 1 output did not fit on the stack: run both stages again with heap buffers.
254 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
256 cch = MultiByteToWideChar(
static_cast<UINT
>(m_from_wincp), dwFlagsMBWC,
reinterpret_cast<LPCCH
>(src),
static_cast<int>(count_src), NULL, 0);
257 std::unique_ptr<WCHAR[]> szBufferMBWC(
new WCHAR[cch]);
258 cch = MultiByteToWideChar(
static_cast<UINT
>(m_from_wincp), dwFlagsMBWC,
reinterpret_cast<LPCCH
>(src),
static_cast<int>(count_src), szBufferMBWC.get(), cch);
259 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szBufferMBWC.get(), cch) :
static_cast<size_t>(cch) - 1;
// Size query for stage 2, then the conversion into a heap buffer of that size.
262 cch = WideCharToMultiByte(
static_cast<UINT
>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(),
static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
263 std::unique_ptr<CHAR[]> szBufferWCMB(
new CHAR[cch]);
264 cch = WideCharToMultiByte(
static_cast<UINT
>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(),
static_cast<int>(count_inter), szBufferWCMB.get(), cch, lpDefaultChar, NULL);
265 dst.append(
reinterpret_cast<const T_to*
>(szBufferWCMB.get()), strnlen(szBufferWCMB.get(), cch));
// Report the Win32 error code as std::system_error.
268 throw std::system_error(GetLastError(), std::system_category(),
"MultiByteToWideChar failed");
// iconv path: convert into a 1024-byte local buffer and append what was produced.
// Reserve room for count_src more characters up front.
271 dst.reserve(dst.size() + count_src);
272 T_to buf[1024 /
sizeof(T_to)];
// iconv counts bytes, so the source length is multiplied by sizeof(T_from)
// (stdex::mul presumably guards the multiplication against overflow - confirm).
273 size_t src_size = stdex::mul(
sizeof(T_from), count_src);
// Output cursor and remaining output space (in bytes) handed to iconv.
275 T_to* output = &buf[0];
276 size_t output_size =
sizeof(buf);
278 iconv(m_handle,
const_cast<char**
>(
reinterpret_cast<const char**
>(&src)), &src_size,
reinterpret_cast<char**
>(&output), &output_size);
// sizeof(buf) - output_size is the number of bytes iconv wrote into buf.
279 dst.append(buf,
reinterpret_cast<T_to*
>(
reinterpret_cast<char*
>(buf) +
sizeof(buf) - output_size));
// Report errno as std::system_error.
284 throw std::system_error(errno, std::system_category(),
"iconv failed");
/// Converts a zero-terminated string and appends the result to `dst`.
///
/// \param[in,out] dst  String to append the converted text to
/// \param[in]     src  Zero-terminated source string
295 template <
class _Traits_to = std::
char_traits<T_to>,
class _Alloc_to = std::allocator<T_to>>
297 _Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
298 _In_z_
const T_from* src)
// Forward to the counted overload with SIZE_MAX as the limit.
300 strcat(dst, src, SIZE_MAX);
/// Converts a std::basic_string and appends the result to `dst`.
///
/// \param[in,out] dst  String to append the converted text to
/// \param[in]     src  Source string
309 template <
class _Traits_to = std::
char_traits<T_to>,
class _Alloc_to = std::allocator<T_to>,
class _Traits_from = std::
char_traits<T_from>,
class _Alloc_from = std::allocator<T_from>>
311 _Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
312 _In_
const std::basic_string<T_from, _Traits_from, _Alloc_from>& src)
// Forward to the counted overload with the string's buffer and size.
314 strcat(dst, src.data(), src.size());
/// Converts a string into `dst`.
///
/// \param[in,out] dst        Destination string
/// \param[in]     src        Source string; annotated as optional/zero-terminated up to count_src
/// \param[in]     count_src  Character limit for `src`
///
/// NOTE(review): only the strcat() call is visible in this listing; a preceding statement
/// (presumably emptying dst first) appears to be missing - confirm against the full header.
324 template <
class _Traits_to = std::
char_traits<T_to>,
class _Alloc_to = std::allocator<T_to>>
326 _Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
327 _In_reads_or_z_opt_(count_src)
const T_from* src, _In_
size_t count_src)
330 strcat(dst, src, count_src);
/// Converts a zero-terminated string into `dst`.
///
/// \param[in,out] dst  Destination string
/// \param[in]     src  Zero-terminated source string
339 template <
class _Traits_to = std::
char_traits<T_to>,
class _Alloc_to = std::allocator<T_to>>
341 _Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
342 _In_z_
const T_from* src)
// Forward to the counted overload with SIZE_MAX as the limit.
344 strcpy(dst, src, SIZE_MAX);
/// Converts a std::basic_string into `dst`.
///
/// \param[in,out] dst  Destination string
/// \param[in]     src  Source string
353 template <
class _Traits_to = std::
char_traits<T_to>,
class _Alloc_to = std::allocator<T_to>,
class _Traits_from = std::
char_traits<T_from>,
class _Alloc_from = std::allocator<T_from>>
355 _Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
356 _In_
const std::basic_string<T_from, _Traits_from, _Alloc_from>& src)
// Forward to the counted overload with the string's buffer and size.
358 strcpy(dst, src.data(), src.size());
/// Returns the converted string.
///
/// \param[in] src        Source string; annotated as optional/zero-terminated up to count_src
/// \param[in] count_src  Character limit for `src`
///
/// \returns Converted string (the return statement itself is not visible in this listing).
367 template <
class _Traits_to = std::
char_traits<T_to>,
class _Alloc_to = std::allocator<T_to>>
368 inline std::basic_string<T_to, _Traits_to, _Alloc_to>
convert(_In_reads_or_z_opt_(count_src)
const T_from* src, _In_
size_t count_src)
// Start from an empty string and append the converted text to it.
370 std::basic_string<T_to, _Traits_to, _Alloc_to> dst;
371 strcat(dst, src, count_src);
/// Returns the converted string for a zero-terminated source.
///
/// \param[in] src  Zero-terminated source string
///
/// NOTE(review): the body of this overload is not visible in this listing.
380 template <
class _Traits_to = std::
char_traits<T_to>,
class _Alloc_to = std::allocator<T_to>>
381 inline std::basic_string<T_to, _Traits_to, _Alloc_to>
convert(_In_z_
const T_from* src)
/// Returns the converted string for a std::basic_string source.
///
/// \param[in] src  Source string
///
/// \returns Result of the counted convert() overload applied to the string's buffer and size.
391 template <
class _Traits_to = std::
char_traits<T_to>,
class _Alloc_to = std::allocator<T_to>,
class _Traits_from = std::
char_traits<T_from>,
class _Alloc_from = std::allocator<T_from>>
392 inline std::basic_string<T_to, _Traits_to, _Alloc_to>
convert(_In_
const std::basic_string<T_from, _Traits_from, _Alloc_from>& src)
394 return convert(src.data(), src.size());
400 iconv(m_handle, NULL, NULL, NULL, NULL);
/// Returns the charset currently used by the system.
///
/// NOTE(review): the two return statements are presumably alternatives under preprocessor
/// conditionals (Windows vs. other platforms) whose lines are not visible in this listing.
404 static charset_id system_charset()
// GetACP() result reinterpreted as a charset_id.
407 return static_cast<charset_id
>(GetACP());
// Name reported by nl_langinfo(CODESET), mapped through charset_from_name().
409 return charset_from_name(nl_langinfo(CODESET));
/// Translates a charset_id into a code page number.
///
/// \param[in] charset  Charset to translate
///
/// charset_id::system maps to GetACP(), charset_id::oem to GetOEMCP(); any other value is
/// used as the code page number directly.
/// NOTE(review): the `return` keyword line is not visible in this listing.
415 static UINT to_encoding(_In_ charset_id charset)
418 charset == charset_id::system ? GetACP() :
419 charset == charset_id::oem ? GetOEMCP() :
420 static_cast<UINT>(charset);
424 UINT m_from_wincp, m_to_wincp;
/// Translates a charset_id into an encoding name (the form passed to iconv_open()).
///
/// \param[in] charset  Charset to translate
///
/// charset_id::system maps to nl_langinfo(CODESET); any other value indexes the static name
/// table by the enum's underlying value.
/// NOTE(review): the table entries and the `return` keyword line are not visible in this listing.
427 static const char* to_encoding(_In_ charset_id charset)
// Name table with charset_id::_max slots.
429 static const char*
const encodings[
static_cast<std::underlying_type_t<charset_id>
>(charset_id::_max)] = {
// Some entries depend on the host byte order.
433#if BYTE_ORDER == BIG_ENDIAN
445 charset == charset_id::system ? nl_langinfo(CODESET) :
446 encodings[static_cast<std::underlying_type_t<charset_id>>(charset)];
465 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
468 _Inout_ std::wstring& dst,
469 _In_reads_or_z_opt_(count_src)
const char* src, _In_
size_t count_src,
470 _In_ charset_id charset = charset_id::system)
472 charset_encoder<char, wchar_t>(charset, wchar_t_charset).strcat(dst, src, count_src);
/// Deprecated alias: converts a char string and appends it to a wide string.
///
/// \param[in,out] dst        Wide string to append to
/// \param[in]     src        Source string
/// \param[in]     count_src  Character limit for `src`
/// \param[in]     charset    Charset of `src` (defaults to charset_id::system)
475 _Deprecated_(
"Use stdex::strcat")
476 inline
void str2wstr(
477 _Inout_ std::wstring& dst,
478 _In_reads_or_z_opt_(count_src) const
char* src, _In_
size_t count_src,
479 _In_ charset_id charset = charset_id::system)
// Pure forwarder to strcat().
481 strcat(dst, src, count_src, charset);
494 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
497 _Inout_ std::wstring& dst,
498 _In_
const std::string& src,
499 _In_ charset_id charset = charset_id::system)
501 strcat(dst, src.data(), src.size(), charset);
/// Deprecated alias: converts a std::string and appends it to a wide string.
///
/// \param[in,out] dst      Wide string to append to
/// \param[in]     src      Source string
/// \param[in]     charset  Charset of `src` (defaults to charset_id::system)
504 _Deprecated_(
"Use stdex::strcat")
505 inline
void str2wstr(
506 _Inout_ std::wstring& dst,
507 _In_ const std::
string& src,
508 _In_ charset_id charset = charset_id::system)
// Pure forwarder to strcat().
510 strcat(dst, src, charset);
524 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
527 _Inout_ std::wstring& dst,
528 _In_reads_or_z_opt_(count_src)
const char* src, _In_
size_t count_src,
529 _In_ charset_id charset = charset_id::system)
532 strcat(dst, src, count_src, charset);
545 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
548 _Inout_ std::wstring& dst,
549 _In_
const std::string& src,
550 _In_ charset_id charset = charset_id::system)
552 strcpy(dst, src.data(), src.size(), charset);
/// Converts a zero-terminated char string and returns it as a wide string.
///
/// \param[in] src      Zero-terminated source string
/// \param[in] charset  Charset of `src` (defaults to charset_id::system)
///
/// NOTE(review): the declaration of `dst` and the return statement are not visible in this listing.
566 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
568 inline std::wstring str2wstr(
569 _In_z_
const char* src,
570 _In_ charset_id charset = charset_id::system)
// SIZE_MAX is passed as the character limit.
573 strcat(dst, src, SIZE_MAX, charset);
/// Converts a counted char string and returns it as a wide string.
///
/// \param[in] src        Source string
/// \param[in] count_src  Character limit for `src`
/// \param[in] charset    Charset of `src` (defaults to charset_id::system)
///
/// NOTE(review): the declaration of `dst` and the return statement are not visible in this listing.
589 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
591 inline std::wstring str2wstr(
592 _In_reads_or_z_opt_(count_src)
const char* src, _In_
size_t count_src,
593 _In_ charset_id charset = charset_id::system)
596 strcat(dst, src, count_src, charset);
/// Converts a std::string and returns it as a wide string.
///
/// \param[in] src      Source string
/// \param[in] charset  Charset of `src` (defaults to charset_id::system)
///
/// \returns Result of the counted str2wstr() overload applied to the string's buffer and size.
611 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
613 inline std::wstring str2wstr(
614 _In_
const std::string& src,
615 _In_ charset_id charset = charset_id::system)
617 return str2wstr(src.c_str(), src.size(), charset);
631 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
634 _Inout_ std::string& dst,
635 _In_reads_or_z_opt_(count_src)
const wchar_t* src, _In_
size_t count_src,
636 _In_ charset_id charset = charset_id::system)
638 charset_encoder<wchar_t, char>(wchar_t_charset, charset).strcat(dst, src, count_src);
/// Deprecated alias: converts a wide string and appends it to a char string.
///
/// \param[in,out] dst        String to append to
/// \param[in]     src        Source wide string
/// \param[in]     count_src  Character limit for `src`
/// \param[in]     charset    Charset of `dst` (defaults to charset_id::system)
641 _Deprecated_(
"Use stdex::strcat")
642 inline
void wstr2str(
643 _Inout_ std::
string& dst,
644 _In_reads_or_z_opt_(count_src) const
wchar_t* src, _In_
size_t count_src,
645 _In_ charset_id charset = charset_id::system)
// Pure forwarder to strcat().
647 strcat(dst, src, count_src, charset);
660 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
663 _Inout_ std::string& dst,
664 _In_
const std::wstring& src,
665 _In_ charset_id charset = charset_id::system)
667 strcat(dst, src.c_str(), src.size(), charset);
/// Deprecated alias: converts a std::wstring and appends it to a char string.
///
/// \param[in,out] dst      String to append to
/// \param[in]     src      Source wide string
/// \param[in]     charset  Charset of `dst` (defaults to charset_id::system)
670 _Deprecated_(
"Use stdex::strcat")
671 inline
void wstr2str(
672 _Inout_ std::
string& dst,
673 _In_ const std::wstring& src,
674 _In_ charset_id charset = charset_id::system)
// Pure forwarder to strcat().
676 strcat(dst, src, charset);
690 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
693 _Inout_ std::string& dst,
694 _In_reads_or_z_opt_(count_src)
const wchar_t* src, _In_
size_t count_src,
695 _In_ charset_id charset = charset_id::system)
698 strcat(dst, src, count_src, charset);
711 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
714 _Inout_ std::string& dst,
715 _In_
const std::wstring& src,
716 _In_ charset_id charset = charset_id::system)
718 strcpy(dst, src.data(), src.size(), charset);
/// Converts a zero-terminated wide string and returns it as a char string.
///
/// \param[in] src      Zero-terminated source wide string
/// \param[in] charset  Charset of the result (defaults to charset_id::system)
///
/// NOTE(review): the declaration of `dst` and the return statement are not visible in this listing.
732 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
734 inline std::string wstr2str(
735 _In_z_
const wchar_t* src,
736 _In_ charset_id charset = charset_id::system)
// SIZE_MAX is passed as the character limit.
739 strcat(dst, src, SIZE_MAX, charset);
/// Converts a counted wide string and returns it as a char string.
///
/// \param[in] src        Source wide string
/// \param[in] count_src  Character limit for `src`
/// \param[in] charset    Charset of the result (defaults to charset_id::system)
///
/// NOTE(review): the declaration of `dst` and the return statement are not visible in this listing.
755 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
757 inline std::string wstr2str(
758 _In_reads_or_z_opt_(count_src)
const wchar_t* src, _In_
size_t count_src,
759 _In_ charset_id charset = charset_id::system)
762 strcat(dst, src, count_src, charset);
/// Converts a std::wstring and returns it as a char string.
///
/// \param[in] src      Source wide string
/// \param[in] charset  Charset of the result (defaults to charset_id::system)
///
/// \returns Result of the counted wstr2str() overload applied to the string's buffer and size.
777 _Deprecated_(
"For better performance, consider a reusable charset_encoder")
779 inline std::string wstr2str(
780 _In_
const std::wstring& src,
781 _In_ charset_id charset = charset_id::system)
783 return wstr2str(src.c_str(), src.size(), charset);
788#pragma GCC diagnostic pop
Encoding converter context.
Definition unicode.hpp:117
void strcpy(std::basic_string< T_to, _Traits_to, _Alloc_to > &dst, const std::basic_string< T_from, _Traits_from, _Alloc_from > &src)
Convert string.
Definition unicode.hpp:354
std::basic_string< T_to, _Traits_to, _Alloc_to > convert(const std::basic_string< T_from, _Traits_from, _Alloc_from > &src)
Return converted string.
Definition unicode.hpp:392
std::basic_string< T_to, _Traits_to, _Alloc_to > convert(const T_from *src)
Return converted string.
Definition unicode.hpp:381
void strcat(std::basic_string< T_to, _Traits_to, _Alloc_to > &dst, const std::basic_string< T_from, _Traits_from, _Alloc_from > &src)
Convert string and append to string.
Definition unicode.hpp:310
void strcpy(std::basic_string< T_to, _Traits_to, _Alloc_to > &dst, const T_from *src)
Convert string.
Definition unicode.hpp:340
void strcat(std::basic_string< T_to, _Traits_to, _Alloc_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string and append to string.
Definition unicode.hpp:154
void strcat(std::basic_string< T_to, _Traits_to, _Alloc_to > &dst, const T_from *src)
Convert string and append to string.
Definition unicode.hpp:296
void strcpy(std::basic_string< T_to, _Traits_to, _Alloc_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string.
Definition unicode.hpp:325
std::basic_string< T_to, _Traits_to, _Alloc_to > convert(_In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Return converted string.
Definition unicode.hpp:368