stdex
Additional custom algorithms not covered by Standard C++
Loading...
Searching...
No Matches
unicode.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "endian.hpp"
10#include "math.hpp"
11#include "string.hpp"
12#include <stdint.h>
13#ifndef _WIN32
14#include <iconv.h>
15#include <langinfo.h>
16#endif
17#include <map>
18#include <memory>
19#include <string>
20
21#ifndef _WIN32
22#pragma GCC diagnostic push
23#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
24#endif
25
26namespace stdex
27{
/// Character set identifiers.
///
/// On Windows the enumerators carry the corresponding Windows code page
/// numbers. Elsewhere they are consecutive indices starting at 0, followed by
/// the `_max` sentinel that holds the number of identifiers.
enum class charset_id : uint16_t {
#ifndef _WIN32
    system = 0,      ///< charset of the current locale
    utf7 = 1,
    utf8 = 2,
    utf16 = 3,
    utf32 = 4,
    windows1250 = 5,
    windows1251 = 6,
    windows1252 = 7,

    _max = 8         ///< number of identifiers above; not a charset
#else
    system = CP_ACP,
    oem = CP_OEMCP,
    utf7 = CP_UTF7,
    utf8 = CP_UTF8,
    utf16 = 1200 /*CP_WINUNICODE*/,
    utf32 = 12000,
    windows1250 = 1250,
    windows1251 = 1251,
    windows1252 = 1252,
#endif
};
52
53#ifdef _WIN32
54 constexpr charset_id wchar_t_charset = charset_id::utf16;
55#ifdef _UNICODE
56 constexpr charset_id system_charset = charset_id::utf16;
57#else
58 constexpr charset_id system_charset = charset_id::system;
59#endif
60#else
61 constexpr charset_id wchar_t_charset = charset_id::utf32;
62 constexpr charset_id system_charset = charset_id::system;
63#endif
64
70 inline charset_id charset_from_name(_In_z_ const char* name)
71 {
72 struct charset_less {
73 bool operator()(_In_z_ const char* a, _In_z_ const char* b) const
74 {
75 return stdex::stricmp(a, b) < 0;
76 }
77 };
78 static const std::map<const char*, charset_id, charset_less> charsets = {
79 { "UNICODE-1-1-UTF-7", charset_id::utf7 },
80 { "UTF-7", charset_id::utf7 },
81 { "CSUNICODE11UTF7", charset_id::utf7 },
82
83 { "UTF-8", charset_id::utf8 },
84 { "UTF8", charset_id::utf8 },
85
86 { "UTF-16", charset_id::utf16 },
87#if BYTE_ORDER == BIG_ENDIAN
88 { "UTF-16BE", charset_id::utf16 },
89#else
90 { "UTF-16LE", charset_id::utf16 },
91#endif
92
93 { "UTF-32", charset_id::utf32 },
94#if BYTE_ORDER == BIG_ENDIAN
95 { "UTF-32BE", charset_id::utf32 },
96#else
97 { "UTF-32LE", charset_id::utf32 },
98#endif
99
100 { "CP1250", charset_id::windows1250 },
101 { "MS-EE", charset_id::windows1250 },
102 { "WINDOWS-1250", charset_id::windows1250 },
103
104 { "CP1251", charset_id::windows1251 },
105 { "MS-CYRL", charset_id::windows1251 },
106 { "WINDOWS-1251", charset_id::windows1251 },
107
108 { "CP1252", charset_id::windows1252 },
109 { "MS-ANSI", charset_id::windows1252 },
110 { "WINDOWS-1252", charset_id::windows1252 },
111 };
112 if (auto el = charsets.find(name); el != charsets.end())
113 return el->second;
114 return charset_id::system;
115 }
116
122 template <class _Traits = std::char_traits<char>, class _Alloc = std::allocator<char>>
123 charset_id charset_from_name(_In_ const std::basic_string<char, _Traits, _Alloc>& name)
124 {
125 return charset_from_name(name.c_str());
126 }
127
131 template <typename T_from, typename T_to>
133 {
134 protected:
135 charset_id m_from, m_to;
136
137 public:
138 charset_encoder(_In_ charset_id from, _In_ charset_id to) :
139 m_from(from),
140 m_to(to)
141 {
142#ifdef _WIN32
143 m_from_wincp = to_encoding(from);
144 m_to_wincp = to_encoding(to);
145#else
146 m_handle = iconv_open(to_encoding(to), to_encoding(from));
147 if (m_handle == (iconv_t)-1)
148 throw std::system_error(errno, std::system_category(), "iconv_open failed");
149#endif
150 }
151
152#ifndef _WIN32
154 {
155 iconv_close(m_handle);
156 }
157#endif
158
159 charset_id from_encoding() const { return m_from; }
160 charset_id to_encoding() const { return m_to; }
161
169 template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
170 void strcat(
171 _Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
172 _In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
173 {
174 _Assume_(src || !count_src);
175 count_src = stdex::strnlen<T_from>(src, count_src);
176 if (!count_src) _Unlikely_
177 return;
178
179#ifdef _WIN32
180 constexpr DWORD dwFlagsMBWC = MB_PRECOMPOSED;
181 constexpr DWORD dwFlagsWCMB = 0;
182 constexpr LPCCH lpDefaultChar = NULL;
183
184 _Assume_(src);
185 if (m_from_wincp == m_to_wincp) _Unlikely_{
186 dst.append(reinterpret_cast<const T_to*>(src), count_src);
187 return;
188 }
189
190#pragma warning(suppress: 4127)
191 if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(wchar_t)) {
192 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
193
194 // Try to convert to stack buffer first.
195 WCHAR szStackBuffer[1024 / sizeof(WCHAR)];
196#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
197 int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer));
198 if (cch) {
199 // Append from stack.
200 dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
201 return;
202 }
203 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
204 // Query the required output size. Allocate buffer. Then convert again.
205 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
206 std::unique_ptr<WCHAR[]> szBuffer(new WCHAR[cch]);
207 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBuffer.get(), cch);
208 dst.append(reinterpret_cast<const T_to*>(szBuffer.get()), count_src != SIZE_MAX ? wcsnlen(szBuffer.get(), cch) : static_cast<size_t>(cch) - 1);
209 return;
210 }
211 throw std::system_error(GetLastError(), std::system_category(), "MultiByteToWideChar failed");
212 }
213
214#pragma warning(suppress: 4127)
215 if constexpr (sizeof(T_from) == sizeof(wchar_t) && sizeof(T_to) == sizeof(char)) {
216 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
217
218 // Try to convert to stack buffer first.
219 CHAR szStackBuffer[1024 / sizeof(CHAR)];
220#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpWideCharStr parameter wrong?
221 int cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL);
222 if (cch) {
223 // Copy from stack. Be careful not to include zero terminator.
224 dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
225 return;
226 }
227 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
228 // Query the required output size. Allocate buffer. Then convert again.
229 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL);
230 std::unique_ptr<CHAR[]> szBuffer(new CHAR[cch]);
231 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szBuffer.get(), cch, lpDefaultChar, NULL);
232 dst.append(reinterpret_cast<const T_to*>(szBuffer.get()), count_src != SIZE_MAX ? strnlen(szBuffer.get(), cch) : static_cast<size_t>(cch) - 1);
233 return;
234 }
235 throw std::system_error(GetLastError(), std::system_category(), "WideCharToMultiByte failed");
236 }
237
238#pragma warning(suppress: 4127)
239 if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(char)) {
240 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
241
242 // Try to convert to stack buffer first.
243 WCHAR szStackBufferMBWC[512 / sizeof(WCHAR)];
244#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
245 int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBufferMBWC, _countof(szStackBufferMBWC));
246 if (cch) {
247 // Append from stack.
248 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szStackBufferMBWC, cch) : static_cast<size_t>(cch) - 1;
249 _Assume_(count_inter < INT_MAX);
250
251 // Try to convert to stack buffer first.
252 CHAR szStackBufferWCMB[512 / sizeof(CHAR)];
253#pragma warning(suppress: 6387) // Testing indicates szStackBufferMBWC may be NULL when count_inter is also 0. Is SAL of the lpWideCharStr parameter wrong?
254 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szStackBufferWCMB, _countof(szStackBufferWCMB), lpDefaultChar, NULL);
255 if (cch) {
256 // Copy from stack. Be careful not to include zero terminator.
257 dst.append(reinterpret_cast<const T_to*>(szStackBufferWCMB), strnlen(szStackBufferWCMB, cch));
258 return;
259 }
260 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
261 // Query the required output size. Allocate buffer. Then convert again.
262 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
263 std::unique_ptr<CHAR[]> szBufferWCMB(new CHAR[cch]);
264 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szBufferWCMB.get(), cch, lpDefaultChar, NULL);
265 dst.append(reinterpret_cast<const T_to*>(szBufferWCMB.get()), strnlen(szBufferWCMB.get(), cch));
266 return;
267 }
268 throw std::system_error(GetLastError(), std::system_category(), "WideCharToMultiByte failed");
269 }
270 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
271 // Query the required output size. Allocate buffer. Then convert again.
272 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
273 std::unique_ptr<WCHAR[]> szBufferMBWC(new WCHAR[cch]);
274 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBufferMBWC.get(), cch);
275 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szBufferMBWC.get(), cch) : static_cast<size_t>(cch) - 1;
276
277 // Query the required output size. Allocate buffer. Then convert again.
278 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
279 std::unique_ptr<CHAR[]> szBufferWCMB(new CHAR[cch]);
280 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), szBufferWCMB.get(), cch, lpDefaultChar, NULL);
281 dst.append(reinterpret_cast<const T_to*>(szBufferWCMB.get()), strnlen(szBufferWCMB.get(), cch));
282 return;
283 }
284 throw std::system_error(GetLastError(), std::system_category(), "MultiByteToWideChar failed");
285 }
286#else
287 dst.reserve(dst.size() + count_src);
288 T_to buf[1024 / sizeof(T_to)];
289 size_t src_size = stdex::mul(sizeof(T_from), count_src);
290 for (;;) {
291 T_to* output = &buf[0];
292 size_t output_size = sizeof(buf);
293 errno = 0;
294 iconv(m_handle, const_cast<char**>(reinterpret_cast<const char**>(&src)), &src_size, reinterpret_cast<char**>(&output), &output_size);
295 dst.append(buf, reinterpret_cast<T_to*>(reinterpret_cast<char*>(buf) + sizeof(buf) - output_size));
296 if (!errno)
297 break;
298 if (errno == E2BIG)
299 continue;
300 throw std::system_error(errno, std::system_category(), "iconv failed");
301 }
302#endif
303 }
304
311 template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
312 void strcat(
313 _Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
314 _In_z_ const T_from* src)
315 {
316 strcat(dst, src, SIZE_MAX);
317 }
318
325 template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>, class _Traits_from = std::char_traits<T_from>, class _Alloc_from = std::allocator<T_from>>
326 void strcat(
327 _Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
328 _In_ const std::basic_string<T_from, _Traits_from, _Alloc_from>& src)
329 {
330 strcat(dst, src.data(), src.size());
331 }
332
340 template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
341 void strcpy(
342 _Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
343 _In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
344 {
345 dst.clear();
346 strcat(dst, src, count_src);
347 }
348
355 template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
356 void strcpy(
357 _Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
358 _In_z_ const T_from* src)
359 {
360 strcpy(dst, src, SIZE_MAX);
361 }
362
369 template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>, class _Traits_from = std::char_traits<T_from>, class _Alloc_from = std::allocator<T_from>>
370 void strcpy(
371 _Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
372 _In_ const std::basic_string<T_from, _Traits_from, _Alloc_from>& src)
373 {
374 strcpy(dst, src.data(), src.size());
375 }
376
383 template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
384 std::basic_string<T_to, _Traits_to, _Alloc_to> convert(_In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
385 {
386 std::basic_string<T_to, _Traits_to, _Alloc_to> dst;
387 strcat(dst, src, count_src);
388 return dst;
389 }
390
396 template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
397 std::basic_string<T_to, _Traits_to, _Alloc_to> convert(_In_z_ const T_from* src)
398 {
399 return convert(src, SIZE_MAX);
400 }
401
407 template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>, class _Traits_from = std::char_traits<T_from>, class _Alloc_from = std::allocator<T_from>>
408 std::basic_string<T_to, _Traits_to, _Alloc_to> convert(_In_ const std::basic_string<T_from, _Traits_from, _Alloc_from>& src)
409 {
410 return convert(src.data(), src.size());
411 }
412
413 void clear()
414 {
415#ifndef _WIN32
416 iconv(m_handle, NULL, NULL, NULL, NULL);
417#endif
418 }
419
420 static charset_id system_charset()
421 {
422#ifdef _WIN32
423 return static_cast<charset_id>(GetACP());
424#else
425 return charset_from_name(nl_langinfo(CODESET));
426#endif
427 }
428
429#ifdef _WIN32
430 protected:
431 static UINT to_encoding(_In_ charset_id charset)
432 {
433 return
434 charset == charset_id::system ? GetACP() :
435 charset == charset_id::oem ? GetOEMCP() :
436 static_cast<UINT>(charset);
437 }
438
439 protected:
440 UINT m_from_wincp, m_to_wincp;
441#else
442 protected:
443 static const char* to_encoding(_In_ charset_id charset)
444 {
445 static const char* const encodings[static_cast<std::underlying_type_t<charset_id>>(charset_id::_max)] = {
446 "", // system
447 "UTF-7", // utf7
448 "UTF-8", // utf8
449#if BYTE_ORDER == BIG_ENDIAN
450 "UTF-16BE", // utf16
451 "UTF-32BE", // utf32
452#else
453 "UTF-16LE", // utf16
454 "UTF-32LE", // utf32
455#endif
456 "CP1250", // windows1250
457 "CP1251", // windows1251
458 "CP1252", // windows1252
459 };
460 return
461 charset == charset_id::system ? nl_langinfo(CODESET) :
462 encodings[static_cast<std::underlying_type_t<charset_id>>(charset)];
463 }
464
465 protected:
466 iconv_t m_handle;
467#endif
468 };
469
480#ifndef _WIN32
481 _Deprecated_("For better performance, consider a reusable charset_encoder")
482#endif
483 inline void strcat(
484 _Inout_ std::wstring& dst,
485 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
486 _In_ charset_id charset = charset_id::system)
487 {
488 charset_encoder<char, wchar_t>(charset, wchar_t_charset).strcat(dst, src, count_src);
489 }
490
491 _Deprecated_("Use stdex::strcat")
492 inline void str2wstr(
493 _Inout_ std::wstring& dst,
494 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
495 _In_ charset_id charset = charset_id::system)
496 {
497 strcat(dst, src, count_src, charset);
498 }
499
509#ifndef _WIN32
510 _Deprecated_("For better performance, consider a reusable charset_encoder")
511#endif
512 inline void strcat(
513 _Inout_ std::wstring& dst,
514 _In_ const std::string& src,
515 _In_ charset_id charset = charset_id::system)
516 {
517 strcat(dst, src.data(), src.size(), charset);
518 }
519
520 _Deprecated_("Use stdex::strcat")
521 inline void str2wstr(
522 _Inout_ std::wstring& dst,
523 _In_ const std::string& src,
524 _In_ charset_id charset = charset_id::system)
525 {
526 strcat(dst, src, charset);
527 }
528
539#ifndef _WIN32
540 _Deprecated_("For better performance, consider a reusable charset_encoder")
541#endif
542 inline void strcpy(
543 _Inout_ std::wstring& dst,
544 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
545 _In_ charset_id charset = charset_id::system)
546 {
547 dst.clear();
548 strcat(dst, src, count_src, charset);
549 }
550
560#ifndef _WIN32
561 _Deprecated_("For better performance, consider a reusable charset_encoder")
562#endif
563 inline void strcpy(
564 _Inout_ std::wstring& dst,
565 _In_ const std::string& src,
566 _In_ charset_id charset = charset_id::system)
567 {
568 strcpy(dst, src.data(), src.size(), charset);
569 }
570
581#ifndef _WIN32
582 _Deprecated_("For better performance, consider a reusable charset_encoder")
583#endif
584 inline std::wstring str2wstr(
585 _In_z_ const char* src,
586 _In_ charset_id charset = charset_id::system)
587 {
588 std::wstring dst;
589 strcat(dst, src, SIZE_MAX, charset);
590 return dst;
591 }
592
604#ifndef _WIN32
605 _Deprecated_("For better performance, consider a reusable charset_encoder")
606#endif
607 inline std::wstring str2wstr(
608 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
609 _In_ charset_id charset = charset_id::system)
610 {
611 std::wstring dst;
612 strcat(dst, src, count_src, charset);
613 return dst;
614 }
615
626#ifndef _WIN32
627 _Deprecated_("For better performance, consider a reusable charset_encoder")
628#endif
629 inline std::wstring str2wstr(
630 _In_ const std::string& src,
631 _In_ charset_id charset = charset_id::system)
632 {
633 return str2wstr(src.c_str(), src.size(), charset);
634 }
635
646#ifndef _WIN32
647 _Deprecated_("For better performance, consider a reusable charset_encoder")
648#endif
649 inline void strcat(
650 _Inout_ std::string& dst,
651 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
652 _In_ charset_id charset = charset_id::system)
653 {
654 charset_encoder<wchar_t, char>(wchar_t_charset, charset).strcat(dst, src, count_src);
655 }
656
657 _Deprecated_("Use stdex::strcat")
658 inline void wstr2str(
659 _Inout_ std::string& dst,
660 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
661 _In_ charset_id charset = charset_id::system)
662 {
663 strcat(dst, src, count_src, charset);
664 }
665
675#ifndef _WIN32
676 _Deprecated_("For better performance, consider a reusable charset_encoder")
677#endif
678 inline void strcat(
679 _Inout_ std::string& dst,
680 _In_ const std::wstring& src,
681 _In_ charset_id charset = charset_id::system)
682 {
683 strcat(dst, src.c_str(), src.size(), charset);
684 }
685
686 _Deprecated_("Use stdex::strcat")
687 inline void wstr2str(
688 _Inout_ std::string& dst,
689 _In_ const std::wstring& src,
690 _In_ charset_id charset = charset_id::system)
691 {
692 strcat(dst, src, charset);
693 }
694
705#ifndef _WIN32
706 _Deprecated_("For better performance, consider a reusable charset_encoder")
707#endif
708 inline void strcpy(
709 _Inout_ std::string& dst,
710 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
711 _In_ charset_id charset = charset_id::system)
712 {
713 dst.clear();
714 strcat(dst, src, count_src, charset);
715 }
716
726#ifndef _WIN32
727 _Deprecated_("For better performance, consider a reusable charset_encoder")
728#endif
729 inline void strcpy(
730 _Inout_ std::string& dst,
731 _In_ const std::wstring& src,
732 _In_ charset_id charset = charset_id::system)
733 {
734 strcpy(dst, src.data(), src.size(), charset);
735 }
736
747#ifndef _WIN32
748 _Deprecated_("For better performance, consider a reusable charset_encoder")
749#endif
750 inline std::string wstr2str(
751 _In_z_ const wchar_t* src,
752 _In_ charset_id charset = charset_id::system)
753 {
754 std::string dst;
755 strcat(dst, src, SIZE_MAX, charset);
756 return dst;
757 }
758
770#ifndef _WIN32
771 _Deprecated_("For better performance, consider a reusable charset_encoder")
772#endif
773 inline std::string wstr2str(
774 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
775 _In_ charset_id charset = charset_id::system)
776 {
777 std::string dst;
778 strcat(dst, src, count_src, charset);
779 return dst;
780 }
781
792#ifndef _WIN32
793 _Deprecated_("For better performance, consider a reusable charset_encoder")
794#endif
795 inline std::string wstr2str(
796 _In_ const std::wstring& src,
797 _In_ charset_id charset = charset_id::system)
798 {
799 return wstr2str(src.c_str(), src.size(), charset);
800 }
801
802#ifdef _WIN32
812 template <class _Traits = std::char_traits<wchar_t>, class _Alloc = std::allocator<wchar_t>>
813 size_t normalizecat(
814 _Inout_ std::basic_string<wchar_t, _Traits, _Alloc>& dst,
815 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
816 {
817 count_src = stdex::strnlen(src, count_src);
818 size_t count_dst = dst.size();
819 dst.resize(count_dst + count_src);
820 _Assume_(count_src + 1 < INT_MAX);
821#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpSrcString parameter wrong?
822 int r = NormalizeString(NormalizationC, src, static_cast<int>(count_src), dst.data() + count_dst, static_cast<int>(count_src + 1));
823 if (r >= 0)
824 dst.resize(count_dst + r);
825 else
826#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the _Src parameter wrong?
827 memcpy(dst.data() + count_dst, src, count_src * sizeof(wchar_t));
828 return dst.size();
829 }
830
839 template <size_t _Size, class _Traits = std::char_traits<wchar_t>, class _Alloc = std::allocator<wchar_t>>
840 size_t normalizecat(
841 _Inout_ std::basic_string<wchar_t, _Traits, _Alloc>& dst,
842 _In_ const wchar_t (&src)[_Size])
843 {
844 return normalizecat(dst, src, _Size);
845 }
846
855 template <class _Traits_dst = std::char_traits<wchar_t>, class _Alloc_dst = std::allocator<wchar_t>, class _Traits_src = std::char_traits<wchar_t>, class _Alloc_src = std::allocator<wchar_t>>
856 size_t normalizecat(
857 _Inout_ std::basic_string<wchar_t, _Traits_dst, _Alloc_dst>& dst,
858 _In_ const std::basic_string<wchar_t, _Traits_src, _Alloc_src>& src)
859 {
860 return normalizecat(dst, src.data(), src.size());
861 }
862
872 template <class _Traits = std::char_traits<wchar_t>, class _Alloc = std::allocator<wchar_t>>
873 size_t normalize(
874 _Inout_ std::basic_string<wchar_t, _Traits, _Alloc>& dst,
875 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
876 {
877 dst.clear();
878 return normalizecat(dst, src, count_src);
879 }
880
889 template <size_t _Size, class _Traits = std::char_traits<wchar_t>, class _Alloc = std::allocator<wchar_t>>
890 size_t normalize(
891 _Inout_ std::basic_string<wchar_t, _Traits, _Alloc>& dst,
892 _In_ const wchar_t(&src)[_Size])
893 {
894 return normalize(dst, src, _Size);
895 }
896
905 template <class _Traits_dst = std::char_traits<wchar_t>, class _Alloc_dst = std::allocator<wchar_t>, class _Traits_src = std::char_traits<wchar_t>, class _Alloc_src = std::allocator<wchar_t>>
906 size_t normalize(
907 _Inout_ std::basic_string<wchar_t, _Traits_dst, _Alloc_dst>& dst,
908 _In_ const std::basic_string<wchar_t, _Traits_src, _Alloc_src>& src)
909 {
910 return normalize(dst, src.data(), src.size());
911 }
912
921 inline std::wstring normalize(_In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
922 {
923 std::wstring dst;
924 normalizecat(dst, src, count_src);
925 return dst;
926 }
927
935 template <size_t _Size>
936 std::wstring normalize(_In_ const wchar_t(&src)[_Size])
937 {
938 std::wstring dst;
939 normalizecat(dst, src, _Size);
940 return dst;
941 }
942
950 template <class _Traits = std::char_traits<wchar_t>, class _Alloc = std::allocator<wchar_t>>
951 std::wstring normalize(_In_ const std::basic_string<wchar_t, _Traits, _Alloc>& src)
952 {
953 std::wstring dst;
954 normalizecat(dst, src.data(), src.size());
955 return dst;
956 }
957#endif
958}
959
960#ifndef _WIN32
961#pragma GCC diagnostic pop
962#endif
Encoding converter context.
Definition unicode.hpp:133
void strcpy(std::basic_string< T_to, _Traits_to, _Alloc_to > &dst, const std::basic_string< T_from, _Traits_from, _Alloc_from > &src)
Convert string.
Definition unicode.hpp:370
std::basic_string< T_to, _Traits_to, _Alloc_to > convert(const std::basic_string< T_from, _Traits_from, _Alloc_from > &src)
Return converted string.
Definition unicode.hpp:408
std::basic_string< T_to, _Traits_to, _Alloc_to > convert(const T_from *src)
Return converted string.
Definition unicode.hpp:397
void strcat(std::basic_string< T_to, _Traits_to, _Alloc_to > &dst, const std::basic_string< T_from, _Traits_from, _Alloc_from > &src)
Convert string and append to string.
Definition unicode.hpp:326
void strcpy(std::basic_string< T_to, _Traits_to, _Alloc_to > &dst, const T_from *src)
Convert string.
Definition unicode.hpp:356
void strcat(std::basic_string< T_to, _Traits_to, _Alloc_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string and append to string.
Definition unicode.hpp:170
void strcat(std::basic_string< T_to, _Traits_to, _Alloc_to > &dst, const T_from *src)
Convert string and append to string.
Definition unicode.hpp:312
void strcpy(std::basic_string< T_to, _Traits_to, _Alloc_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string.
Definition unicode.hpp:341
std::basic_string< T_to, _Traits_to, _Alloc_to > convert(_In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Return converted string.
Definition unicode.hpp:384