stdex
Additional custom algorithms, or algorithms not covered by Standard C++
Loading...
Searching...
No Matches
unicode.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023-2024 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "endian.hpp"
10#include "math.hpp"
11#include "string.hpp"
12#include <stdint.h>
13#ifndef _WIN32
14#include <iconv.h>
15#include <langinfo.h>
16#endif
17#include <map>
18#include <memory>
19#include <string>
20
21#if defined(__GNUC__)
22#pragma GCC diagnostic push
23#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
24#pragma GCC diagnostic ignored "-Wexit-time-destructors"
25#endif
26
27namespace stdex
28{
///
/// Charset identifiers accepted by the conversion helpers in this file
///
/// On Windows every enumerator holds a numeric code page value, so it can be
/// cast directly to a `UINT` code page. On other platforms the enumerators are
/// sequential, starting at zero, and `_max` marks the number of charsets (it is
/// used to size the iconv encoding-name table).
///
enum class charset_id : uint16_t {
#ifdef _WIN32
    system = CP_ACP,                ///< Resolved with GetACP() at conversion time
    oem = CP_OEMCP,                 ///< Resolved with GetOEMCP() at conversion time
    utf7 = CP_UTF7,                 ///< UTF-7
    utf8 = CP_UTF8,                 ///< UTF-8
    utf16 = 1200 /*CP_WINUNICODE*/, ///< UTF-16
    utf32 = 12000,                  ///< UTF-32
    windows1250 = 1250,             ///< Windows-1250
    windows1251 = 1251,             ///< Windows-1251
    windows1252 = 1252,             ///< Windows-1252
#else
    system = 0,  ///< Resolved with nl_langinfo(CODESET) at conversion time
    utf7,        ///< UTF-7
    utf8,        ///< UTF-8
    utf16,       ///< UTF-16
    utf32,       ///< UTF-32
    windows1250, ///< Windows-1250
    windows1251, ///< Windows-1251
    windows1252, ///< Windows-1252

    _max         ///< Number of charsets; not a charset itself
#endif
};
53
54#ifdef _WIN32
55 constexpr charset_id wchar_t_charset = charset_id::utf16;
56#ifdef _UNICODE
57 constexpr charset_id system_charset = charset_id::utf16;
58#else
59 constexpr charset_id system_charset = charset_id::system;
60#endif
61#else
62 constexpr charset_id wchar_t_charset = charset_id::utf32;
63 constexpr charset_id system_charset = charset_id::system;
64#endif
65
73 inline charset_id charset_from_name(_In_z_ const char* name)
74 {
75 struct charset_less {
76 bool operator()(_In_z_ const char* a, _In_z_ const char* b) const
77 {
78 return stricmp(a, b) < 0;
79 }
80 };
81 static const std::map<const char*, charset_id, charset_less> charsets = {
82 { "UNICODE-1-1-UTF-7", charset_id::utf7 },
83 { "UTF-7", charset_id::utf7 },
84 { "CSUNICODE11UTF7", charset_id::utf7 },
85
86 { "UTF-8", charset_id::utf8 },
87 { "UTF8", charset_id::utf8 },
88
89 { "UTF-16", charset_id::utf16 },
90#if BYTE_ORDER == BIG_ENDIAN
91 { "UTF-16BE", charset_id::utf16 },
92#else
93 { "UTF-16LE", charset_id::utf16 },
94#endif
95
96 { "UTF-32", charset_id::utf32 },
97#if BYTE_ORDER == BIG_ENDIAN
98 { "UTF-32BE", charset_id::utf32 },
99#else
100 { "UTF-32LE", charset_id::utf32 },
101#endif
102
103 { "CP1250", charset_id::windows1250 },
104 { "MS-EE", charset_id::windows1250 },
105 { "WINDOWS-1250", charset_id::windows1250 },
106
107 { "CP1251", charset_id::windows1251 },
108 { "MS-CYRL", charset_id::windows1251 },
109 { "WINDOWS-1251", charset_id::windows1251 },
110
111 { "CP1252", charset_id::windows1252 },
112 { "MS-ANSI", charset_id::windows1252 },
113 { "WINDOWS-1252", charset_id::windows1252 },
114 };
115 if (auto el = charsets.find(name); el != charsets.end())
116 return el->second;
117 return charset_id::system;
118 }
119
127 template <class TR = std::char_traits<char>, class AX = std::allocator<char>>
128 charset_id charset_from_name(_In_ const std::basic_string<char, TR, AX>& name)
129 {
130 return charset_from_name(name.c_str());
131 }
132
136 template <typename T_from, typename T_to>
138 {
139 protected:
140 charset_id m_from, m_to;
141
142 public:
143 charset_encoder(_In_ charset_id from, _In_ charset_id to) :
144 m_from(from),
145 m_to(to)
146 {
147#ifdef _WIN32
148 m_from_wincp = to_encoding(from);
149 m_to_wincp = to_encoding(to);
150#else
151 m_handle = iconv_open(to_encoding(to), to_encoding(from));
152 if (m_handle == (iconv_t)-1)
153 throw std::system_error(errno, std::system_category(), "iconv_open failed");
154#endif
155 }
156
157#ifndef _WIN32
159 {
160 iconv_close(m_handle);
161 }
162#endif
163
164 charset_id from_encoding() const { return m_from; }
165 charset_id to_encoding() const { return m_to; }
166
174 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
175 void strcat(
176 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
177 _In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
178 {
179 _Assume_(src || !count_src);
180 count_src = strnlen<T_from>(src, count_src);
181 if (!count_src) _Unlikely_
182 return;
183
184#ifdef _WIN32
185 DWORD dwFlagsMBWC = static_cast<UINT>(m_from_wincp) < CP_UTF7 ? MB_PRECOMPOSED : 0;
186 constexpr DWORD dwFlagsWCMB = 0;
187 constexpr LPCCH lpDefaultChar = NULL;
188
189 _Assume_(src);
190 if (m_from_wincp == m_to_wincp) _Unlikely_{
191 dst.append(reinterpret_cast<const T_to*>(src), count_src);
192 return;
193 }
194
195#pragma warning(suppress: 4127)
196 if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(wchar_t)) {
197 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
198
199 // Try to convert to stack buffer first.
200 WCHAR szStackBuffer[1024 / sizeof(WCHAR)];
201#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
202 int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer));
203 if (cch) {
204 // Append from stack.
205 dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
206 return;
207 }
208 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
209 // Query the required output size. Allocate buffer. Then convert again.
210 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
211 size_t offset = dst.size();
212 dst.resize(offset + static_cast<size_t>(cch));
213 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), &dst[offset], cch);
214 dst.resize(offset + (count_src != SIZE_MAX ? wcsnlen(&dst[offset], cch) : static_cast<size_t>(cch) - 1));
215 return;
216 }
217 throw std::system_error(GetLastError(), std::system_category(), "MultiByteToWideChar failed");
218 }
219
220#pragma warning(suppress: 4127)
221 if constexpr (sizeof(T_from) == sizeof(wchar_t) && sizeof(T_to) == sizeof(char)) {
222 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
223
224 // Try to convert to stack buffer first.
225 CHAR szStackBuffer[1024 / sizeof(CHAR)];
226#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpWideCharStr parameter wrong?
227 int cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL);
228 if (cch) {
229 // Copy from stack. Be careful not to include zero terminator.
230 dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
231 return;
232 }
233 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
234 // Query the required output size. Allocate buffer. Then convert again.
235 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL);
236 size_t offset = dst.size();
237 dst.resize(offset + static_cast<size_t>(cch));
238 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), &dst[offset], cch, lpDefaultChar, NULL);
239 dst.resize(offset + (count_src != SIZE_MAX ? strnlen(&dst[offset], cch) : static_cast<size_t>(cch) - 1));
240 return;
241 }
242 throw std::system_error(GetLastError(), std::system_category(), "WideCharToMultiByte failed");
243 }
244
245#pragma warning(suppress: 4127)
246 if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(char)) {
247 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
248
249 // Try to convert to stack buffer first.
250 WCHAR szStackBufferMBWC[512 / sizeof(WCHAR)];
251#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
252 int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBufferMBWC, _countof(szStackBufferMBWC));
253 if (cch) {
254 // Append from stack.
255 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szStackBufferMBWC, cch) : static_cast<size_t>(cch) - 1;
256 _Assume_(count_inter < INT_MAX);
257
258 // Try to convert to stack buffer first.
259 CHAR szStackBufferWCMB[512 / sizeof(CHAR)];
260#pragma warning(suppress: 6387) // Testing indicates szStackBufferMBWC may be NULL when count_inter is also 0. Is SAL of the lpWideCharStr parameter wrong?
261 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szStackBufferWCMB, _countof(szStackBufferWCMB), lpDefaultChar, NULL);
262 if (cch) {
263 // Copy from stack. Be careful not to include zero terminator.
264 dst.append(reinterpret_cast<const T_to*>(szStackBufferWCMB), strnlen(szStackBufferWCMB, cch));
265 return;
266 }
267 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
268 // Query the required output size. Allocate buffer. Then convert again.
269 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
270 size_t offset = dst.size();
271 dst.resize(offset + cch);
272 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), &dst[offset], cch, lpDefaultChar, NULL);
273 dst.resize(offset + strnlen(&dst[offset], cch));
274 return;
275 }
276 throw std::system_error(GetLastError(), std::system_category(), "WideCharToMultiByte failed");
277 }
278 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
279 // Query the required output size. Allocate buffer. Then convert again.
280 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
281 std::unique_ptr<WCHAR[]> szBufferMBWC(new WCHAR[cch]);
282 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBufferMBWC.get(), cch);
283 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szBufferMBWC.get(), cch) : static_cast<size_t>(cch) - 1;
284
285 // Query the required output size. Allocate buffer. Then convert again.
286 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
287 size_t offset = dst.size();
288 dst.resize(offset + cch);
289 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), &dst[offset], cch, lpDefaultChar, NULL);
290 dst.resize(offset + strnlen(&dst[offset], cch));
291 return;
292 }
293 throw std::system_error(GetLastError(), std::system_category(), "MultiByteToWideChar failed");
294 }
295#else
296 dst.reserve(dst.size() + count_src);
297 T_to buf[1024 / sizeof(T_to)];
298 size_t src_size = stdex::mul(sizeof(T_from), count_src);
299 for (;;) {
300 T_to* output = &buf[0];
301 size_t output_size = sizeof(buf);
302 errno = 0;
303 iconv(m_handle, const_cast<char**>(reinterpret_cast<const char**>(&src)), &src_size, reinterpret_cast<char**>(&output), &output_size);
304 dst.append(buf, reinterpret_cast<T_to*>(reinterpret_cast<char*>(buf) + sizeof(buf) - output_size));
305 if (!errno)
306 break;
307 if (errno == E2BIG)
308 continue;
309 throw std::system_error(errno, std::system_category(), "iconv failed");
310 }
311#endif
312 }
313
320 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
321 void strcat(
322 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
323 _In_z_ const T_from* src)
324 {
325 strcat(dst, src, SIZE_MAX);
326 }
327
334 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
335 void strcat(
336 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
337 _In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
338 {
339 strcat(dst, src.data(), src.size());
340 }
341
349 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
350 void strcpy(
351 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
352 _In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
353 {
354 dst.clear();
355 strcat(dst, src, count_src);
356 }
357
364 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
365 void strcpy(
366 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
367 _In_z_ const T_from* src)
368 {
369 strcpy(dst, src, SIZE_MAX);
370 }
371
378 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
379 void strcpy(
380 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
381 _In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
382 {
383 strcpy(dst, src.data(), src.size());
384 }
385
392 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
393 std::basic_string<T_to, TR_to, AX_to> convert(_In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
394 {
395 std::basic_string<T_to, TR_to, AX_to> dst;
396 strcat(dst, src, count_src);
397 return dst;
398 }
399
405 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
406 std::basic_string<T_to, TR_to, AX_to> convert(_In_z_ const T_from* src)
407 {
408 return convert(src, SIZE_MAX);
409 }
410
416 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
417 std::basic_string<T_to, TR_to, AX_to> convert(_In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
418 {
419 return convert(src.data(), src.size());
420 }
421
422 void clear()
423 {
424#ifndef _WIN32
425 iconv(m_handle, NULL, NULL, NULL, NULL);
426#endif
427 }
428
429 static charset_id system_charset()
430 {
431#ifdef _WIN32
432 return static_cast<charset_id>(GetACP());
433#else
434 return charset_from_name(nl_langinfo(CODESET));
435#endif
436 }
437
438#ifdef _WIN32
439 protected:
440 static UINT to_encoding(_In_ charset_id charset)
441 {
442 return
443 charset == charset_id::system ? GetACP() :
444 charset == charset_id::oem ? GetOEMCP() :
445 static_cast<UINT>(charset);
446 }
447
448 protected:
449 UINT m_from_wincp, m_to_wincp;
450#else
451 protected:
452 static const char* to_encoding(_In_ charset_id charset)
453 {
454 static const char* const encodings[static_cast<std::underlying_type_t<charset_id>>(charset_id::_max)] = {
455 "", // system
456 "UTF-7", // utf7
457 "UTF-8", // utf8
458#if BYTE_ORDER == BIG_ENDIAN
459 "UTF-16BE", // utf16
460 "UTF-32BE", // utf32
461#else
462 "UTF-16LE", // utf16
463 "UTF-32LE", // utf32
464#endif
465 "CP1250", // windows1250
466 "CP1251", // windows1251
467 "CP1252", // windows1252
468 };
469 return
470 charset == charset_id::system ? nl_langinfo(CODESET) :
471 encodings[static_cast<std::underlying_type_t<charset_id>>(charset)];
472 }
473
474 protected:
475 iconv_t m_handle;
476#endif
477 };
478
489 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
490#ifndef _WIN32
491 _Deprecated_("For better performance, consider a reusable charset_encoder")
492#endif
493 inline void strcat(
494 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
495 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
496 _In_ charset_id charset = charset_id::system)
497 {
498 charset_encoder<char, wchar_t>(charset, wchar_t_charset).strcat(dst, src, count_src);
499 }
500
501 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
502 _Deprecated_("Use stdex::strcat")
503 inline void str2wstr(
504 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
505 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
506 _In_ charset_id charset = charset_id::system)
507 {
508 strcat(dst, src, count_src, charset);
509 }
510
520 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
521#ifndef _WIN32
522 _Deprecated_("For better performance, consider a reusable charset_encoder")
523#endif
524 inline void strcat(
525 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
526 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
527 _In_ charset_id charset = charset_id::system)
528 {
529 strcat(dst, src.data(), src.size(), charset);
530 }
531
532 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
533 _Deprecated_("Use stdex::strcat")
534 inline void str2wstr(
535 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
536 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
537 _In_ charset_id charset = charset_id::system)
538 {
539 strcat(dst, src, charset);
540 }
541
552 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
553#ifndef _WIN32
554 _Deprecated_("For better performance, consider a reusable charset_encoder")
555#endif
556 inline void strcpy(
557 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
558 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
559 _In_ charset_id charset = charset_id::system)
560 {
561 dst.clear();
562 strcat(dst, src, count_src, charset);
563 }
564
574 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
575#ifndef _WIN32
576 _Deprecated_("For better performance, consider a reusable charset_encoder")
577#endif
578 inline void strcpy(
579 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
580 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
581 _In_ charset_id charset = charset_id::system)
582 {
583 strcpy(dst, src.data(), src.size(), charset);
584 }
585
596#ifndef _WIN32
597 _Deprecated_("For better performance, consider a reusable charset_encoder")
598#endif
599 inline std::wstring str2wstr(
600 _In_z_ const char* src,
601 _In_ charset_id charset = charset_id::system)
602 {
603 std::wstring dst;
604 strcat(dst, src, SIZE_MAX, charset);
605 return dst;
606 }
607
619#ifndef _WIN32
620 _Deprecated_("For better performance, consider a reusable charset_encoder")
621#endif
622 inline std::wstring str2wstr(
623 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
624 _In_ charset_id charset = charset_id::system)
625 {
626 std::wstring dst;
627 strcat(dst, src, count_src, charset);
628 return dst;
629 }
630
641#ifndef _WIN32
642 _Deprecated_("For better performance, consider a reusable charset_encoder")
643#endif
644 inline std::wstring str2wstr(
645 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
646 _In_ charset_id charset = charset_id::system)
647 {
648 return str2wstr(src.data(), src.size(), charset);
649 }
650
661 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
662#ifndef _WIN32
663 _Deprecated_("For better performance, consider a reusable charset_encoder")
664#endif
665 inline void strcat(
666 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
667 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
668 _In_ charset_id charset = charset_id::system)
669 {
670 charset_encoder<wchar_t, char>(wchar_t_charset, charset).strcat(dst, src, count_src);
671 }
672
673 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
674 _Deprecated_("Use stdex::strcat")
675 inline void wstr2str(
676 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
677 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
678 _In_ charset_id charset = charset_id::system)
679 {
680 strcat(dst, src, count_src, charset);
681 }
682
692 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
693#ifndef _WIN32
694 _Deprecated_("For better performance, consider a reusable charset_encoder")
695#endif
696 inline void strcat(
697 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
698 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
699 _In_ charset_id charset = charset_id::system)
700 {
701 strcat(dst, src.data(), src.size(), charset);
702 }
703
704 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
705 _Deprecated_("Use stdex::strcat")
706 inline void wstr2str(
707 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
708 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
709 _In_ charset_id charset = charset_id::system)
710 {
711 strcat(dst, src, charset);
712 }
713
724 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
725#ifndef _WIN32
726 _Deprecated_("For better performance, consider a reusable charset_encoder")
727#endif
728 inline void strcpy(
729 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
730 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
731 _In_ charset_id charset = charset_id::system)
732 {
733 dst.clear();
734 strcat(dst, src, count_src, charset);
735 }
736
746 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
747#ifndef _WIN32
748 _Deprecated_("For better performance, consider a reusable charset_encoder")
749#endif
750 inline void strcpy(
751 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
752 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
753 _In_ charset_id charset = charset_id::system)
754 {
755 strcpy(dst, src.data(), src.size(), charset);
756 }
757
768#ifndef _WIN32
769 _Deprecated_("For better performance, consider a reusable charset_encoder")
770#endif
771 inline std::string wstr2str(
772 _In_z_ const wchar_t* src,
773 _In_ charset_id charset = charset_id::system)
774 {
775 std::string dst;
776 strcat(dst, src, SIZE_MAX, charset);
777 return dst;
778 }
779
791#ifndef _WIN32
792 _Deprecated_("For better performance, consider a reusable charset_encoder")
793#endif
794 inline std::string wstr2str(
795 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
796 _In_ charset_id charset = charset_id::system)
797 {
798 std::string dst;
799 strcat(dst, src, count_src, charset);
800 return dst;
801 }
802
813#ifndef _WIN32
814 _Deprecated_("For better performance, consider a reusable charset_encoder")
815#endif
816 inline std::string wstr2str(
817 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
818 _In_ charset_id charset = charset_id::system)
819 {
820 return wstr2str(src.data(), src.size(), charset);
821 }
822
823#ifdef _WIN32
833 template <class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
834 size_t normalizecat(
835 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
836 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
837 {
838 count_src = strnlen(src, count_src);
839 size_t count_dst = dst.size();
840 dst.resize(count_dst + count_src);
841 _Assume_(count_src + 1 < INT_MAX);
842#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpSrcString parameter wrong?
843 int r = NormalizeString(NormalizationC, src, static_cast<int>(count_src), dst.data() + count_dst, static_cast<int>(count_src + 1));
844 if (r >= 0)
845 dst.resize(count_dst + r);
846 else
847#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the _Src parameter wrong?
848 memcpy(dst.data() + count_dst, src, count_src * sizeof(wchar_t));
849 return dst.size();
850 }
851
860 template <size_t N, class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
861 size_t normalizecat(
862 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
863 _In_ const wchar_t (&src)[N])
864 {
865 return normalizecat(dst, src, N);
866 }
867
876 template <class TR_dst = std::char_traits<wchar_t>, class AX_dst = std::allocator<wchar_t>>
877 size_t normalizecat(
878 _Inout_ std::basic_string<wchar_t, TR_dst, AX_dst>& dst,
879 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
880 {
881 return normalizecat(dst, src.data(), src.size());
882 }
883
893 template <class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
894 size_t normalize(
895 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
896 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
897 {
898 dst.clear();
899 return normalizecat(dst, src, count_src);
900 }
901
910 template <size_t N, class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
911 size_t normalize(
912 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
913 _In_ const wchar_t(&src)[N])
914 {
915 return normalize(dst, src, N);
916 }
917
926 template <class TR_dst = std::char_traits<wchar_t>, class AX_dst = std::allocator<wchar_t>>
927 size_t normalize(
928 _Inout_ std::basic_string<wchar_t, TR_dst, AX_dst>& dst,
929 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
930 {
931 return normalize(dst, src.data(), src.size());
932 }
933
942 inline std::wstring normalize(_In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
943 {
944 std::wstring dst;
945 normalizecat(dst, src, count_src);
946 return dst;
947 }
948
956 template <size_t N>
957 std::wstring normalize(_In_ const wchar_t(&src)[N])
958 {
959 std::wstring dst;
960 normalizecat(dst, src, N);
961 return dst;
962 }
963
971 inline std::wstring normalize(_In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
972 {
973 std::wstring dst;
974 normalizecat(dst, src.data(), src.size());
975 return dst;
976 }
977#endif
978}
979
980#if defined(__GNUC__)
981#pragma GCC diagnostic pop
982#endif
Encoding converter context.
Definition unicode.hpp:138
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string and append to string.
Definition unicode.hpp:175
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Convert string and append to string.
Definition unicode.hpp:335
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, const T_from *src)
Convert string.
Definition unicode.hpp:365
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, const T_from *src)
Convert string and append to string.
Definition unicode.hpp:321
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string.
Definition unicode.hpp:350
std::basic_string< T_to, TR_to, AX_to > convert(const T_from *src)
Return converted string.
Definition unicode.hpp:406
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Convert string.
Definition unicode.hpp:379
std::basic_string< T_to, TR_to, AX_to > convert(const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Return converted string.
Definition unicode.hpp:417
std::basic_string< T_to, TR_to, AX_to > convert(_In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Return converted string.
Definition unicode.hpp:393