stdex
Additional custom algorithms and algorithms not covered by the C++ Standard Library
Loading...
Searching...
No Matches
unicode.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023-2024 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "endian.hpp"
10#include "math.hpp"
11#include "string.hpp"
12#include <stdint.h>
13#ifndef _WIN32
14#include <iconv.h>
15#include <langinfo.h>
16#endif
17#include <map>
18#include <memory>
19#include <string>
20
21#if defined(__GNUC__)
22#pragma GCC diagnostic push
23#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
24#pragma GCC diagnostic ignored "-Wexit-time-destructors"
25#endif
26
27namespace stdex
28{
/// Character set identifier.
///
/// On Windows the enumerators carry the numeric Windows code page values, so
/// an identifier can be cast directly to a code page. On other platforms the
/// enumerators are consecutive indices starting at 0, and `_max` is the number
/// of identifiers (it sizes the iconv encoding-name table in
/// charset_encoder::to_encoding).
29 enum class charset_id : uint16_t {
30#ifdef _WIN32
31 system = CP_ACP, ///< System default (ANSI) code page
32 oem = CP_OEMCP, ///< OEM code page (Windows only)
33 utf7 = CP_UTF7,
34 utf8 = CP_UTF8,
35 utf16 = 1200 /*CP_WINUNICODE*/,
36 utf32 = 12000,
37 windows1250 = 1250,
38 windows1251 = 1251,
39 windows1252 = 1252,
40#else
41 system = 0, ///< System default charset; resolved with nl_langinfo(CODESET) when an encoder is created
42 utf7,
43 utf8,
44 utf16,
45 utf32,
46 windows1250,
47 windows1251,
48 windows1252,
49
50 _max ///< Number of charset identifiers; not a valid charset itself
51#endif
52 };
53
// wchar_t_charset: charset used for wchar_t strings on this platform
//   (UTF-16 on Windows, UTF-32 elsewhere).
// system_charset: charset of the platform's default string type
//   (UTF-16 on Windows when _UNICODE is defined, otherwise charset_id::system).
54#ifdef _WIN32
55 constexpr charset_id wchar_t_charset = charset_id::utf16;
56#ifdef _UNICODE
57 constexpr charset_id system_charset = charset_id::utf16;
58#else
59 constexpr charset_id system_charset = charset_id::system;
60#endif
61#else
62 constexpr charset_id wchar_t_charset = charset_id::utf32;
63 constexpr charset_id system_charset = charset_id::system;
64#endif
65
73 inline charset_id charset_from_name(_In_z_ const char* name)
74 {
75 struct charset_less {
76 bool operator()(_In_z_ const char* a, _In_z_ const char* b) const
77 {
78 return stricmp(a, b) < 0;
79 }
80 };
81 static const std::map<const char*, charset_id, charset_less> charsets = {
82 { "UNICODE-1-1-UTF-7", charset_id::utf7 },
83 { "UTF-7", charset_id::utf7 },
84 { "CSUNICODE11UTF7", charset_id::utf7 },
85
86 { "UTF-8", charset_id::utf8 },
87 { "UTF8", charset_id::utf8 },
88
89 { "UTF-16", charset_id::utf16 },
90#if BYTE_ORDER == BIG_ENDIAN
91 { "UTF-16BE", charset_id::utf16 },
92#else
93 { "UTF-16LE", charset_id::utf16 },
94#endif
95
96 { "UTF-32", charset_id::utf32 },
97#if BYTE_ORDER == BIG_ENDIAN
98 { "UTF-32BE", charset_id::utf32 },
99#else
100 { "UTF-32LE", charset_id::utf32 },
101#endif
102
103 { "CP1250", charset_id::windows1250 },
104 { "MS-EE", charset_id::windows1250 },
105 { "WINDOWS-1250", charset_id::windows1250 },
106
107 { "CP1251", charset_id::windows1251 },
108 { "MS-CYRL", charset_id::windows1251 },
109 { "WINDOWS-1251", charset_id::windows1251 },
110
111 { "CP1252", charset_id::windows1252 },
112 { "MS-ANSI", charset_id::windows1252 },
113 { "WINDOWS-1252", charset_id::windows1252 },
114 };
115 if (auto el = charsets.find(name); el != charsets.end())
116 return el->second;
117 return charset_id::system;
118 }
119
127 template <class TR = std::char_traits<char>, class AX = std::allocator<char>>
128 charset_id charset_from_name(_In_ const std::basic_string<char, TR, AX>& name)
129 {
130 return charset_from_name(name.c_str());
131 }
132
136 template <typename T_from, typename T_to>
138 {
139 protected:
140 charset_id m_from, m_to;
141
142 public:
143 charset_encoder(_In_ charset_id from, _In_ charset_id to) :
144 m_from(from),
145 m_to(to)
146 {
147#ifdef _WIN32
148 m_from_wincp = to_encoding(from);
149 m_to_wincp = to_encoding(to);
150#else
151 m_handle = iconv_open(to_encoding(to), to_encoding(from));
152 if (m_handle == (iconv_t)-1)
153 throw std::system_error(errno, std::system_category(), "iconv_open failed");
154#endif
155 }
156
157#ifndef _WIN32
159 {
160 iconv_close(m_handle);
161 }
162#endif
163
164 charset_id from_encoding() const { return m_from; }
165 charset_id to_encoding() const { return m_to; }
166
174 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
175 void strcat(
176 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
177 _In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
178 {
179 _Assume_(src || !count_src);
180 count_src = strnlen<T_from>(src, count_src);
181 if (!count_src) _Unlikely_
182 return;
183
184#ifdef _WIN32
185 constexpr DWORD dwFlagsWCMB = 0;
186 constexpr LPCCH lpDefaultChar = NULL;
187
188 _Assume_(src);
189 if (m_from_wincp == m_to_wincp) _Unlikely_{
190 dst.append(reinterpret_cast<const T_to*>(src), count_src);
191 return;
192 }
193
194#pragma warning(suppress: 4127)
195 if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(wchar_t)) {
196 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
197
198 // Try to convert to stack buffer first.
199 DWORD dwFlagsMBWC = static_cast<UINT>(m_from_wincp) < CP_UTF7 ? MB_PRECOMPOSED : 0;
200 WCHAR szStackBuffer[1024 / sizeof(WCHAR)];
201#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
202 int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer));
203 if (cch) {
204 // Append from stack.
205 dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
206 return;
207 }
208 DWORD dwResult = GetLastError();
209 if (dwResult == ERROR_INSUFFICIENT_BUFFER) {
210 // Query the required output size. Allocate buffer. Then convert again.
211 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
212 size_t offset = dst.size();
213 dst.resize(offset + static_cast<size_t>(cch));
214 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), &dst[offset], cch);
215 dst.resize(offset + (count_src != SIZE_MAX ? wcsnlen(&dst[offset], cch) : static_cast<size_t>(cch) - 1));
216 return;
217 }
218 throw std::system_error(dwResult, std::system_category(), "MultiByteToWideChar failed");
219 }
220
221#pragma warning(suppress: 4127)
222 if constexpr (sizeof(T_from) == sizeof(wchar_t) && sizeof(T_to) == sizeof(char)) {
223 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
224
225 // Try to convert to stack buffer first.
226 CHAR szStackBuffer[1024 / sizeof(CHAR)];
227#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpWideCharStr parameter wrong?
228 int cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL);
229 if (cch) {
230 // Copy from stack. Be careful not to include zero terminator.
231 dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
232 return;
233 }
234 DWORD dwResult = GetLastError();
235 if (dwResult == ERROR_INSUFFICIENT_BUFFER) {
236 // Query the required output size. Allocate buffer. Then convert again.
237 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL);
238 size_t offset = dst.size();
239 dst.resize(offset + static_cast<size_t>(cch));
240 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), &dst[offset], cch, lpDefaultChar, NULL);
241 dst.resize(offset + (count_src != SIZE_MAX ? strnlen(&dst[offset], cch) : static_cast<size_t>(cch) - 1));
242 return;
243 }
244 throw std::system_error(dwResult, std::system_category(), "WideCharToMultiByte failed");
245 }
246
247#pragma warning(suppress: 4127)
248 if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(char)) {
249 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
250
251 // Try to convert to stack buffer first.
252 DWORD dwFlagsMBWC = static_cast<UINT>(m_from_wincp) < CP_UTF7 ? MB_PRECOMPOSED : 0, dwResult;
253 WCHAR szStackBufferMBWC[512 / sizeof(WCHAR)];
254#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
255 int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBufferMBWC, _countof(szStackBufferMBWC));
256 if (cch) {
257 // Append from stack.
258 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szStackBufferMBWC, cch) : static_cast<size_t>(cch) - 1;
259 _Assume_(count_inter < INT_MAX);
260
261 // Try to convert to stack buffer first.
262 CHAR szStackBufferWCMB[512 / sizeof(CHAR)];
263#pragma warning(suppress: 6387) // Testing indicates szStackBufferMBWC may be NULL when count_inter is also 0. Is SAL of the lpWideCharStr parameter wrong?
264 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szStackBufferWCMB, _countof(szStackBufferWCMB), lpDefaultChar, NULL);
265 if (cch) {
266 // Copy from stack. Be careful not to include zero terminator.
267 dst.append(reinterpret_cast<const T_to*>(szStackBufferWCMB), strnlen(szStackBufferWCMB, cch));
268 return;
269 }
270 dwResult = GetLastError();
271 if (dwResult == ERROR_INSUFFICIENT_BUFFER) {
272 // Query the required output size. Allocate buffer. Then convert again.
273 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
274 size_t offset = dst.size();
275 dst.resize(offset + cch);
276 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), &dst[offset], cch, lpDefaultChar, NULL);
277 dst.resize(offset + strnlen(&dst[offset], cch));
278 return;
279 }
280 throw std::system_error(dwResult, std::system_category(), "WideCharToMultiByte failed");
281 }
282 dwResult = GetLastError();
283 if (dwResult == ERROR_INSUFFICIENT_BUFFER) {
284 // Query the required output size. Allocate buffer. Then convert again.
285 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
286 std::unique_ptr<WCHAR[]> szBufferMBWC(new WCHAR[cch]);
287 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBufferMBWC.get(), cch);
288 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szBufferMBWC.get(), cch) : static_cast<size_t>(cch) - 1;
289
290 // Query the required output size. Allocate buffer. Then convert again.
291 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
292 size_t offset = dst.size();
293 dst.resize(offset + cch);
294 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), &dst[offset], cch, lpDefaultChar, NULL);
295 dst.resize(offset + strnlen(&dst[offset], cch));
296 return;
297 }
298 throw std::system_error(dwResult, std::system_category(), "MultiByteToWideChar failed");
299 }
300#else
301 dst.reserve(dst.size() + count_src);
302 T_to buf[1024 / sizeof(T_to)];
303 size_t src_size = stdex::mul(sizeof(T_from), count_src);
304 for (;;) {
305 T_to* output = &buf[0];
306 size_t output_size = sizeof(buf);
307 errno = 0;
308 iconv(m_handle, const_cast<char**>(reinterpret_cast<const char**>(&src)), &src_size, reinterpret_cast<char**>(&output), &output_size);
309 dst.append(buf, reinterpret_cast<T_to*>(reinterpret_cast<char*>(buf) + sizeof(buf) - output_size));
310 if (!errno)
311 break;
312 if (errno == E2BIG)
313 continue;
314 throw std::system_error(errno, std::system_category(), "iconv failed");
315 }
316#endif
317 }
318
325 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
326 void strcat(
327 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
328 _In_z_ const T_from* src)
329 {
330 strcat(dst, src, SIZE_MAX);
331 }
332
339 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
340 void strcat(
341 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
342 _In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
343 {
344 strcat(dst, src.data(), src.size());
345 }
346
354 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
355 void strcpy(
356 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
357 _In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
358 {
359 dst.clear();
360 strcat(dst, src, count_src);
361 }
362
369 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
370 void strcpy(
371 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
372 _In_z_ const T_from* src)
373 {
374 strcpy(dst, src, SIZE_MAX);
375 }
376
383 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
384 void strcpy(
385 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
386 _In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
387 {
388 strcpy(dst, src.data(), src.size());
389 }
390
397 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
398 std::basic_string<T_to, TR_to, AX_to> convert(_In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
399 {
400 std::basic_string<T_to, TR_to, AX_to> dst;
401 strcat(dst, src, count_src);
402 return dst;
403 }
404
410 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
411 std::basic_string<T_to, TR_to, AX_to> convert(_In_z_ const T_from* src)
412 {
413 return convert(src, SIZE_MAX);
414 }
415
421 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
422 std::basic_string<T_to, TR_to, AX_to> convert(_In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
423 {
424 return convert(src.data(), src.size());
425 }
426
427 void clear()
428 {
429#ifndef _WIN32
430 iconv(m_handle, NULL, NULL, NULL, NULL);
431#endif
432 }
433
434 static charset_id system_charset()
435 {
436#ifdef _WIN32
437 return static_cast<charset_id>(GetACP());
438#else
439 return charset_from_name(nl_langinfo(CODESET));
440#endif
441 }
442
443#ifdef _WIN32
444 protected:
 /// Maps a charset identifier to a Windows code page number.
 /// charset_id::system and charset_id::oem are resolved to the current ANSI
 /// and OEM code pages; every other identifier is used as the code page as-is.
445 static UINT to_encoding(_In_ charset_id charset)
446 {
447 return
448 charset == charset_id::system ? GetACP() :
449 charset == charset_id::oem ? GetOEMCP() :
450 static_cast<UINT>(charset);
451 }
452
453 protected:
 // Source and destination code pages, resolved in the constructor.
454 UINT m_from_wincp, m_to_wincp;
455#else
456 protected:
 /// Maps a charset identifier to the encoding name passed to iconv_open().
 /// charset_id::system is resolved with nl_langinfo(CODESET); every other
 /// identifier indexes the table below, which has charset_id::_max entries
 /// in enumerator order. The index is not range-checked.
457 static const char* to_encoding(_In_ charset_id charset)
458 {
459 static const char* const encodings[static_cast<std::underlying_type_t<charset_id>>(charset_id::_max)] = {
460 "", // system
461 "UTF-7", // utf7
462 "UTF-8", // utf8
 // UTF-16/UTF-32 use the byte order selected by BYTE_ORDER.
463#if BYTE_ORDER == BIG_ENDIAN
464 "UTF-16BE", // utf16
465 "UTF-32BE", // utf32
466#else
467 "UTF-16LE", // utf16
468 "UTF-32LE", // utf32
469#endif
470 "CP1250", // windows1250
471 "CP1251", // windows1251
472 "CP1252", // windows1252
473 };
474 return
475 charset == charset_id::system ? nl_langinfo(CODESET) :
476 encodings[static_cast<std::underlying_type_t<charset_id>>(charset)];
477 }
478
479 protected:
 // iconv conversion descriptor opened in the constructor.
480 iconv_t m_handle;
481#endif
482 };
483
494 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
495#ifndef _WIN32
496 _Deprecated_("For better performance, consider a reusable charset_encoder")
497#endif
498 inline void strcat(
499 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
500 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
501 _In_ charset_id charset = charset_id::system)
502 {
503 charset_encoder<char, wchar_t>(charset, wchar_t_charset).strcat(dst, src, count_src);
504 }
505
506 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
507 _Deprecated_("Use stdex::strcat")
508 inline void str2wstr(
509 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
510 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
511 _In_ charset_id charset = charset_id::system)
512 {
513 strcat(dst, src, count_src, charset);
514 }
515
525 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
526#ifndef _WIN32
527 _Deprecated_("For better performance, consider a reusable charset_encoder")
528#endif
529 inline void strcat(
530 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
531 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
532 _In_ charset_id charset = charset_id::system)
533 {
534 strcat(dst, src.data(), src.size(), charset);
535 }
536
537 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
538 _Deprecated_("Use stdex::strcat")
539 inline void str2wstr(
540 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
541 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
542 _In_ charset_id charset = charset_id::system)
543 {
544 strcat(dst, src, charset);
545 }
546
557 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
558#ifndef _WIN32
559 _Deprecated_("For better performance, consider a reusable charset_encoder")
560#endif
561 inline void strcpy(
562 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
563 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
564 _In_ charset_id charset = charset_id::system)
565 {
566 dst.clear();
567 strcat(dst, src, count_src, charset);
568 }
569
579 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
580#ifndef _WIN32
581 _Deprecated_("For better performance, consider a reusable charset_encoder")
582#endif
583 inline void strcpy(
584 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
585 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
586 _In_ charset_id charset = charset_id::system)
587 {
588 strcpy(dst, src.data(), src.size(), charset);
589 }
590
601#ifndef _WIN32
602 _Deprecated_("For better performance, consider a reusable charset_encoder")
603#endif
604 inline std::wstring str2wstr(
605 _In_z_ const char* src,
606 _In_ charset_id charset = charset_id::system)
607 {
608 std::wstring dst;
609 strcat(dst, src, SIZE_MAX, charset);
610 return dst;
611 }
612
624#ifndef _WIN32
625 _Deprecated_("For better performance, consider a reusable charset_encoder")
626#endif
627 inline std::wstring str2wstr(
628 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
629 _In_ charset_id charset = charset_id::system)
630 {
631 std::wstring dst;
632 strcat(dst, src, count_src, charset);
633 return dst;
634 }
635
646#ifndef _WIN32
647 _Deprecated_("For better performance, consider a reusable charset_encoder")
648#endif
649 inline std::wstring str2wstr(
650 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
651 _In_ charset_id charset = charset_id::system)
652 {
653 return str2wstr(src.data(), src.size(), charset);
654 }
655
666 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
667#ifndef _WIN32
668 _Deprecated_("For better performance, consider a reusable charset_encoder")
669#endif
670 inline void strcat(
671 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
672 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
673 _In_ charset_id charset = charset_id::system)
674 {
675 charset_encoder<wchar_t, char>(wchar_t_charset, charset).strcat(dst, src, count_src);
676 }
677
678 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
679 _Deprecated_("Use stdex::strcat")
680 inline void wstr2str(
681 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
682 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
683 _In_ charset_id charset = charset_id::system)
684 {
685 strcat(dst, src, count_src, charset);
686 }
687
697 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
698#ifndef _WIN32
699 _Deprecated_("For better performance, consider a reusable charset_encoder")
700#endif
701 inline void strcat(
702 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
703 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
704 _In_ charset_id charset = charset_id::system)
705 {
706 strcat(dst, src.data(), src.size(), charset);
707 }
708
709 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
710 _Deprecated_("Use stdex::strcat")
711 inline void wstr2str(
712 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
713 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
714 _In_ charset_id charset = charset_id::system)
715 {
716 strcat(dst, src, charset);
717 }
718
729 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
730#ifndef _WIN32
731 _Deprecated_("For better performance, consider a reusable charset_encoder")
732#endif
733 inline void strcpy(
734 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
735 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
736 _In_ charset_id charset = charset_id::system)
737 {
738 dst.clear();
739 strcat(dst, src, count_src, charset);
740 }
741
751 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
752#ifndef _WIN32
753 _Deprecated_("For better performance, consider a reusable charset_encoder")
754#endif
755 inline void strcpy(
756 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
757 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
758 _In_ charset_id charset = charset_id::system)
759 {
760 strcpy(dst, src.data(), src.size(), charset);
761 }
762
773#ifndef _WIN32
774 _Deprecated_("For better performance, consider a reusable charset_encoder")
775#endif
776 inline std::string wstr2str(
777 _In_z_ const wchar_t* src,
778 _In_ charset_id charset = charset_id::system)
779 {
780 std::string dst;
781 strcat(dst, src, SIZE_MAX, charset);
782 return dst;
783 }
784
796#ifndef _WIN32
797 _Deprecated_("For better performance, consider a reusable charset_encoder")
798#endif
799 inline std::string wstr2str(
800 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
801 _In_ charset_id charset = charset_id::system)
802 {
803 std::string dst;
804 strcat(dst, src, count_src, charset);
805 return dst;
806 }
807
818#ifndef _WIN32
819 _Deprecated_("For better performance, consider a reusable charset_encoder")
820#endif
821 inline std::string wstr2str(
822 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
823 _In_ charset_id charset = charset_id::system)
824 {
825 return wstr2str(src.data(), src.size(), charset);
826 }
827
828#ifdef _WIN32
838 template <class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
839 size_t normalizecat(
840 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
841 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
842 {
843 count_src = strnlen(src, count_src);
844 size_t count_dst = dst.size();
845 dst.resize(count_dst + count_src);
846 _Assume_(count_src + 1 < INT_MAX);
847#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpSrcString parameter wrong?
848 int r = NormalizeString(NormalizationC, src, static_cast<int>(count_src), dst.data() + count_dst, static_cast<int>(count_src + 1));
849 if (r >= 0)
850 dst.resize(count_dst + r);
851 else
852#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the _Src parameter wrong?
853 memcpy(dst.data() + count_dst, src, count_src * sizeof(wchar_t));
854 return dst.size();
855 }
856
865 template <size_t N, class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
866 size_t normalizecat(
867 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
868 _In_ const wchar_t (&src)[N])
869 {
870 return normalizecat(dst, src, N);
871 }
872
881 template <class TR_dst = std::char_traits<wchar_t>, class AX_dst = std::allocator<wchar_t>>
882 size_t normalizecat(
883 _Inout_ std::basic_string<wchar_t, TR_dst, AX_dst>& dst,
884 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
885 {
886 return normalizecat(dst, src.data(), src.size());
887 }
888
898 template <class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
899 size_t normalize(
900 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
901 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
902 {
903 dst.clear();
904 return normalizecat(dst, src, count_src);
905 }
906
915 template <size_t N, class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
916 size_t normalize(
917 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
918 _In_ const wchar_t(&src)[N])
919 {
920 return normalize(dst, src, N);
921 }
922
931 template <class TR_dst = std::char_traits<wchar_t>, class AX_dst = std::allocator<wchar_t>>
932 size_t normalize(
933 _Inout_ std::basic_string<wchar_t, TR_dst, AX_dst>& dst,
934 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
935 {
936 return normalize(dst, src.data(), src.size());
937 }
938
947 inline std::wstring normalize(_In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
948 {
949 std::wstring dst;
950 normalizecat(dst, src, count_src);
951 return dst;
952 }
953
961 template <size_t N>
962 std::wstring normalize(_In_ const wchar_t(&src)[N])
963 {
964 std::wstring dst;
965 normalizecat(dst, src, N);
966 return dst;
967 }
968
976 inline std::wstring normalize(_In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
977 {
978 std::wstring dst;
979 normalizecat(dst, src.data(), src.size());
980 return dst;
981 }
982#endif
983}
984
985#if defined(__GNUC__)
986#pragma GCC diagnostic pop
987#endif
Encoding converter context.
Definition unicode.hpp:138
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string and append to string.
Definition unicode.hpp:175
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Convert string and append to string.
Definition unicode.hpp:340
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, const T_from *src)
Convert string.
Definition unicode.hpp:370
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, const T_from *src)
Convert string and append to string.
Definition unicode.hpp:326
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string.
Definition unicode.hpp:355
std::basic_string< T_to, TR_to, AX_to > convert(const T_from *src)
Return converted string.
Definition unicode.hpp:411
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Convert string.
Definition unicode.hpp:384
std::basic_string< T_to, TR_to, AX_to > convert(const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Return converted string.
Definition unicode.hpp:422
std::basic_string< T_to, TR_to, AX_to > convert(_In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Return converted string.
Definition unicode.hpp:398