stdex
Additional custom algorithms and algorithms not covered by Standard C++
Loading...
Searching...
No Matches
unicode.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023-2024 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "endian.hpp"
10#include "math.hpp"
11#include "string.hpp"
12#include <stdint.h>
13#ifndef _WIN32
14#include <iconv.h>
15#include <langinfo.h>
16#endif
17#include <map>
18#include <memory>
19#include <string>
20
21#ifndef _WIN32
22#pragma GCC diagnostic push
23#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
24#endif
25
26namespace stdex
27{
///
/// Character set identifier
///
/// On Windows the enumerators carry code page numbers. On other platforms
/// they are consecutive indices starting at 0, closed by the `_max` sentinel.
///
enum class charset_id : uint16_t {
#ifdef _WIN32
    system = CP_ACP,
    oem = CP_OEMCP,
    utf7 = CP_UTF7,
    utf8 = CP_UTF8,
    utf16 = 1200 /*CP_WINUNICODE*/,
    utf32 = 12000,
    windows1250 = 1250,
    windows1251 = 1251,
    windows1252 = 1252,
#else
    system = 0,
    utf7 = 1,
    utf8 = 2,
    utf16 = 3,
    utf32 = 4,
    windows1250 = 5,
    windows1251 = 6,
    windows1252 = 7,

    _max = 8 // One past the last valid identifier
#endif
};
52
53#ifdef _WIN32
54 constexpr charset_id wchar_t_charset = charset_id::utf16;
55#ifdef _UNICODE
56 constexpr charset_id system_charset = charset_id::utf16;
57#else
58 constexpr charset_id system_charset = charset_id::system;
59#endif
60#else
61 constexpr charset_id wchar_t_charset = charset_id::utf32;
62 constexpr charset_id system_charset = charset_id::system;
63#endif
64
72 inline charset_id charset_from_name(_In_z_ const char* name)
73 {
74 struct charset_less {
75 bool operator()(_In_z_ const char* a, _In_z_ const char* b) const
76 {
77 return stricmp(a, b) < 0;
78 }
79 };
80 static const std::map<const char*, charset_id, charset_less> charsets = {
81 { "UNICODE-1-1-UTF-7", charset_id::utf7 },
82 { "UTF-7", charset_id::utf7 },
83 { "CSUNICODE11UTF7", charset_id::utf7 },
84
85 { "UTF-8", charset_id::utf8 },
86 { "UTF8", charset_id::utf8 },
87
88 { "UTF-16", charset_id::utf16 },
89#if BYTE_ORDER == BIG_ENDIAN
90 { "UTF-16BE", charset_id::utf16 },
91#else
92 { "UTF-16LE", charset_id::utf16 },
93#endif
94
95 { "UTF-32", charset_id::utf32 },
96#if BYTE_ORDER == BIG_ENDIAN
97 { "UTF-32BE", charset_id::utf32 },
98#else
99 { "UTF-32LE", charset_id::utf32 },
100#endif
101
102 { "CP1250", charset_id::windows1250 },
103 { "MS-EE", charset_id::windows1250 },
104 { "WINDOWS-1250", charset_id::windows1250 },
105
106 { "CP1251", charset_id::windows1251 },
107 { "MS-CYRL", charset_id::windows1251 },
108 { "WINDOWS-1251", charset_id::windows1251 },
109
110 { "CP1252", charset_id::windows1252 },
111 { "MS-ANSI", charset_id::windows1252 },
112 { "WINDOWS-1252", charset_id::windows1252 },
113 };
114 if (auto el = charsets.find(name); el != charsets.end())
115 return el->second;
116 return charset_id::system;
117 }
118
126 template <class TR = std::char_traits<char>, class AX = std::allocator<char>>
127 charset_id charset_from_name(_In_ const std::basic_string<char, TR, AX>& name)
128 {
129 return charset_from_name(name.c_str());
130 }
131
135 template <typename T_from, typename T_to>
137 {
138 protected:
139 charset_id m_from, m_to;
140
141 public:
142 charset_encoder(_In_ charset_id from, _In_ charset_id to) :
143 m_from(from),
144 m_to(to)
145 {
146#ifdef _WIN32
147 m_from_wincp = to_encoding(from);
148 m_to_wincp = to_encoding(to);
149#else
150 m_handle = iconv_open(to_encoding(to), to_encoding(from));
151 if (m_handle == (iconv_t)-1)
152 throw std::system_error(errno, std::system_category(), "iconv_open failed");
153#endif
154 }
155
156#ifndef _WIN32
158 {
159 iconv_close(m_handle);
160 }
161#endif
162
163 charset_id from_encoding() const { return m_from; }
164 charset_id to_encoding() const { return m_to; }
165
173 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
174 void strcat(
175 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
176 _In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
177 {
178 _Assume_(src || !count_src);
179 count_src = strnlen<T_from>(src, count_src);
180 if (!count_src) _Unlikely_
181 return;
182
183#ifdef _WIN32
184 constexpr DWORD dwFlagsMBWC = MB_PRECOMPOSED;
185 constexpr DWORD dwFlagsWCMB = 0;
186 constexpr LPCCH lpDefaultChar = NULL;
187
188 _Assume_(src);
189 if (m_from_wincp == m_to_wincp) _Unlikely_{
190 dst.append(reinterpret_cast<const T_to*>(src), count_src);
191 return;
192 }
193
194#pragma warning(suppress: 4127)
195 if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(wchar_t)) {
196 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
197
198 // Try to convert to stack buffer first.
199 WCHAR szStackBuffer[1024 / sizeof(WCHAR)];
200#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
201 int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer));
202 if (cch) {
203 // Append from stack.
204 dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
205 return;
206 }
207 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
208 // Query the required output size. Allocate buffer. Then convert again.
209 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
210 size_t offset = dst.size();
211 dst.resize(offset + static_cast<size_t>(cch));
212 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), &dst[offset], cch);
213 dst.resize(offset + (count_src != SIZE_MAX ? wcsnlen(&dst[offset], cch) : static_cast<size_t>(cch) - 1));
214 return;
215 }
216 throw std::system_error(GetLastError(), std::system_category(), "MultiByteToWideChar failed");
217 }
218
219#pragma warning(suppress: 4127)
220 if constexpr (sizeof(T_from) == sizeof(wchar_t) && sizeof(T_to) == sizeof(char)) {
221 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
222
223 // Try to convert to stack buffer first.
224 CHAR szStackBuffer[1024 / sizeof(CHAR)];
225#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpWideCharStr parameter wrong?
226 int cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL);
227 if (cch) {
228 // Copy from stack. Be careful not to include zero terminator.
229 dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
230 return;
231 }
232 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
233 // Query the required output size. Allocate buffer. Then convert again.
234 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL);
235 size_t offset = dst.size();
236 dst.resize(offset + static_cast<size_t>(cch));
237 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), &dst[offset], cch, lpDefaultChar, NULL);
238 dst.resize(offset + (count_src != SIZE_MAX ? strnlen(&dst[offset], cch) : static_cast<size_t>(cch) - 1));
239 return;
240 }
241 throw std::system_error(GetLastError(), std::system_category(), "WideCharToMultiByte failed");
242 }
243
244#pragma warning(suppress: 4127)
245 if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(char)) {
246 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
247
248 // Try to convert to stack buffer first.
249 WCHAR szStackBufferMBWC[512 / sizeof(WCHAR)];
250#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
251 int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBufferMBWC, _countof(szStackBufferMBWC));
252 if (cch) {
253 // Append from stack.
254 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szStackBufferMBWC, cch) : static_cast<size_t>(cch) - 1;
255 _Assume_(count_inter < INT_MAX);
256
257 // Try to convert to stack buffer first.
258 CHAR szStackBufferWCMB[512 / sizeof(CHAR)];
259#pragma warning(suppress: 6387) // Testing indicates szStackBufferMBWC may be NULL when count_inter is also 0. Is SAL of the lpWideCharStr parameter wrong?
260 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szStackBufferWCMB, _countof(szStackBufferWCMB), lpDefaultChar, NULL);
261 if (cch) {
262 // Copy from stack. Be careful not to include zero terminator.
263 dst.append(reinterpret_cast<const T_to*>(szStackBufferWCMB), strnlen(szStackBufferWCMB, cch));
264 return;
265 }
266 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
267 // Query the required output size. Allocate buffer. Then convert again.
268 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
269 size_t offset = dst.size();
270 dst.resize(offset + cch);
271 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), &dst[offset], cch, lpDefaultChar, NULL);
272 dst.resize(offset + strnlen(&dst[offset], cch));
273 return;
274 }
275 throw std::system_error(GetLastError(), std::system_category(), "WideCharToMultiByte failed");
276 }
277 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
278 // Query the required output size. Allocate buffer. Then convert again.
279 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
280 std::unique_ptr<WCHAR[]> szBufferMBWC(new WCHAR[cch]);
281 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBufferMBWC.get(), cch);
282 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szBufferMBWC.get(), cch) : static_cast<size_t>(cch) - 1;
283
284 // Query the required output size. Allocate buffer. Then convert again.
285 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
286 size_t offset = dst.size();
287 dst.resize(offset + cch);
288 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), &dst[offset], cch, lpDefaultChar, NULL);
289 dst.resize(offset + strnlen(&dst[offset], cch));
290 return;
291 }
292 throw std::system_error(GetLastError(), std::system_category(), "MultiByteToWideChar failed");
293 }
294#else
295 dst.reserve(dst.size() + count_src);
296 T_to buf[1024 / sizeof(T_to)];
297 size_t src_size = stdex::mul(sizeof(T_from), count_src);
298 for (;;) {
299 T_to* output = &buf[0];
300 size_t output_size = sizeof(buf);
301 errno = 0;
302 iconv(m_handle, const_cast<char**>(reinterpret_cast<const char**>(&src)), &src_size, reinterpret_cast<char**>(&output), &output_size);
303 dst.append(buf, reinterpret_cast<T_to*>(reinterpret_cast<char*>(buf) + sizeof(buf) - output_size));
304 if (!errno)
305 break;
306 if (errno == E2BIG)
307 continue;
308 throw std::system_error(errno, std::system_category(), "iconv failed");
309 }
310#endif
311 }
312
319 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
320 void strcat(
321 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
322 _In_z_ const T_from* src)
323 {
324 strcat(dst, src, SIZE_MAX);
325 }
326
333 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
334 void strcat(
335 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
336 _In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
337 {
338 strcat(dst, src.data(), src.size());
339 }
340
348 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
349 void strcpy(
350 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
351 _In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
352 {
353 dst.clear();
354 strcat(dst, src, count_src);
355 }
356
363 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
364 void strcpy(
365 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
366 _In_z_ const T_from* src)
367 {
368 strcpy(dst, src, SIZE_MAX);
369 }
370
377 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
378 void strcpy(
379 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
380 _In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
381 {
382 strcpy(dst, src.data(), src.size());
383 }
384
391 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
392 std::basic_string<T_to, TR_to, AX_to> convert(_In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
393 {
394 std::basic_string<T_to, TR_to, AX_to> dst;
395 strcat(dst, src, count_src);
396 return dst;
397 }
398
404 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
405 std::basic_string<T_to, TR_to, AX_to> convert(_In_z_ const T_from* src)
406 {
407 return convert(src, SIZE_MAX);
408 }
409
415 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
416 std::basic_string<T_to, TR_to, AX_to> convert(_In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
417 {
418 return convert(src.data(), src.size());
419 }
420
421 void clear()
422 {
423#ifndef _WIN32
424 iconv(m_handle, NULL, NULL, NULL, NULL);
425#endif
426 }
427
428 static charset_id system_charset()
429 {
430#ifdef _WIN32
431 return static_cast<charset_id>(GetACP());
432#else
433 return charset_from_name(nl_langinfo(CODESET));
434#endif
435 }
436
437#ifdef _WIN32
438 protected:
439 static UINT to_encoding(_In_ charset_id charset)
440 {
441 return
442 charset == charset_id::system ? GetACP() :
443 charset == charset_id::oem ? GetOEMCP() :
444 static_cast<UINT>(charset);
445 }
446
447 protected:
448 UINT m_from_wincp, m_to_wincp;
449#else
450 protected:
451 static const char* to_encoding(_In_ charset_id charset)
452 {
453 static const char* const encodings[static_cast<std::underlying_type_t<charset_id>>(charset_id::_max)] = {
454 "", // system
455 "UTF-7", // utf7
456 "UTF-8", // utf8
457#if BYTE_ORDER == BIG_ENDIAN
458 "UTF-16BE", // utf16
459 "UTF-32BE", // utf32
460#else
461 "UTF-16LE", // utf16
462 "UTF-32LE", // utf32
463#endif
464 "CP1250", // windows1250
465 "CP1251", // windows1251
466 "CP1252", // windows1252
467 };
468 return
469 charset == charset_id::system ? nl_langinfo(CODESET) :
470 encodings[static_cast<std::underlying_type_t<charset_id>>(charset)];
471 }
472
473 protected:
474 iconv_t m_handle;
475#endif
476 };
477
488 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
489#ifndef _WIN32
490 _Deprecated_("For better performance, consider a reusable charset_encoder")
491#endif
492 inline void strcat(
493 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
494 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
495 _In_ charset_id charset = charset_id::system)
496 {
497 charset_encoder<char, wchar_t>(charset, wchar_t_charset).strcat(dst, src, count_src);
498 }
499
500 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
501 _Deprecated_("Use stdex::strcat")
502 inline void str2wstr(
503 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
504 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
505 _In_ charset_id charset = charset_id::system)
506 {
507 strcat(dst, src, count_src, charset);
508 }
509
519 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
520#ifndef _WIN32
521 _Deprecated_("For better performance, consider a reusable charset_encoder")
522#endif
523 inline void strcat(
524 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
525 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
526 _In_ charset_id charset = charset_id::system)
527 {
528 strcat(dst, src.data(), src.size(), charset);
529 }
530
531 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
532 _Deprecated_("Use stdex::strcat")
533 inline void str2wstr(
534 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
535 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
536 _In_ charset_id charset = charset_id::system)
537 {
538 strcat(dst, src, charset);
539 }
540
551 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
552#ifndef _WIN32
553 _Deprecated_("For better performance, consider a reusable charset_encoder")
554#endif
555 inline void strcpy(
556 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
557 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
558 _In_ charset_id charset = charset_id::system)
559 {
560 dst.clear();
561 strcat(dst, src, count_src, charset);
562 }
563
573 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
574#ifndef _WIN32
575 _Deprecated_("For better performance, consider a reusable charset_encoder")
576#endif
577 inline void strcpy(
578 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
579 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
580 _In_ charset_id charset = charset_id::system)
581 {
582 strcpy(dst, src.data(), src.size(), charset);
583 }
584
595#ifndef _WIN32
596 _Deprecated_("For better performance, consider a reusable charset_encoder")
597#endif
598 inline std::wstring str2wstr(
599 _In_z_ const char* src,
600 _In_ charset_id charset = charset_id::system)
601 {
602 std::wstring dst;
603 strcat(dst, src, SIZE_MAX, charset);
604 return dst;
605 }
606
618#ifndef _WIN32
619 _Deprecated_("For better performance, consider a reusable charset_encoder")
620#endif
621 inline std::wstring str2wstr(
622 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
623 _In_ charset_id charset = charset_id::system)
624 {
625 std::wstring dst;
626 strcat(dst, src, count_src, charset);
627 return dst;
628 }
629
640#ifndef _WIN32
641 _Deprecated_("For better performance, consider a reusable charset_encoder")
642#endif
643 inline std::wstring str2wstr(
644 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
645 _In_ charset_id charset = charset_id::system)
646 {
647 return str2wstr(src.data(), src.size(), charset);
648 }
649
660 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
661#ifndef _WIN32
662 _Deprecated_("For better performance, consider a reusable charset_encoder")
663#endif
664 inline void strcat(
665 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
666 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
667 _In_ charset_id charset = charset_id::system)
668 {
669 charset_encoder<wchar_t, char>(wchar_t_charset, charset).strcat(dst, src, count_src);
670 }
671
672 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
673 _Deprecated_("Use stdex::strcat")
674 inline void wstr2str(
675 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
676 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
677 _In_ charset_id charset = charset_id::system)
678 {
679 strcat(dst, src, count_src, charset);
680 }
681
691 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
692#ifndef _WIN32
693 _Deprecated_("For better performance, consider a reusable charset_encoder")
694#endif
695 inline void strcat(
696 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
697 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
698 _In_ charset_id charset = charset_id::system)
699 {
700 strcat(dst, src.data(), src.size(), charset);
701 }
702
703 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
704 _Deprecated_("Use stdex::strcat")
705 inline void wstr2str(
706 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
707 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
708 _In_ charset_id charset = charset_id::system)
709 {
710 strcat(dst, src, charset);
711 }
712
723 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
724#ifndef _WIN32
725 _Deprecated_("For better performance, consider a reusable charset_encoder")
726#endif
727 inline void strcpy(
728 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
729 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
730 _In_ charset_id charset = charset_id::system)
731 {
732 dst.clear();
733 strcat(dst, src, count_src, charset);
734 }
735
745 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
746#ifndef _WIN32
747 _Deprecated_("For better performance, consider a reusable charset_encoder")
748#endif
749 inline void strcpy(
750 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
751 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
752 _In_ charset_id charset = charset_id::system)
753 {
754 strcpy(dst, src.data(), src.size(), charset);
755 }
756
767#ifndef _WIN32
768 _Deprecated_("For better performance, consider a reusable charset_encoder")
769#endif
770 inline std::string wstr2str(
771 _In_z_ const wchar_t* src,
772 _In_ charset_id charset = charset_id::system)
773 {
774 std::string dst;
775 strcat(dst, src, SIZE_MAX, charset);
776 return dst;
777 }
778
790#ifndef _WIN32
791 _Deprecated_("For better performance, consider a reusable charset_encoder")
792#endif
793 inline std::string wstr2str(
794 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
795 _In_ charset_id charset = charset_id::system)
796 {
797 std::string dst;
798 strcat(dst, src, count_src, charset);
799 return dst;
800 }
801
812#ifndef _WIN32
813 _Deprecated_("For better performance, consider a reusable charset_encoder")
814#endif
815 inline std::string wstr2str(
816 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
817 _In_ charset_id charset = charset_id::system)
818 {
819 return wstr2str(src.data(), src.size(), charset);
820 }
821
822#ifdef _WIN32
832 template <class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
833 size_t normalizecat(
834 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
835 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
836 {
837 count_src = strnlen(src, count_src);
838 size_t count_dst = dst.size();
839 dst.resize(count_dst + count_src);
840 _Assume_(count_src + 1 < INT_MAX);
841#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpSrcString parameter wrong?
842 int r = NormalizeString(NormalizationC, src, static_cast<int>(count_src), dst.data() + count_dst, static_cast<int>(count_src + 1));
843 if (r >= 0)
844 dst.resize(count_dst + r);
845 else
846#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the _Src parameter wrong?
847 memcpy(dst.data() + count_dst, src, count_src * sizeof(wchar_t));
848 return dst.size();
849 }
850
859 template <size_t N, class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
860 size_t normalizecat(
861 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
862 _In_ const wchar_t (&src)[N])
863 {
864 return normalizecat(dst, src, N);
865 }
866
875 template <class TR_dst = std::char_traits<wchar_t>, class AX_dst = std::allocator<wchar_t>>
876 size_t normalizecat(
877 _Inout_ std::basic_string<wchar_t, TR_dst, AX_dst>& dst,
878 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
879 {
880 return normalizecat(dst, src.data(), src.size());
881 }
882
892 template <class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
893 size_t normalize(
894 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
895 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
896 {
897 dst.clear();
898 return normalizecat(dst, src, count_src);
899 }
900
909 template <size_t N, class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
910 size_t normalize(
911 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
912 _In_ const wchar_t(&src)[N])
913 {
914 return normalize(dst, src, N);
915 }
916
925 template <class TR_dst = std::char_traits<wchar_t>, class AX_dst = std::allocator<wchar_t>>
926 size_t normalize(
927 _Inout_ std::basic_string<wchar_t, TR_dst, AX_dst>& dst,
928 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
929 {
930 return normalize(dst, src.data(), src.size());
931 }
932
941 inline std::wstring normalize(_In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
942 {
943 std::wstring dst;
944 normalizecat(dst, src, count_src);
945 return dst;
946 }
947
955 template <size_t N>
956 std::wstring normalize(_In_ const wchar_t(&src)[N])
957 {
958 std::wstring dst;
959 normalizecat(dst, src, N);
960 return dst;
961 }
962
970 inline std::wstring normalize(_In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
971 {
972 std::wstring dst;
973 normalizecat(dst, src.data(), src.size());
974 return dst;
975 }
976#endif
977}
978
979#ifndef _WIN32
980#pragma GCC diagnostic pop
981#endif
Encoding converter context.
Definition unicode.hpp:137
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string and append to string.
Definition unicode.hpp:174
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Convert string and append to string.
Definition unicode.hpp:334
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, const T_from *src)
Convert string.
Definition unicode.hpp:364
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, const T_from *src)
Convert string and append to string.
Definition unicode.hpp:320
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string.
Definition unicode.hpp:349
std::basic_string< T_to, TR_to, AX_to > convert(const T_from *src)
Return converted string.
Definition unicode.hpp:405
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Convert string.
Definition unicode.hpp:378
std::basic_string< T_to, TR_to, AX_to > convert(const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Return converted string.
Definition unicode.hpp:416
std::basic_string< T_to, TR_to, AX_to > convert(_In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Return converted string.
Definition unicode.hpp:392