stdex
Additional custom algorithms, or algorithms not covered by Standard C++
Loading...
Searching...
No Matches
unicode.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "endian.hpp"
10#include "math.hpp"
11#include "string.hpp"
12#include <stdint.h>
13#ifndef _WIN32
14#include <iconv.h>
15#include <langinfo.h>
16#endif
17#include <map>
18#include <memory>
19#include <string>
20
21#ifndef _WIN32
22#pragma GCC diagnostic push
23#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
24#endif
25
26namespace stdex
27{
///
/// Character set identifier
///
/// On Windows the enumerators hold code page numbers. Elsewhere they are
/// consecutive indices, with `_max` marking the number of entries.
///
enum class charset_id : uint16_t
{
#ifdef _WIN32
    system = CP_ACP,                 ///< System default code page
    oem = CP_OEMCP,                  ///< OEM code page
    utf7 = CP_UTF7,                  ///< UTF-7
    utf8 = CP_UTF8,                  ///< UTF-8
    utf16 = 1200 /*CP_WINUNICODE*/,  ///< UTF-16
    utf32 = 12000,                   ///< UTF-32
    windows1250 = 1250,              ///< Windows-1250
    windows1251 = 1251,              ///< Windows-1251
    windows1252 = 1252,              ///< Windows-1252
#else
    system = 0,   ///< System default charset
    utf7,         ///< UTF-7
    utf8,         ///< UTF-8
    utf16,        ///< UTF-16
    utf32,        ///< UTF-32
    windows1250,  ///< Windows-1250
    windows1251,  ///< Windows-1251
    windows1252,  ///< Windows-1252

    _max          ///< Number of identifiers above; not a charset
#endif
};
52
53#ifdef _WIN32
54 constexpr charset_id wchar_t_charset = charset_id::utf16;
55#ifdef _UNICODE
56 constexpr charset_id system_charset = charset_id::utf16;
57#else
58 constexpr charset_id system_charset = charset_id::system;
59#endif
60#else
61 constexpr charset_id wchar_t_charset = charset_id::utf32;
62 constexpr charset_id system_charset = charset_id::system;
63#endif
64
72 inline charset_id charset_from_name(_In_z_ const char* name)
73 {
74 struct charset_less {
75 bool operator()(_In_z_ const char* a, _In_z_ const char* b) const
76 {
77 return stricmp(a, b) < 0;
78 }
79 };
80 static const std::map<const char*, charset_id, charset_less> charsets = {
81 { "UNICODE-1-1-UTF-7", charset_id::utf7 },
82 { "UTF-7", charset_id::utf7 },
83 { "CSUNICODE11UTF7", charset_id::utf7 },
84
85 { "UTF-8", charset_id::utf8 },
86 { "UTF8", charset_id::utf8 },
87
88 { "UTF-16", charset_id::utf16 },
89#if BYTE_ORDER == BIG_ENDIAN
90 { "UTF-16BE", charset_id::utf16 },
91#else
92 { "UTF-16LE", charset_id::utf16 },
93#endif
94
95 { "UTF-32", charset_id::utf32 },
96#if BYTE_ORDER == BIG_ENDIAN
97 { "UTF-32BE", charset_id::utf32 },
98#else
99 { "UTF-32LE", charset_id::utf32 },
100#endif
101
102 { "CP1250", charset_id::windows1250 },
103 { "MS-EE", charset_id::windows1250 },
104 { "WINDOWS-1250", charset_id::windows1250 },
105
106 { "CP1251", charset_id::windows1251 },
107 { "MS-CYRL", charset_id::windows1251 },
108 { "WINDOWS-1251", charset_id::windows1251 },
109
110 { "CP1252", charset_id::windows1252 },
111 { "MS-ANSI", charset_id::windows1252 },
112 { "WINDOWS-1252", charset_id::windows1252 },
113 };
114 if (auto el = charsets.find(name); el != charsets.end())
115 return el->second;
116 return charset_id::system;
117 }
118
126 template <class TR = std::char_traits<char>, class AX = std::allocator<char>>
127 charset_id charset_from_name(_In_ const std::basic_string<char, TR, AX>& name)
128 {
129 return charset_from_name(name.c_str());
130 }
131
135 template <typename T_from, typename T_to>
137 {
138 protected:
139 charset_id m_from, m_to;
140
141 public:
142 charset_encoder(_In_ charset_id from, _In_ charset_id to) :
143 m_from(from),
144 m_to(to)
145 {
146#ifdef _WIN32
147 m_from_wincp = to_encoding(from);
148 m_to_wincp = to_encoding(to);
149#else
150 m_handle = iconv_open(to_encoding(to), to_encoding(from));
151 if (m_handle == (iconv_t)-1)
152 throw std::system_error(errno, std::system_category(), "iconv_open failed");
153#endif
154 }
155
156#ifndef _WIN32
158 {
159 iconv_close(m_handle);
160 }
161#endif
162
163 charset_id from_encoding() const { return m_from; }
164 charset_id to_encoding() const { return m_to; }
165
173 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
174 void strcat(
175 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
176 _In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
177 {
178 _Assume_(src || !count_src);
179 count_src = strnlen<T_from>(src, count_src);
180 if (!count_src) _Unlikely_
181 return;
182
183#ifdef _WIN32
184 constexpr DWORD dwFlagsMBWC = MB_PRECOMPOSED;
185 constexpr DWORD dwFlagsWCMB = 0;
186 constexpr LPCCH lpDefaultChar = NULL;
187
188 _Assume_(src);
189 if (m_from_wincp == m_to_wincp) _Unlikely_{
190 dst.append(reinterpret_cast<const T_to*>(src), count_src);
191 return;
192 }
193
194#pragma warning(suppress: 4127)
195 if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(wchar_t)) {
196 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
197
198 // Try to convert to stack buffer first.
199 WCHAR szStackBuffer[1024 / sizeof(WCHAR)];
200#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
201 int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer));
202 if (cch) {
203 // Append from stack.
204 dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
205 return;
206 }
207 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
208 // Query the required output size. Allocate buffer. Then convert again.
209 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
210 std::unique_ptr<WCHAR[]> szBuffer(new WCHAR[cch]);
211 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBuffer.get(), cch);
212 dst.append(reinterpret_cast<const T_to*>(szBuffer.get()), count_src != SIZE_MAX ? wcsnlen(szBuffer.get(), cch) : static_cast<size_t>(cch) - 1);
213 return;
214 }
215 throw std::system_error(GetLastError(), std::system_category(), "MultiByteToWideChar failed");
216 }
217
218#pragma warning(suppress: 4127)
219 if constexpr (sizeof(T_from) == sizeof(wchar_t) && sizeof(T_to) == sizeof(char)) {
220 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
221
222 // Try to convert to stack buffer first.
223 CHAR szStackBuffer[1024 / sizeof(CHAR)];
224#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpWideCharStr parameter wrong?
225 int cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL);
226 if (cch) {
227 // Copy from stack. Be careful not to include zero terminator.
228 dst.append(reinterpret_cast<const T_to*>(szStackBuffer), count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) : static_cast<size_t>(cch) - 1);
229 return;
230 }
231 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
232 // Query the required output size. Allocate buffer. Then convert again.
233 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL);
234 std::unique_ptr<CHAR[]> szBuffer(new CHAR[cch]);
235 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, reinterpret_cast<LPCWCH>(src), static_cast<int>(count_src), szBuffer.get(), cch, lpDefaultChar, NULL);
236 dst.append(reinterpret_cast<const T_to*>(szBuffer.get()), count_src != SIZE_MAX ? strnlen(szBuffer.get(), cch) : static_cast<size_t>(cch) - 1);
237 return;
238 }
239 throw std::system_error(GetLastError(), std::system_category(), "WideCharToMultiByte failed");
240 }
241
242#pragma warning(suppress: 4127)
243 if constexpr (sizeof(T_from) == sizeof(char) && sizeof(T_to) == sizeof(char)) {
244 _Assume_(count_src < INT_MAX || count_src == SIZE_MAX);
245
246 // Try to convert to stack buffer first.
247 WCHAR szStackBufferMBWC[512 / sizeof(WCHAR)];
248#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
249 int cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szStackBufferMBWC, _countof(szStackBufferMBWC));
250 if (cch) {
251 // Append from stack.
252 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szStackBufferMBWC, cch) : static_cast<size_t>(cch) - 1;
253 _Assume_(count_inter < INT_MAX);
254
255 // Try to convert to stack buffer first.
256 CHAR szStackBufferWCMB[512 / sizeof(CHAR)];
257#pragma warning(suppress: 6387) // Testing indicates szStackBufferMBWC may be NULL when count_inter is also 0. Is SAL of the lpWideCharStr parameter wrong?
258 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szStackBufferWCMB, _countof(szStackBufferWCMB), lpDefaultChar, NULL);
259 if (cch) {
260 // Copy from stack. Be careful not to include zero terminator.
261 dst.append(reinterpret_cast<const T_to*>(szStackBufferWCMB), strnlen(szStackBufferWCMB, cch));
262 return;
263 }
264 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
265 // Query the required output size. Allocate buffer. Then convert again.
266 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
267 std::unique_ptr<CHAR[]> szBufferWCMB(new CHAR[cch]);
268 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szStackBufferMBWC, static_cast<int>(count_inter), szBufferWCMB.get(), cch, lpDefaultChar, NULL);
269 dst.append(reinterpret_cast<const T_to*>(szBufferWCMB.get()), strnlen(szBufferWCMB.get(), cch));
270 return;
271 }
272 throw std::system_error(GetLastError(), std::system_category(), "WideCharToMultiByte failed");
273 }
274 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
275 // Query the required output size. Allocate buffer. Then convert again.
276 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), NULL, 0);
277 std::unique_ptr<WCHAR[]> szBufferMBWC(new WCHAR[cch]);
278 cch = MultiByteToWideChar(static_cast<UINT>(m_from_wincp), dwFlagsMBWC, reinterpret_cast<LPCCH>(src), static_cast<int>(count_src), szBufferMBWC.get(), cch);
279 size_t count_inter = count_src != SIZE_MAX ? wcsnlen(szBufferMBWC.get(), cch) : static_cast<size_t>(cch) - 1;
280
281 // Query the required output size. Allocate buffer. Then convert again.
282 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), NULL, 0, lpDefaultChar, NULL);
283 std::unique_ptr<CHAR[]> szBufferWCMB(new CHAR[cch]);
284 cch = WideCharToMultiByte(static_cast<UINT>(m_to_wincp), dwFlagsWCMB, szBufferMBWC.get(), static_cast<int>(count_inter), szBufferWCMB.get(), cch, lpDefaultChar, NULL);
285 dst.append(reinterpret_cast<const T_to*>(szBufferWCMB.get()), strnlen(szBufferWCMB.get(), cch));
286 return;
287 }
288 throw std::system_error(GetLastError(), std::system_category(), "MultiByteToWideChar failed");
289 }
290#else
291 dst.reserve(dst.size() + count_src);
292 T_to buf[1024 / sizeof(T_to)];
293 size_t src_size = stdex::mul(sizeof(T_from), count_src);
294 for (;;) {
295 T_to* output = &buf[0];
296 size_t output_size = sizeof(buf);
297 errno = 0;
298 iconv(m_handle, const_cast<char**>(reinterpret_cast<const char**>(&src)), &src_size, reinterpret_cast<char**>(&output), &output_size);
299 dst.append(buf, reinterpret_cast<T_to*>(reinterpret_cast<char*>(buf) + sizeof(buf) - output_size));
300 if (!errno)
301 break;
302 if (errno == E2BIG)
303 continue;
304 throw std::system_error(errno, std::system_category(), "iconv failed");
305 }
306#endif
307 }
308
315 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
316 void strcat(
317 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
318 _In_z_ const T_from* src)
319 {
320 strcat(dst, src, SIZE_MAX);
321 }
322
329 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
330 void strcat(
331 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
332 _In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
333 {
334 strcat(dst, src.data(), src.size());
335 }
336
344 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
345 void strcpy(
346 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
347 _In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
348 {
349 dst.clear();
350 strcat(dst, src, count_src);
351 }
352
359 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
360 void strcpy(
361 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
362 _In_z_ const T_from* src)
363 {
364 strcpy(dst, src, SIZE_MAX);
365 }
366
373 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
374 void strcpy(
375 _Inout_ std::basic_string<T_to, TR_to, AX_to>& dst,
376 _In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
377 {
378 strcpy(dst, src.data(), src.size());
379 }
380
387 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
388 std::basic_string<T_to, TR_to, AX_to> convert(_In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
389 {
390 std::basic_string<T_to, TR_to, AX_to> dst;
391 strcat(dst, src, count_src);
392 return dst;
393 }
394
400 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
401 std::basic_string<T_to, TR_to, AX_to> convert(_In_z_ const T_from* src)
402 {
403 return convert(src, SIZE_MAX);
404 }
405
411 template <class TR_to = std::char_traits<T_to>, class AX_to = std::allocator<T_to>>
412 std::basic_string<T_to, TR_to, AX_to> convert(_In_ const std::basic_string_view<T_from, std::char_traits<T_from>> src)
413 {
414 return convert(src.data(), src.size());
415 }
416
417 void clear()
418 {
419#ifndef _WIN32
420 iconv(m_handle, NULL, NULL, NULL, NULL);
421#endif
422 }
423
424 static charset_id system_charset()
425 {
426#ifdef _WIN32
427 return static_cast<charset_id>(GetACP());
428#else
429 return charset_from_name(nl_langinfo(CODESET));
430#endif
431 }
432
433#ifdef _WIN32
434 protected:
435 static UINT to_encoding(_In_ charset_id charset)
436 {
437 return
438 charset == charset_id::system ? GetACP() :
439 charset == charset_id::oem ? GetOEMCP() :
440 static_cast<UINT>(charset);
441 }
442
443 protected:
444 UINT m_from_wincp, m_to_wincp;
445#else
446 protected:
447 static const char* to_encoding(_In_ charset_id charset)
448 {
449 static const char* const encodings[static_cast<std::underlying_type_t<charset_id>>(charset_id::_max)] = {
450 "", // system
451 "UTF-7", // utf7
452 "UTF-8", // utf8
453#if BYTE_ORDER == BIG_ENDIAN
454 "UTF-16BE", // utf16
455 "UTF-32BE", // utf32
456#else
457 "UTF-16LE", // utf16
458 "UTF-32LE", // utf32
459#endif
460 "CP1250", // windows1250
461 "CP1251", // windows1251
462 "CP1252", // windows1252
463 };
464 return
465 charset == charset_id::system ? nl_langinfo(CODESET) :
466 encodings[static_cast<std::underlying_type_t<charset_id>>(charset)];
467 }
468
469 protected:
470 iconv_t m_handle;
471#endif
472 };
473
484 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
485#ifndef _WIN32
486 _Deprecated_("For better performance, consider a reusable charset_encoder")
487#endif
488 inline void strcat(
489 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
490 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
491 _In_ charset_id charset = charset_id::system)
492 {
493 charset_encoder<char, wchar_t>(charset, wchar_t_charset).strcat(dst, src, count_src);
494 }
495
496 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
497 _Deprecated_("Use stdex::strcat")
498 inline void str2wstr(
499 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
500 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
501 _In_ charset_id charset = charset_id::system)
502 {
503 strcat(dst, src, count_src, charset);
504 }
505
515 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
516#ifndef _WIN32
517 _Deprecated_("For better performance, consider a reusable charset_encoder")
518#endif
519 inline void strcat(
520 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
521 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
522 _In_ charset_id charset = charset_id::system)
523 {
524 strcat(dst, src.data(), src.size(), charset);
525 }
526
527 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
528 _Deprecated_("Use stdex::strcat")
529 inline void str2wstr(
530 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
531 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
532 _In_ charset_id charset = charset_id::system)
533 {
534 strcat(dst, src, charset);
535 }
536
547 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
548#ifndef _WIN32
549 _Deprecated_("For better performance, consider a reusable charset_encoder")
550#endif
551 inline void strcpy(
552 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
553 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
554 _In_ charset_id charset = charset_id::system)
555 {
556 dst.clear();
557 strcat(dst, src, count_src, charset);
558 }
559
569 template <class TR_to = std::char_traits<wchar_t>, class AX_to = std::allocator<wchar_t>>
570#ifndef _WIN32
571 _Deprecated_("For better performance, consider a reusable charset_encoder")
572#endif
573 inline void strcpy(
574 _Inout_ std::basic_string<wchar_t, TR_to, AX_to>& dst,
575 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
576 _In_ charset_id charset = charset_id::system)
577 {
578 strcpy(dst, src.data(), src.size(), charset);
579 }
580
591#ifndef _WIN32
592 _Deprecated_("For better performance, consider a reusable charset_encoder")
593#endif
594 inline std::wstring str2wstr(
595 _In_z_ const char* src,
596 _In_ charset_id charset = charset_id::system)
597 {
598 std::wstring dst;
599 strcat(dst, src, SIZE_MAX, charset);
600 return dst;
601 }
602
614#ifndef _WIN32
615 _Deprecated_("For better performance, consider a reusable charset_encoder")
616#endif
617 inline std::wstring str2wstr(
618 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
619 _In_ charset_id charset = charset_id::system)
620 {
621 std::wstring dst;
622 strcat(dst, src, count_src, charset);
623 return dst;
624 }
625
636#ifndef _WIN32
637 _Deprecated_("For better performance, consider a reusable charset_encoder")
638#endif
639 inline std::wstring str2wstr(
640 _In_ const std::basic_string_view<char, std::char_traits<char>> src,
641 _In_ charset_id charset = charset_id::system)
642 {
643 return str2wstr(src.data(), src.size(), charset);
644 }
645
656 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
657#ifndef _WIN32
658 _Deprecated_("For better performance, consider a reusable charset_encoder")
659#endif
660 inline void strcat(
661 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
662 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
663 _In_ charset_id charset = charset_id::system)
664 {
665 charset_encoder<wchar_t, char>(wchar_t_charset, charset).strcat(dst, src, count_src);
666 }
667
668 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
669 _Deprecated_("Use stdex::strcat")
670 inline void wstr2str(
671 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
672 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
673 _In_ charset_id charset = charset_id::system)
674 {
675 strcat(dst, src, count_src, charset);
676 }
677
687 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
688#ifndef _WIN32
689 _Deprecated_("For better performance, consider a reusable charset_encoder")
690#endif
691 inline void strcat(
692 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
693 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
694 _In_ charset_id charset = charset_id::system)
695 {
696 strcat(dst, src.data(), src.size(), charset);
697 }
698
699 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
700 _Deprecated_("Use stdex::strcat")
701 inline void wstr2str(
702 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
703 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
704 _In_ charset_id charset = charset_id::system)
705 {
706 strcat(dst, src, charset);
707 }
708
719 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
720#ifndef _WIN32
721 _Deprecated_("For better performance, consider a reusable charset_encoder")
722#endif
723 inline void strcpy(
724 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
725 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
726 _In_ charset_id charset = charset_id::system)
727 {
728 dst.clear();
729 strcat(dst, src, count_src, charset);
730 }
731
741 template <class TR_to = std::char_traits<char>, class AX_to = std::allocator<char>>
742#ifndef _WIN32
743 _Deprecated_("For better performance, consider a reusable charset_encoder")
744#endif
745 inline void strcpy(
746 _Inout_ std::basic_string<char, TR_to, AX_to>& dst,
747 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
748 _In_ charset_id charset = charset_id::system)
749 {
750 strcpy(dst, src.data(), src.size(), charset);
751 }
752
763#ifndef _WIN32
764 _Deprecated_("For better performance, consider a reusable charset_encoder")
765#endif
766 inline std::string wstr2str(
767 _In_z_ const wchar_t* src,
768 _In_ charset_id charset = charset_id::system)
769 {
770 std::string dst;
771 strcat(dst, src, SIZE_MAX, charset);
772 return dst;
773 }
774
786#ifndef _WIN32
787 _Deprecated_("For better performance, consider a reusable charset_encoder")
788#endif
789 inline std::string wstr2str(
790 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
791 _In_ charset_id charset = charset_id::system)
792 {
793 std::string dst;
794 strcat(dst, src, count_src, charset);
795 return dst;
796 }
797
808#ifndef _WIN32
809 _Deprecated_("For better performance, consider a reusable charset_encoder")
810#endif
811 inline std::string wstr2str(
812 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src,
813 _In_ charset_id charset = charset_id::system)
814 {
815 return wstr2str(src.data(), src.size(), charset);
816 }
817
818#ifdef _WIN32
828 template <class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
829 size_t normalizecat(
830 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
831 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
832 {
833 count_src = strnlen(src, count_src);
834 size_t count_dst = dst.size();
835 dst.resize(count_dst + count_src);
836 _Assume_(count_src + 1 < INT_MAX);
837#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpSrcString parameter wrong?
838 int r = NormalizeString(NormalizationC, src, static_cast<int>(count_src), dst.data() + count_dst, static_cast<int>(count_src + 1));
839 if (r >= 0)
840 dst.resize(count_dst + r);
841 else
842#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the _Src parameter wrong?
843 memcpy(dst.data() + count_dst, src, count_src * sizeof(wchar_t));
844 return dst.size();
845 }
846
855 template <size_t N, class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
856 size_t normalizecat(
857 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
858 _In_ const wchar_t (&src)[N])
859 {
860 return normalizecat(dst, src, N);
861 }
862
871 template <class TR_dst = std::char_traits<wchar_t>, class AX_dst = std::allocator<wchar_t>>
872 size_t normalizecat(
873 _Inout_ std::basic_string<wchar_t, TR_dst, AX_dst>& dst,
874 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
875 {
876 return normalizecat(dst, src.data(), src.size());
877 }
878
888 template <class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
889 size_t normalize(
890 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
891 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
892 {
893 dst.clear();
894 return normalizecat(dst, src, count_src);
895 }
896
905 template <size_t N, class TR = std::char_traits<wchar_t>, class AX = std::allocator<wchar_t>>
906 size_t normalize(
907 _Inout_ std::basic_string<wchar_t, TR, AX>& dst,
908 _In_ const wchar_t(&src)[N])
909 {
910 return normalize(dst, src, N);
911 }
912
921 template <class TR_dst = std::char_traits<wchar_t>, class AX_dst = std::allocator<wchar_t>>
922 size_t normalize(
923 _Inout_ std::basic_string<wchar_t, TR_dst, AX_dst>& dst,
924 _In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
925 {
926 return normalize(dst, src.data(), src.size());
927 }
928
937 inline std::wstring normalize(_In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src)
938 {
939 std::wstring dst;
940 normalizecat(dst, src, count_src);
941 return dst;
942 }
943
951 template <size_t N>
952 std::wstring normalize(_In_ const wchar_t(&src)[N])
953 {
954 std::wstring dst;
955 normalizecat(dst, src, N);
956 return dst;
957 }
958
966 inline std::wstring normalize(_In_ const std::basic_string_view<wchar_t, std::char_traits<wchar_t>> src)
967 {
968 std::wstring dst;
969 normalizecat(dst, src.data(), src.size());
970 return dst;
971 }
972#endif
973}
974
975#ifndef _WIN32
976#pragma GCC diagnostic pop
977#endif
Encoding converter context.
Definition unicode.hpp:137
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string and append to string.
Definition unicode.hpp:174
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Convert string and append to string.
Definition unicode.hpp:330
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, const T_from *src)
Convert string.
Definition unicode.hpp:360
void strcat(std::basic_string< T_to, TR_to, AX_to > &dst, const T_from *src)
Convert string and append to string.
Definition unicode.hpp:316
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, _In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Convert string.
Definition unicode.hpp:345
std::basic_string< T_to, TR_to, AX_to > convert(const T_from *src)
Return converted string.
Definition unicode.hpp:401
void strcpy(std::basic_string< T_to, TR_to, AX_to > &dst, const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Convert string.
Definition unicode.hpp:374
std::basic_string< T_to, TR_to, AX_to > convert(const std::basic_string_view< T_from, std::char_traits< T_from > > src)
Return converted string.
Definition unicode.hpp:412
std::basic_string< T_to, TR_to, AX_to > convert(_In_reads_or_z_opt_(count_src) const T_from *src, size_t count_src)
Return converted string.
Definition unicode.hpp:388