stdex
Additional custom algorithms and algorithms not covered by Standard C++
Loading...
Searching...
No Matches
unicode.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "endian.hpp"
10#include "math.hpp"
11#include "system.hpp"
12#include <assert.h>
13#include <stdint.h>
14#ifndef _WIN32
15#include <iconv.h>
16#endif
17#include <memory>
18#include <string>
19
20namespace stdex
21{
///
/// Character set identifiers
///
/// On Windows the enumerators carry code page numbers, as they are cast straight to the
/// code page argument of the Win32 conversion functions. Elsewhere they are plain
/// sequential tags that get mapped to iconv encoding names.
///
enum class charset_id : uint16_t {
#ifdef _WIN32
    system = CP_ACP,
    utf8 = CP_UTF8,
    utf16 = 1200 /*CP_WINUNICODE*/,
#else
    system = 0,
    utf8 = 1,
    utf16 = 2,
    utf32 = 3,
#endif
};
34
35#ifndef _WIN32
39 template <typename T_from, typename T_to>
41 {
42 public:
43 iconverter(_In_ charset_id from, _In_ charset_id to)
44 {
45 m_handle = iconv_open(to_encoding(to), to_encoding(from));
46 if (m_handle == (iconv_t)-1)
47 throw std::runtime_error("iconv_open failed");
48 }
49
51 {
52 iconv_close(m_handle);
53 }
54
55 void convert(_Inout_ std::basic_string<T_to> &dst, _In_reads_or_z_opt_(count) const T_from* src, _In_ size_t count_src) const
56 {
57 T_to buf[0x100];
58 count_src = stdex::strnlen(src, count_src);
59 size_t src_size = stdex::mul(sizeof(T_from), count_src);
60 do {
61 T_to* output = &buf[0];
62 size_t output_size = sizeof(buf);
63 errno = 0;
64 iconv(m_handle, (char**)&src, &src_size, (char**)&output, &output_size);
65 if (errno)
66 throw std::runtime_error("iconv failed");
67 dst.insert(dst.end(), buf, (T_to*)((char*)buf + sizeof(buf) - output_size));
68 } while (src_size);
69 }
70
71 protected:
72 static const char* to_encoding(_In_ charset_id charset)
73 {
74 switch (charset) {
75 case charset_id::system:
76 case charset_id::utf8: return "UTF-8";
77#if BYTE_ORDER == BIG_ENDIAN
78 case charset_id::utf16: return "UTF-16BE";
79 case charset_id::utf32: return "UTF-32BE";
80#else
81 case charset_id::utf16: return "UTF-16LE";
82 case charset_id::utf32: return "UTF-32LE";
83#endif
84 default: throw std::invalid_argument("unsupported charset");
85 }
86 }
87
88 protected:
89 iconv_t m_handle;
90 };
91#endif
92
101 inline void strcat(
102 _Inout_ std::wstring& dst,
103 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
104 _In_ charset_id charset = charset_id::system)
105 {
106 assert(src || !count_src);
107#ifdef _WIN32
108 assert(count_src < INT_MAX || count_src == SIZE_MAX);
109 constexpr DWORD dwFlags = MB_PRECOMPOSED;
110
111 // Try to convert to stack buffer first.
112 WCHAR szStackBuffer[1024/sizeof(WCHAR)];
113#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpMultiByteStr parameter wrong?
114 int cch = MultiByteToWideChar(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer));
115 if (cch) {
116 // Append from stack.
117 dst.append(szStackBuffer, count_src != SIZE_MAX ? wcsnlen(szStackBuffer, cch) : (size_t)cch - 1);
118 } else if (::GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
119 // Query the required output size. Allocate buffer. Then convert again.
120 cch = MultiByteToWideChar(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), NULL, 0);
121 std::unique_ptr<WCHAR[]> szBuffer(new WCHAR[cch]);
122 cch = MultiByteToWideChar(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), szBuffer.get(), cch);
123 dst.append(szBuffer.get(), count_src != SIZE_MAX ? wcsnlen(szBuffer.get(), cch) : (size_t)cch - 1);
124 }
125#else
126 iconverter<char, wchar_t>(charset, charset_id::utf32).convert(dst, src, count_src);
127#endif
128 }
129
130 _Deprecated_("Use stdex::strcat")
131 inline void str2wstr(
132 _Inout_ std::wstring& dst,
133 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
134 _In_ charset_id charset = charset_id::system)
135 {
136 strcat(dst, src, count_src, charset);
137 }
138
146 inline void strcat(
147 _Inout_ std::wstring& dst,
148 _In_ const std::string& src,
149 _In_ charset_id charset = charset_id::system)
150 {
151 strcat(dst, src.data(), src.size(), charset);
152 }
153
154 _Deprecated_("Use stdex::strcat")
155 inline void str2wstr(
156 _Inout_ std::wstring& dst,
157 _In_ const std::string& src,
158 _In_ charset_id charset = charset_id::system)
159 {
160 strcat(dst, src, charset);
161 }
162
171 inline void strcpy(
172 _Inout_ std::wstring& dst,
173 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
174 _In_ charset_id charset = charset_id::system)
175 {
176 dst.clear();
177 strcat(dst, src, count_src, charset);
178 }
179
187 inline void strcpy(
188 _Inout_ std::wstring& dst,
189 _In_ const std::string& src,
190 _In_ charset_id charset = charset_id::system)
191 {
192 strcpy(dst, src.data(), src.size(), charset);
193 }
194
203 inline std::wstring str2wstr(
204 _In_z_ const char* src,
205 _In_ charset_id charset = charset_id::system)
206 {
207 std::wstring dst;
208 strcat(dst, src, SIZE_MAX, charset);
209 return dst;
210 }
211
221 inline std::wstring str2wstr(
222 _In_reads_or_z_opt_(count_src) const char* src, _In_ size_t count_src,
223 _In_ charset_id charset = charset_id::system)
224 {
225 std::wstring dst;
226 strcat(dst, src, count_src, charset);
227 return dst;
228 }
229
238 inline std::wstring str2wstr(
239 _In_ const std::string& src,
240 _In_ charset_id charset = charset_id::system)
241 {
242 return str2wstr(src.c_str(), src.size(), charset);
243 }
244
253 inline void strcat(
254 _Inout_ std::string& dst,
255 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
256 _In_ charset_id charset = charset_id::system)
257 {
258 assert(src || !count_src);
259#ifdef _WIN32
260 assert(count_src < INT_MAX || count_src == SIZE_MAX);
261 constexpr DWORD dwFlags = 0;
262 constexpr LPCCH lpDefaultChar = NULL;
263
264 // Try to convert to stack buffer first.
265 CHAR szStackBuffer[1024/sizeof(CHAR)];
266#pragma warning(suppress: 6387) // Testing indicates src may be NULL when count_src is also 0. Is SAL of the lpWideCharStr parameter wrong?
267 int cch = WideCharToMultiByte(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), szStackBuffer, _countof(szStackBuffer), lpDefaultChar, NULL);
268 if (cch) {
269 // Copy from stack. Be careful not to include zero terminator.
270 dst.append(szStackBuffer, count_src != SIZE_MAX ? strnlen(szStackBuffer, cch) : (size_t)cch - 1);
271 } else if (::GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
272 // Query the required output size. Allocate buffer. Then convert again.
273 cch = WideCharToMultiByte(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), NULL, 0, lpDefaultChar, NULL);
274 std::unique_ptr<CHAR[]> szBuffer(new CHAR[cch]);
275 cch = WideCharToMultiByte(static_cast<UINT>(charset), dwFlags, src, static_cast<int>(count_src), szBuffer.get(), cch, lpDefaultChar, NULL);
276 dst.append(szBuffer.get(), count_src != SIZE_MAX ? strnlen(szBuffer.get(), cch) : (size_t)cch - 1);
277 }
278#else
279 iconverter<wchar_t, char>(charset_id::utf32, charset).convert(dst, src, count_src);
280#endif
281 }
282
283 _Deprecated_("Use stdex::strcat")
284 inline void wstr2str(
285 _Inout_ std::string& dst,
286 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
287 _In_ charset_id charset = charset_id::system)
288 {
289 strcat(dst, src, count_src, charset);
290 }
291
299 inline void strcat(
300 _Inout_ std::string& dst,
301 _In_ const std::wstring& src,
302 _In_ charset_id charset = charset_id::system)
303 {
304 strcat(dst, src.c_str(), src.size(), charset);
305 }
306
307 _Deprecated_("Use stdex::strcat")
308 inline void wstr2str(
309 _Inout_ std::string& dst,
310 _In_ const std::wstring& src,
311 _In_ charset_id charset = charset_id::system)
312 {
313 strcat(dst, src, charset);
314 }
315
324 inline void strcpy(
325 _Inout_ std::string& dst,
326 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
327 _In_ charset_id charset = charset_id::system)
328 {
329 dst.clear();
330 strcat(dst, src, count_src, charset);
331 }
332
340 inline void strcpy(
341 _Inout_ std::string& dst,
342 _In_ const std::wstring& src,
343 _In_ charset_id charset = charset_id::system)
344 {
345 strcpy(dst, src.data(), src.size(), charset);
346 }
347
356 inline std::string wstr2str(
357 _In_z_ const wchar_t* src,
358 _In_ charset_id charset = charset_id::system)
359 {
360 std::string dst;
361 strcat(dst, src, SIZE_MAX, charset);
362 return dst;
363 }
364
374 inline std::string wstr2str(
375 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
376 _In_ charset_id charset = charset_id::system)
377 {
378 std::string dst;
379 strcat(dst, src, count_src, charset);
380 return dst;
381 }
382
391 inline std::string wstr2str(
392 _In_ const std::wstring& src,
393 _In_ charset_id charset = charset_id::system)
394 {
395 return wstr2str(src.c_str(), src.size(), charset);
396 }
397}
Unicode converter context.
Definition unicode.hpp:41