stdex
Additional custom algorithms not covered by Standard C++
Loading...
Searching...
No Matches
sgml.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "mapping.hpp"
9#include "sal.hpp"
10#include "sgml_unicode.hpp"
11#include "string.hpp"
12#include <assert.h>
13#include <exception>
14#include <string>
15
16namespace stdex
17{
19 template <class T>
20 inline const wchar_t* sgml2uni(_In_reads_or_z_(count) const T* entity, _In_ size_t count)
21 {
22 assert(entity && count);
23 assert(count < 2 || entity[0] != '#'); // No numeric entities
24
25 for (size_t i = 0, j = _countof(sgml_unicode); i < j; ) {
26 size_t m = (i + j) / 2;
27 if (sgml_unicode[m].sgml[0] < entity[0])
28 i = m + 1;
29 else if (sgml_unicode[m].sgml[0] > entity[0])
30 j = m;
31 else {
32 auto r = strncmp<char, T>(sgml_unicode[m].sgml + 1, _countof(sgml_unicode[0].sgml) - 1, entity + 1, count - 1);
33 if (r < 0)
34 i = m + 1;
35 else if (r > 0)
36 j = m;
37 else {
38 for (; i < m && strncmp<char, T>(sgml_unicode[m - 1].sgml, _countof(sgml_unicode[0].sgml), entity, count) == 0; m--);
39 return sgml_unicode[m].unicode;
40 }
41 }
42 }
43 return nullptr;
44 }
45
///
/// Finds the semicolon that terminates an SGML entity.
///
/// Scanning gives up at the first zero terminator, ampersand or whitespace
/// character, or once count characters have been examined.
///
/// \param[in] str    Text following the entity's leading "&"
/// \param[in] count  Maximum number of characters to examine
///
/// \return Pointer to the terminating ";"; nullptr when none is found.
///
template <class T>
inline const T* sgmlend(
    _In_reads_or_z_opt_(count) const T* str, _In_ size_t count)
{
    assert(str || !count);
    size_t pos = 0;
    while (pos < count) {
        if (str[pos] == ';')
            return str + pos;
        // Anything that cannot be part of an entity name ends the search.
        if (!str[pos] || str[pos] == '&' || isspace(str[pos]))
            return nullptr;
        ++pos;
    }
    return nullptr;
}
60
// Character-class flags shared by the SGML conversion functions in this file.
// sgml2wstrcat() takes them as "skip": entities decoding to a flagged character are left unconverted.
// wstr2sgmlcat() takes them as "what": flagged characters are not passed through verbatim.
constexpr int sgml_full      = static_cast<int>(0x80000000); // wstr2sgmlcat(): no verbatim 7-bit ASCII pass-through
constexpr int sgml_quot      = 1 << 0;                       // "
constexpr int sgml_apos      = 1 << 1;                       // '
constexpr int sgml_quot_apos = sgml_quot | sgml_apos;
constexpr int sgml_amp       = 1 << 2;                       // &
constexpr int sgml_lt_gt     = 1 << 3;                       // < >
constexpr int sgml_bsol      = 1 << 4;                       // backslash
constexpr int sgml_dollar    = 1 << 5;                       // $
constexpr int sgml_percnt    = 1 << 6;                       // %
constexpr int sgml_commat    = 1 << 7;                       // @
constexpr int sgml_num       = 1 << 8;                       // #
constexpr int sgml_lpar_rpar = 1 << 9;                       // ( )
constexpr int sgml_lcub_rcub = 1 << 10;                      // { }
constexpr int sgml_lsqb_rsqb = 1 << 11;                      // [ ]

// Ready-made combinations.
constexpr int sgml_sgml      = sgml_amp | sgml_lt_gt;
constexpr int sgml_ml_attrib = sgml_amp | sgml_quot_apos;
constexpr int sgml_c         = sgml_amp | sgml_bsol | sgml_quot_apos;
78 // constexpr int sgml_ajt_lemma = sgml_amp | sgml_quot | sgml_dollar | sgml_percnt;
79 // constexpr int sgml_ajt_form = sgml_ajt_lemma;
80 // constexpr int sgml_kolos = sgml_amp | sgml_quot | sgml_dollar | sgml_percnt | sgml_lt_gt | sgml_bsol/* | sgml_commat | sgml_num*/ | sgml_lpar_rpar | sgml_lcub_rcub | sgml_lsqb_rsqb;
81
92 template <class T>
93 inline void sgml2wstrcat(
94 _Inout_ std::wstring& dst,
95 _In_reads_or_z_opt_(count_src) const T* src, _In_ size_t count_src,
96 _In_ int skip = 0,
97 _In_ const mapping<size_t>& offset = mapping<size_t>(0, 0),
98 _Inout_opt_ mapping_vector<size_t>* map = nullptr)
99 {
100 assert(src || !count_src);
101
102 const bool
103 skip_quot = (skip & sgml_quot) == 0,
104 skip_apos = (skip & sgml_apos) == 0,
105 skip_amp = (skip & sgml_amp) == 0,
106 skip_lt_gt = (skip & sgml_lt_gt) == 0,
107 skip_bsol = (skip & sgml_bsol) == 0,
108 skip_dollar = (skip & sgml_dollar) == 0,
109 skip_percnt = (skip & sgml_percnt) == 0,
110 skip_commat = (skip & sgml_commat) == 0,
111 skip_num = (skip & sgml_num) == 0,
112 skip_lpar_rpar = (skip & sgml_lpar_rpar) == 0,
113 skip_lcub_rcub = (skip & sgml_lcub_rcub) == 0,
114 skip_lsqb_rsqb = (skip & sgml_lsqb_rsqb) == 0;
115
116 count_src = strnlen(src, count_src);
117 dst.reserve(dst.size() + count_src);
118 for (size_t i = 0; i < count_src;) {
119 if (src[i] == '&') {
120 auto end = sgmlend(src + i + 1, count_src - i - 1);
121 if (end) {
122 const wchar_t* entity_w;
123 wchar_t chr[3];
124 size_t n = end - src - i - 1;
125 if (n >= 2 && src[i + 1] == '#') {
126 uint32_t unicode;
127 if (src[i + 2] == 'x' || src[i + 2] == 'X')
128 unicode = strtou32(src + i + 3, n - 2, nullptr, 16);
129 else
130 unicode = strtou32(src + i + 2, n - 1, nullptr, 10);
131#ifdef _WIN32
132 if (unicode < 0x10000) {
133 chr[0] = (wchar_t)unicode;
134 chr[1] = 0;
135 }
136 else {
137 ucs4_to_surrogate_pair(chr, unicode);
138 chr[2] = 0;
139 }
140#else
141 chr[0] = (wchar_t)unicode;
142 chr[1] = 0;
143#endif
144 entity_w = chr;
145 }
146 else
147 entity_w = sgml2uni(src + i + 1, n);
148
149 if (entity_w &&
150 (skip_quot || (entity_w[0] != L'"')) &&
151 (skip_apos || (entity_w[0] != L'\'')) &&
152 (skip_amp || (entity_w[0] != L'&')) &&
153 (skip_lt_gt || (entity_w[0] != L'<' && entity_w[0] != L'>')) &&
154 (skip_bsol || (entity_w[0] != L'\\')) &&
155 (skip_dollar || (entity_w[0] != L'$')) &&
156 (skip_percnt || (entity_w[0] != L'%')) &&
157 (skip_commat || (entity_w[0] != L'@')) &&
158 (skip_num || (entity_w[0] != L'#')) &&
159 (skip_lpar_rpar || (entity_w[0] != L'(' && entity_w[0] != L')')) &&
160 (skip_lcub_rcub || (entity_w[0] != L'{' && entity_w[0] != L'}')) &&
161 (skip_lsqb_rsqb || (entity_w[0] != L'[' && entity_w[0] != L']')))
162 {
163 if (map) map->push_back(mapping<size_t>(offset.from + i, offset.to + dst.size()));
164 dst.append(entity_w);
165 i = end - src + 1;
166 if (map) map->push_back(mapping<size_t>(offset.from + i, offset.to + dst.size()));
167 continue;
168 }
169 }
170 }
171 dst.append(1, src[i++]);
172 }
173 }
174
175 template <class T>
176 inline _Deprecated_("Use stdex::sgml2wstrcat") void sgml2wstr(
177 _Inout_ std::wstring& dst,
178 _In_reads_or_z_opt_(count_src) const T* src, _In_ size_t count_src,
179 _In_ int skip = 0,
180 _In_ const mapping<size_t>& offset = mapping<size_t>(0, 0),
181 _Inout_opt_ mapping_vector<size_t>* map = nullptr)
182 {
183 sgml2wstrcat(dst, src, count_src, skip, offset, map);
184 }
185
197 template <class T>
198 inline void sgml2wstrcat(
199 _Inout_ std::wstring& dst,
200 _In_ const std::basic_string<T>& src,
201 _In_ int skip = 0,
202 _In_ const mapping<size_t>& offset = mapping<size_t>(0, 0),
203 _Inout_opt_ mapping_vector<size_t>* map = nullptr)
204 {
205 sgml2wstrcat(dst, src.data(), src.size(), skip, offset, map);
206 }
207
208 template <class T>
209 inline _Deprecated_("Use stdex::sgml2wstrcat") void sgml2wstr(
210 _Inout_ std::wstring& dst,
211 _In_ const std::basic_string<T>& src,
212 _In_ int skip = 0,
213 _In_ const mapping<size_t>& offset = mapping<size_t>(0, 0),
214 _Inout_opt_ mapping_vector<size_t>* map = nullptr)
215 {
216 sgml2wstrcat(dst, src, skip, offset, map);
217 }
218
232 template <class T>
233 inline size_t sgml2wstrcat(
234 _Inout_cap_(count_dst) wchar_t* dst, _In_ size_t count_dst,
235 _In_reads_or_z_opt_(count_src) const T* src, _In_ size_t count_src,
236 _In_ int skip = 0,
237 _In_ const mapping<size_t>& offset = mapping<size_t>(0, 0),
238 _Inout_opt_ mapping_vector<size_t>* map = nullptr)
239 {
240 assert(dst || !count_dst);
241 assert(src || !count_src);
242
243 static const std::invalid_argument buffer_overrun("buffer overrun");
244 const bool
245 skip_quot = (skip & sgml_quot) == 0,
246 skip_apos = (skip & sgml_apos) == 0,
247 skip_amp = (skip & sgml_amp) == 0,
248 skip_lt_gt = (skip & sgml_lt_gt) == 0,
249 skip_bsol = (skip & sgml_bsol) == 0,
250 skip_dollar = (skip & sgml_dollar) == 0,
251 skip_percnt = (skip & sgml_percnt) == 0,
252 skip_commat = (skip & sgml_commat) == 0,
253 skip_num = (skip & sgml_num) == 0,
254 skip_lpar_rpar = (skip & sgml_lpar_rpar) == 0,
255 skip_lcub_rcub = (skip & sgml_lcub_rcub) == 0,
256 skip_lsqb_rsqb = (skip & sgml_lsqb_rsqb) == 0;
257
258 size_t j = wcsnlen(dst, count_dst);
259 count_src = strnlen(src, count_src);
260 for (size_t i = 0; i < count_src;) {
261 if (src[i] == '&') {
262 auto end = sgmlend(src + i + 1, count_src - i - 1);
263 if (end) {
264 const wchar_t* entity_w;
265 wchar_t chr[3];
266 size_t n = end - src - i - 1;
267 if (n >= 2 && src[i + 1] == '#') {
268 uint32_t unicode;
269 if (src[i + 2] == 'x' || src[i + 2] == 'X')
270 unicode = strtou32(src + i + 3, n - 2, nullptr, 16);
271 else
272 unicode = strtou32(src + i + 2, n - 1, nullptr, 10);
273#ifdef _WIN32
274 if (unicode < 0x10000) {
275 chr[0] = (wchar_t)unicode;
276 chr[1] = 0;
277 }
278 else {
279 ucs4_to_surrogate_pair(chr, unicode);
280 chr[2] = 0;
281 }
282#else
283 chr[0] = (wchar_t)unicode;
284 chr[1] = 0;
285#endif
286 entity_w = chr;
287 }
288 else
289 entity_w = sgml2uni(src + i + 1, n);
290
291 if (entity_w &&
292 (skip_quot || (entity_w[0] != L'"')) &&
293 (skip_apos || (entity_w[0] != L'\'')) &&
294 (skip_amp || (entity_w[0] != L'&')) &&
295 (skip_lt_gt || (entity_w[0] != L'<' && entity_w[0] != L'>')) &&
296 (skip_bsol || (entity_w[0] != L'\\')) &&
297 (skip_dollar || (entity_w[0] != L'$')) &&
298 (skip_percnt || (entity_w[0] != L'%')) &&
299 (skip_commat || (entity_w[0] != L'@')) &&
300 (skip_num || (entity_w[0] != L'#')) &&
301 (skip_lpar_rpar || (entity_w[0] != L'(' && entity_w[0] != L')')) &&
302 (skip_lcub_rcub || (entity_w[0] != L'{' && entity_w[0] != L'}')) &&
303 (skip_lsqb_rsqb || (entity_w[0] != L'[' && entity_w[0] != L']')))
304 {
305 if (map) map->push_back(mapping<size_t>(offset.from + i, offset.to + j));
306 size_t m = wcslen(entity_w);
307 if (j + m >= count_dst)
308 throw buffer_overrun;
309 memcpy(dst + j, entity_w, m * sizeof(wchar_t)); j += m;
310 i = end - src + 1;
311 if (map) map->push_back(mapping<size_t>(offset.from + i, offset.to + j));
312 continue;
313 }
314 }
315 }
316 if (j + 1 >= count_dst)
317 throw buffer_overrun;
318 dst[j++] = src[i++];
319 }
320 if (j >= count_dst)
321 throw buffer_overrun;
322 dst[j] = 0;
323 return j;
324 }
325
326 template <class T>
327 inline _Deprecated_("Use stdex::sgml2wstrcat") size_t sgml2wstr(
328 _Inout_cap_(count_dst) wchar_t* dst, _In_ size_t count_dst,
329 _In_reads_or_z_opt_(count_src) const T* src, _In_ size_t count_src,
330 _In_ int skip = 0,
331 _In_ const mapping<size_t>& offset = mapping<size_t>(0, 0),
332 _Inout_opt_ mapping_vector<size_t>* map = nullptr)
333 {
334 return sgml2wstrcat(dst, count_dst, src, count_src, skip, offset, map);
335 }
336
347 template <class T>
348 inline void sgml2wstrcpy(
349 _Inout_ std::wstring& dst,
350 _In_reads_or_z_opt_(count_src) const T* src, _In_ size_t count_src,
351 _In_ int skip = 0,
352 _In_ const mapping<size_t>& offset = mapping<size_t>(0, 0),
353 _Inout_opt_ mapping_vector<size_t>* map = nullptr)
354 {
355 dst.clear();
356 if (map)
357 map->clear();
358 sgml2wstrcat(dst, src, count_src, skip, offset, map);
359 }
360
374 template <class T>
375 inline size_t sgml2wstrcpy(
376 _Inout_cap_(count_dst) wchar_t* dst, _In_ size_t count_dst,
377 _In_reads_or_z_opt_(count_src) const T* src, _In_ size_t count_src,
378 _In_ int skip = 0,
379 _In_ const mapping<size_t>& offset = mapping<size_t>(0, 0),
380 _Inout_opt_ mapping_vector<size_t>* map = nullptr)
381 {
382 assert(dst || !count_dst);
383 if (count_dst)
384 dst[0] = 0;
385 if (map)
386 map->clear();
387 sgml2wstrcat(dst, count_dst, src, count_src, skip, offset, map);
388 }
389
401 template <class T>
402 inline std::wstring sgml2wstr(
403 _In_reads_or_z_opt_(count_src) const T* src, _In_ size_t count_src,
404 _In_ int skip = 0,
405 _In_ const mapping<size_t>& offset = mapping<size_t>(0, 0),
406 _Inout_opt_ mapping_vector<size_t>* map = nullptr)
407 {
408 std::wstring dst;
409 sgml2wstrcat(dst, src, count_src, skip, offset, map);
410 return dst;
411 }
412
423 template <class T>
424 inline std::wstring sgml2wstr(
425 _In_ const std::basic_string<T>& src,
426 _In_ int skip = 0,
427 _In_ const mapping<size_t>& offset = mapping<size_t>(0, 0),
428 _Inout_opt_ mapping_vector<size_t>* map = nullptr)
429 {
430 return sgml2wstr(src.c_str(), src.size(), skip, offset, map);
431 }
432
434 inline const char* chr2sgml(_In_reads_or_z_(count) const wchar_t* entity, _In_ size_t count)
435 {
436 assert(entity && count);
437
438 const wchar_t e2 = entity[0];
439 for (size_t i = 0, j = _countof(unicode_sgml); i < j; ) {
440 size_t m = (i + j) / 2;
441 wchar_t e1 = sgml_unicode[unicode_sgml[m]].unicode[0];
442 if (e1 < e2)
443 i = m + 1;
444 else if (e1 > e2)
445 j = m;
446 else {
447 auto r = strncmp(sgml_unicode[unicode_sgml[m]].unicode + 1, _countof(sgml_unicode[0].unicode) - 1, entity + 1, count - 1);
448 if (r < 0)
449 i = m + 1;
450 else if (r > 0)
451 j = m;
452 else {
453 for (; i < m && sgml_unicode[unicode_sgml[m - 1]].unicode[0] == e2 && strncmp(sgml_unicode[unicode_sgml[m - 1]].unicode + 1, _countof(sgml_unicode[0].unicode) - 1, entity + 1, count - 1) == 0; m--);
454 return sgml_unicode[unicode_sgml[m]].sgml;
455 }
456 }
457 }
458 return nullptr;
459 }
461
470 inline void wstr2sgmlcat(
471 _Inout_ std::string& dst,
472 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
473 _In_ size_t what = 0)
474 {
475 assert(src || !count_src);
476
477 const bool
478 do_ascii = (what & sgml_full) == 0,
479 do_quot = (what & sgml_quot) == 0,
480 do_apos = (what & sgml_apos) == 0,
481 do_lt_gt = (what & sgml_lt_gt) == 0,
482 do_bsol = (what & sgml_bsol) == 0,
483 do_dollar = (what & sgml_dollar) == 0,
484 do_percnt = (what & sgml_percnt) == 0,
485 do_commat = (what & sgml_commat) == 0,
486 do_num = (what & sgml_num) == 0,
487 do_lpar_rpar = (what & sgml_lpar_rpar) == 0,
488 do_lcub_rcub = (what & sgml_lcub_rcub) == 0,
489 do_lsqb_rsqb = (what & sgml_lsqb_rsqb) == 0;
490
491 count_src = wcsnlen(src, count_src);
492 dst.reserve(dst.size() + count_src);
493 for (size_t i = 0; i < count_src;) {
494 size_t n = glyphlen(src + i, count_src - i);
495 if (n == 1 &&
496 do_ascii && (unsigned int)src[i] < 128 &&
497 src[i] != L'&' &&
498 (do_quot || (src[i] != L'"')) &&
499 (do_apos || (src[i] != L'\'')) &&
500 (do_lt_gt || (src[i] != L'<' && src[i] != L'>')) &&
501 (do_bsol || (src[i] != L'\\')) &&
502 (do_dollar || (src[i] != L'$')) &&
503 (do_percnt || (src[i] != L'%')) &&
504 (do_commat || (src[i] != L'@')) &&
505 (do_num || (src[i] != L'#')) &&
506 (do_lpar_rpar || (src[i] != L'(' && src[i] != L')')) &&
507 (do_lcub_rcub || (src[i] != L'{' && src[i] != L'}')) &&
508 (do_lsqb_rsqb || (src[i] != L'[' && src[i] != L']')))
509 {
510 // 7-bit ASCII and no desire to encode it as an SGML entity.
511 dst.append(1, static_cast<char>(src[i++]));
512 }
513 else {
514 const char* entity = chr2sgml(src + i, n);
515 if (entity) {
516 dst.append(1, '&');
517 dst.append(entity);
518 dst.append(1, ';');
519 i += n;
520 }
521 else if (n == 1) {
522 // Trivial character (1 code unit, 1 glyph), no entity available.
523 if ((unsigned int)src[i] < 128)
524 dst.append(1, static_cast<char>(src[i++]));
525 else {
526 char tmp[3 + 8 + 1 + 1];
527 snprintf(tmp, _countof(tmp), "&#x%x;", src[i++]);
528 dst.append(tmp);
529 }
530 }
531 else {
532 // Non-trivial character. Decompose.
533 const size_t end = i + n;
534 while (i < end) {
535 if ((entity = chr2sgml(src + i, 1)) != nullptr) {
536 dst.append(1, '&');
537 dst.append(entity);
538 dst.append(1, ';');
539 i++;
540 }
541 else if ((unsigned int)src[i] < 128)
542 dst.append(1, static_cast<char>(src[i++]));
543 else {
544 uint32_t unicode;
545#ifdef _WIN32
546 if (i + 1 < end && is_surrogate_pair(src + i)) {
547 unicode = surrogate_pair_to_ucs4(src + i);
548 i += 2;
549 }
550 else
551#endif
552 {
553 unicode = src[i++];
554 }
555 char tmp[3 + 8 + 1 + 1];
556 snprintf(tmp, _countof(tmp), "&#x%x;", unicode);
557 dst.append(tmp);
558 }
559 }
560 }
561 }
562 }
563 }
564
565 inline _Deprecated_("Use stdex::wstr2sgmlcat") void wstr2sgml(
566 _Inout_ std::string& dst,
567 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
568 _In_ size_t what = 0)
569 {
570 wstr2sgmlcat(dst, src, count_src, what);
571 }
572
580 inline void wstr2sgmlcat(
581 _Inout_ std::string& dst,
582 _In_ const std::wstring& src,
583 _In_ size_t what = 0)
584 {
585 wstr2sgmlcat(dst, src.c_str(), src.size(), what);
586 }
587
588 inline _Deprecated_("Use stdex::wstr2sgmlcat") void wstr2sgml(
589 _Inout_ std::string& dst,
590 _In_ const std::wstring& src,
591 _In_ size_t what = 0)
592 {
593 wstr2sgmlcat(dst, src, what);
594 }
595
607 inline size_t wstr2sgmlcat(
608 _Inout_cap_(count_dst) char* dst, _In_ size_t count_dst,
609 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
610 _In_ size_t what = 0)
611 {
612 assert(dst || !count_dst);
613 assert(src || !count_src);
614
615 static const std::invalid_argument buffer_overrun("buffer overrun");
616 const bool
617 do_ascii = (what & sgml_full) == 0,
618 do_quot = (what & sgml_quot) == 0,
619 do_apos = (what & sgml_apos) == 0,
620 do_lt_gt = (what & sgml_lt_gt) == 0,
621 do_bsol = (what & sgml_bsol) == 0,
622 do_dollar = (what & sgml_dollar) == 0,
623 do_percnt = (what & sgml_percnt) == 0,
624 do_commat = (what & sgml_commat) == 0,
625 do_num = (what & sgml_num) == 0,
626 do_lpar_rpar = (what & sgml_lpar_rpar) == 0,
627 do_lcub_rcub = (what & sgml_lcub_rcub) == 0,
628 do_lsqb_rsqb = (what & sgml_lsqb_rsqb) == 0;
629
630 size_t j = strnlen(dst, count_dst);
631 count_src = wcsnlen(src, count_src);
632 for (size_t i = 0; i < count_src;) {
633 size_t n = glyphlen(src + i, count_src - i);
634 if (n == 1 &&
635 do_ascii && (unsigned int)src[i] < 128 &&
636 src[i] != L'&' &&
637 (do_quot || (src[i] != L'"')) &&
638 (do_apos || (src[i] != L'\'')) &&
639 (do_lt_gt || (src[i] != L'<' && src[i] != L'>')) &&
640 (do_bsol || (src[i] != L'\\')) &&
641 (do_dollar || (src[i] != L'$')) &&
642 (do_percnt || (src[i] != L'%')) &&
643 (do_commat || (src[i] != L'@')) &&
644 (do_num || (src[i] != L'#')) &&
645 (do_lpar_rpar || (src[i] != L'(' && src[i] != L')')) &&
646 (do_lcub_rcub || (src[i] != L'{' && src[i] != L'}')) &&
647 (do_lsqb_rsqb || (src[i] != L'[' && src[i] != L']')))
648 {
649 // 7-bit ASCII and no desire to encode it as an SGML entity.
650 if (j + 1 >= count_dst)
651 throw buffer_overrun;
652 dst[j++] = static_cast<char>(src[i++]);
653 }
654 else {
655 const char* entity = chr2sgml(src + i, n);
656 if (entity) {
657 size_t m = strlen(entity);
658 if (j + m + 2 >= count_dst)
659 throw buffer_overrun;
660 dst[j++] = '&';
661 memcpy(dst + j, entity, m * sizeof(char)); j += m;
662 dst[j++] = ';';
663 i += n;
664 }
665 else if (n == 1) {
666 // Trivial character (1 code unit, 1 glyph), no entity available.
667 if ((unsigned int)src[i] < 128) {
668 if (j + 1 >= count_dst)
669 throw buffer_overrun;
670 dst[j++] = static_cast<char>(src[i++]);
671 }
672 else {
673 char tmp[3 + 8 + 1 + 1];
674 int m = snprintf(tmp, _countof(tmp), "&#x%x;", src[i++]);
675 assert(m >= 0);
676 if (static_cast<size_t>(m) >= count_dst)
677 throw buffer_overrun;
678 memcpy(dst + j, tmp, m * sizeof(char)); j += m;
679 }
680 }
681 else {
682 // Non-trivial character. Decompose.
683 const size_t end = i + n;
684 while (i < end) {
685 if ((entity = chr2sgml(src + i, 1)) != nullptr) {
686 size_t m = strlen(entity);
687 if (j + m + 2 >= count_dst)
688 throw buffer_overrun;
689 dst[j++] = '&';
690 memcpy(dst + j, entity, m * sizeof(char)); j += m;
691 dst[j++] = ';';
692 i++;
693 }
694 else if ((unsigned int)src[i] < 128) {
695 if (j + 1 >= count_dst)
696 throw buffer_overrun;
697 dst[j++] = static_cast<char>(src[i++]);
698 }
699 else {
700 uint32_t unicode;
701#ifdef _WIN32
702 if (i + 1 < end && is_surrogate_pair(src + i)) {
703 unicode = surrogate_pair_to_ucs4(src + i);
704 i += 2;
705 }
706 else
707#endif
708 {
709 unicode = src[i++];
710 }
711 char tmp[3 + 8 + 1 + 1];
712 int m = snprintf(tmp, _countof(tmp), "&#x%x;", unicode);
713 assert(m >= 0);
714 if (static_cast<size_t>(m) >= count_dst)
715 throw buffer_overrun;
716 memcpy(dst + j, tmp, m * sizeof(char)); j += m;
717 }
718 }
719 }
720 }
721 }
722 if (j >= count_dst)
723 throw buffer_overrun;
724 dst[j] = 0;
725 return j;
726 }
727
728 inline _Deprecated_("Use stdex::wstr2sgmlcat") size_t wstr2sgml(
729 _Inout_cap_(count_dst) char* dst, _In_ size_t count_dst,
730 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
731 _In_ size_t what = 0)
732 {
733 return wstr2sgmlcat(dst, count_dst, src, count_src, what);
734 }
735
744 inline void wstr2sgmlcpy(
745 _Inout_ std::string& dst,
746 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
747 _In_ size_t what = 0)
748 {
749 dst.clear();
750 wstr2sgmlcat(dst, src, count_src, what);
751 }
752
764 inline size_t wstr2sgmlcpy(
765 _Inout_cap_(count_dst) char* dst, _In_ size_t count_dst,
766 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
767 _In_ size_t what = 0)
768 {
769 assert(dst || !count_dst);
770 if (count_dst)
771 dst[0] = 0;
772 wstr2sgmlcat(dst, count_dst, src, count_src, what);
773 }
774
784 inline std::string wstr2sgml(
785 _In_reads_or_z_opt_(count_src) const wchar_t* src, _In_ size_t count_src,
786 _In_ size_t what = 0)
787 {
788 std::string dst;
789 wstr2sgmlcat(dst, src, count_src, what);
790 return dst;
791 }
792
801 inline std::string wstr2sgml(
802 _In_ const std::wstring& src,
803 _In_ size_t what = 0)
804 {
805 return wstr2sgml(src.c_str(), src.size(), what);
806 }
807}