stdex
Additional custom algorithms not covered by Standard C++
Loading...
Searching...
No Matches
string.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2016-2023 Amebis
4*/
5
6#pragma once
7
8#include "sal.hpp"
9#include <assert.h>
10#include <ctype.h>
11#include <stdint.h>
12#include <stdexcept>
13
14namespace stdex
15{
///
/// UTF-16 code unit
///
/// `wchar_t` when `_WIN32` is defined, `char16_t` otherwise.
///
#ifdef _WIN32
typedef wchar_t utf16_t;
#else
typedef char16_t utf16_t;
#endif

///
/// Tests whether the UTF-16 code unit is a high (leading) surrogate
///
/// \param[in] chr  Code unit to test
///
/// \return `true` when `chr` is in the range 0xD800..0xDBFF (both inclusive); `false` otherwise
///
inline bool is_high_surrogate(_In_ utf16_t chr)
{
    // The lower bound is inclusive: 0xD800 is itself a high surrogate.
    return 0xd800 <= chr && chr < 0xdc00;
}

///
/// Tests whether the UTF-16 code unit is a low (trailing) surrogate
///
/// \param[in] chr  Code unit to test
///
/// \return `true` when `chr` is in the range 0xDC00..0xDFFF (both inclusive); `false` otherwise
///
inline bool is_low_surrogate(_In_ utf16_t chr)
{
    // The lower bound is inclusive: 0xDC00 is itself a low surrogate.
    return 0xdc00 <= chr && chr < 0xe000;
}

///
/// Tests whether two consecutive UTF-16 code units form a surrogate pair
///
/// \param[in] str  Pointer to two code units; both `str[0]` and `str[1]` may be read
///
/// \return `true` when `str[0]` is a high surrogate and `str[1]` is a low surrogate
///
inline bool is_surrogate_pair(_In_reads_(2) const utf16_t* str)
{
    return is_high_surrogate(str[0]) && is_low_surrogate(str[1]);
}
54
60 inline char32_t surrogate_pair_to_ucs4(_In_reads_(2) const utf16_t* str)
61 {
62 assert(is_surrogate_pair(str));
63 return
64 ((char32_t)(str[0] - 0xd800) << 10) +
65 (char32_t)(str[1] - 0xdc00) +
66 0x10000;
67 }
68
74 inline void ucs4_to_surrogate_pair(_Out_writes_(2) utf16_t* str, _In_ char32_t chr)
75 {
76 assert(chr >= 0x10000);
77 chr -= 0x10000;
78 str[0] = 0xd800 + (char32_t)((chr >> 10) & 0x3ff);
79 str[1] = 0xdc00 + (char32_t)(chr & 0x3ff);
80 }
81
///
/// Tests whether the code point falls into one of the combining-character ranges
///
/// Ranges checked (upper bounds exclusive): 0x0300..0x0370, 0x1DC0..0x1E00,
/// 0x20D0..0x2100 and 0xFE20..0xFE30.
///
/// \param[in] chr  Code point to test
///
/// \return `true` when `chr` lies in any of the ranges above; `false` otherwise
///
inline bool iscombining(_In_ char32_t chr)
{
    // The ranges are disjoint and ascending, so walk the boundaries in order.
    if (chr < 0x0300) return false;
    if (chr < 0x0370) return true;
    if (chr < 0x1dc0) return false;
    if (chr < 0x1e00) return true;
    if (chr < 0x20d0) return false;
    if (chr < 0x2100) return true;
    if (chr < 0xfe20) return false;
    return chr < 0xfe30;
}
95
///
/// Tests whether the character is a line-break character
///
/// \param[in] chr  Character to test
///
/// \return 1 when `chr` equals `'\n'` or `'\r'`; 0 otherwise
///
template <class T>
inline size_t islbreak(_In_ T chr)
{
    if (chr == '\n')
        return 1;
    if (chr == '\r')
        return 1;
    return 0;
}
106
///
/// Tests whether the text starts with a line break and returns its length
///
/// \param[in] chr    Text to test
/// \param[in] count  Number of characters available in `chr`
///
/// \return 2 when the text starts with `"\r\n"` or `"\n\r"`; 1 when it starts with a
///         single `'\n'` or `'\r'`; 0 otherwise (including `count == 0`)
///
template <class T>
inline size_t islbreak(_In_reads_or_z_opt_(count) const T* chr, _In_ size_t count)
{
    if (count >= 2 && (chr[0] == '\r' && chr[1] == '\n' || chr[0] == '\n' && chr[1] == '\r'))
        return 2;
    // One available character is enough to recognize a single-character break.
    if (count >= 1 && (chr[0] == '\n' || chr[0] == '\r'))
        return 1;
    return 0;
}
122
129 inline size_t glyphlen(_In_reads_or_z_opt_(count) const wchar_t* glyph, size_t count)
130 {
131 if (count) {
132#ifdef _WIN32
133 size_t i = count < 2 || !is_surrogate_pair(glyph) ? 1 : 2;
134#else
135 size_t i = 1;
136#endif
137 for (; i < count && iscombining(glyph[i]); ++i);
138 return i;
139 }
140 return 0;
141 }
142
///
/// Returns the length of a zero-terminated string
///
/// \param[in] str  Zero-terminated string; asserted to be non-null
///
/// \return Number of characters before the terminating zero
///
template <class T>
inline size_t strlen(_In_z_ const T* str)
{
    assert(str);
    const T* cursor = str;
    while (*cursor)
        ++cursor;
    return static_cast<size_t>(cursor - str);
}
158
///
/// Returns the length of a string, examining at most `count` characters
///
/// \param[in] str    String to measure; may be null only when `count` is 0
/// \param[in] count  Maximum number of characters to examine
///
/// \return Number of characters before the first zero, or `count` when no zero is found
///         among the first `count` characters
///
template <class T>
inline size_t strnlen(_In_reads_or_z_opt_(count) const T* str, _In_ size_t count)
{
    // A null pointer is acceptable as long as nothing is going to be read from it.
    assert(str || !count);
    size_t i;
    for (i = 0; i < count && str[i]; ++i);
    return i;
}
175
/// Largest `size_t` value; returned by the search functions when nothing is found
constexpr size_t npos = static_cast<size_t>(-1);
177
187 template <class T>
188 inline size_t strnchr(
189 _In_reads_or_z_opt_(count) const T* str,
190 _In_ size_t count,
191 _In_ T chr)
192 {
193 assert(str || !count);
194 for (size_t i = 0; i < count && str[i]; ++i)
195 if (str[i] == chr) return i;
196 return npos;
197 }
198
208 template <class T>
209 inline size_t strrnchr(
210 _In_reads_or_z_opt_(count) const T* str,
211 _In_ size_t count,
212 _In_ T chr)
213 {
214 assert(str || !count);
215 size_t z = npos;
216 for (size_t i = 0; i < count && str[i]; ++i)
217 if (str[i] == chr) z = i;
218 return z;
219 }
220
230 template <class T>
231 inline size_t strnichr(
232 _In_reads_or_z_opt_(count) const T* str,
233 _In_ size_t count,
234 _In_ T chr,
235 _In_ const std::locale& locale)
236 {
237 assert(str || !count);
238 const auto& ctype = std::use_facet<std::ctype<T>>(locale);
239 chr = ctype.tolower(chr);
240 for (size_t i = 0; i < count && str[i]; ++i)
241 if (ctype.tolower(str[i]) == chr) return i;
242 return npos;
243 }
244
254 template <class T>
255 inline size_t strrnichr(
256 _In_reads_or_z_opt_(count) const T* str,
257 _In_ size_t count,
258 _In_ T chr,
259 _In_ const std::locale& locale)
260 {
261 assert(str || !count);
262 const auto& ctype = std::use_facet<std::ctype<T>>(locale);
263 chr = ctype.tolower(chr);
264 size_t z = npos;
265 for (size_t i = 0; i < count && str[i]; ++i)
266 if (ctype.tolower(str[i]) == chr) z = i;
267 return z;
268 }
269
///
/// Compares two strings character by character
///
/// Each string ends at its first zero character or after its own count, whichever
/// comes first. A string that is a proper prefix of the other one compares as smaller.
///
/// \param[in] str1    First string; may be null only when `count1` is 0
/// \param[in] count1  Maximum number of characters of `str1` to examine
/// \param[in] str2    Second string; may be null only when `count2` is 0
/// \param[in] count2  Maximum number of characters of `str2` to examine
///
/// \return -1 when `str1` orders before `str2`, +1 when it orders after, 0 when equal
///
template <class T1, class T2>
inline int strncmp(
    _In_reads_or_z_opt_(count1) const T1* str1, _In_ size_t count1,
    _In_reads_or_z_opt_(count2) const T2* str2, _In_ size_t count2)
{
    assert(str1 || !count1);
    assert(str2 || !count2);

    size_t pos = 0;
    while (pos < count1 && pos < count2) {
        const T1 lhs = str1[pos];
        const T2 rhs = str2[pos];
        if (!lhs && !rhs)
            break; // Both strings ended at the same position.
        if (lhs > rhs) return +1;
        if (lhs < rhs) return -1;
        ++pos;
    }

    // Common part is equal: whichever string still has characters left is the greater one.
    if (pos < count1 && str1[pos]) return +1;
    if (pos < count2 && str2[pos]) return -1;
    return 0;
}
296
///
/// Compares two strings using the collation rules of a locale
///
/// The ranges `[str1, str1 + count1)` and `[str2, str2 + count2)` are handed to
/// `std::collate<T>::compare` of the given locale.
///
/// \param[in] str1    First string; may be null only when `count1` is 0
/// \param[in] count1  Number of characters in `str1`
/// \param[in] str2    Second string; may be null only when `count2` is 0
/// \param[in] count2  Number of characters in `str2`
/// \param[in] locale  Locale supplying the `std::collate<T>` facet
///
/// \return Result of `std::collate<T>::compare`
///
template <class T>
inline int strncoll(
    _In_reads_or_z_opt_(count1) const T* str1, _In_ size_t count1,
    _In_reads_or_z_opt_(count2) const T* str2, _In_ size_t count2,
    _In_ const std::locale& locale)
{
    assert(str1 || !count1);
    assert(str2 || !count2);
    const T* const end1 = str1 + count1;
    const T* const end2 = str2 + count2;
    const std::collate<T>& facet = std::use_facet<std::collate<T>>(locale);
    return facet.compare(str1, end1, str2, end2);
}
318
///
/// Compares two strings character by character, ignoring case
///
/// Characters are passed through `std::ctype<T1>::tolower` / `std::ctype<T2>::tolower`
/// of the given locale before comparison. Each string ends at its first zero character
/// or after its own count, whichever comes first. A string that is a proper prefix of
/// the other one compares as smaller.
///
/// \param[in] str1    First string; may be null only when `count1` is 0
/// \param[in] count1  Maximum number of characters of `str1` to examine
/// \param[in] str2    Second string; may be null only when `count2` is 0
/// \param[in] count2  Maximum number of characters of `str2` to examine
/// \param[in] locale  Locale supplying the `std::ctype` facets
///
/// \return -1 when `str1` orders before `str2`, +1 when it orders after, 0 when equal
///
template <class T1, class T2>
inline int strnicmp(
    _In_reads_or_z_opt_(count1) const T1* str1, _In_ size_t count1,
    _In_reads_or_z_opt_(count2) const T2* str2, _In_ size_t count2,
    _In_ const std::locale& locale)
{
    assert(str1 || !count1);
    assert(str2 || !count2);
    const std::ctype<T1>& facet1 = std::use_facet<std::ctype<T1>>(locale);
    const std::ctype<T2>& facet2 = std::use_facet<std::ctype<T2>>(locale);

    size_t pos = 0;
    while (pos < count1 && pos < count2) {
        const T1 lhs = facet1.tolower(str1[pos]);
        const T2 rhs = facet2.tolower(str2[pos]);
        if (!lhs && !rhs)
            break; // Both strings ended at the same position.
        if (lhs > rhs) return +1;
        if (lhs < rhs) return -1;
        ++pos;
    }

    // Common part is equal: whichever string still has characters left is the greater one.
    if (pos < count1 && str1[pos]) return +1;
    if (pos < count2 && str2[pos]) return -1;
    return 0;
}
348
358 template <class T1, class T2>
359 inline size_t strnstr(
360 _In_reads_or_z_opt_(count) const T1* str,
361 _In_ size_t count,
362 _In_z_ const T2* sample)
363 {
364 assert(str || !count);
365 assert(sample);
366 for (size_t offset = 0;; ++offset) {
367 for (size_t i = offset, j = 0;; ++i, ++j) {
368 if (!sample[j])
369 return offset;
370 if (i >= count || !str[i])
371 return npos;
372 if (str[i] != sample[j])
373 break;
374 }
375 }
376 }
377
387 template <class T1, class T2>
388 inline size_t strnistr(
389 _In_reads_or_z_opt_(count) const T1* str,
390 _In_ size_t count,
391 _In_z_ const T2* sample,
392 _In_ const std::locale& locale)
393 {
394 assert(str || !count);
395 assert(sample);
396 const auto& ctype1 = std::use_facet<std::ctype<T1>>(locale);
397 const auto& ctype2 = std::use_facet<std::ctype<T2>>(locale);
398 for (size_t offset = 0;; ++offset) {
399 for (size_t i = offset, j = 0;; ++i, ++j) {
400 if (!sample[j])
401 return offset;
402 if (i >= count || !str[i])
403 return npos;
404 if (ctype1.tolower(str[i]) != ctype2.tolower(sample[j]))
405 break;
406 }
407 }
408 }
409
///
/// Copies at most `count` characters from one string to another
///
/// Copying stops after the terminating zero has been copied or after `count`
/// characters, whichever comes first. When `count` is reached first, the destination is
/// not zero-terminated.
///
/// \param[out] dst    Destination buffer with room for `count` characters
/// \param[in]  src    Source string
/// \param[in]  count  Maximum number of characters to copy
///
/// \return Number of characters copied, not counting the terminating zero
///
template <class T1, class T2>
inline size_t strncpy(
    _Out_writes_(count) _Post_maybez_ T1* dst,
    _In_reads_or_z_opt_(count) const T2* src, _In_ size_t count)
{
    assert(dst && src || !count);
    size_t copied = 0;
    while (copied < count) {
        dst[copied] = src[copied];
        if (dst[copied] == 0)
            break; // Terminator copied; it is not part of the returned length.
        ++copied;
    }
    return copied;
}
432
///
/// Copies a string, bounded by both the destination capacity and the source length
///
/// Copying stops at the first of:
/// - `count_dst` characters written (destination is left without a terminating zero),
/// - `count_src` characters consumed (a terminating zero is appended),
/// - the terminating zero of `src` copied.
///
/// \param[out] dst        Destination buffer with room for `count_dst` characters
/// \param[in]  count_dst  Capacity of `dst` in characters
/// \param[in]  src        Source string; may be null only when `count_src` is 0
/// \param[in]  count_src  Maximum number of characters to read from `src`
///
/// \return Number of characters copied, not counting the terminating zero
///
template <class T1, class T2>
inline size_t strncpy(
    _Out_writes_(count_dst) _Post_maybez_ T1* dst, _In_ size_t count_dst,
    _In_reads_or_z_opt_(count_src) const T2* src, _In_ size_t count_src)
{
    assert(dst || !count_dst);
    assert(src || !count_src);
    for (size_t i = 0; ; ++i)
    {
        // dst[count_dst] is already outside the buffer, so stop before touching it.
        if (i >= count_dst)
            return i;
        // src[count_src] must not be read; terminate the copy instead.
        if (i >= count_src) {
            dst[i] = 0;
            return i;
        }
        if ((dst[i] = src[i]) == 0)
            return i;
    }
}
462
///
/// Converts every CR+LF sequence in a zero-terminated string to a single LF
///
/// All other characters, including a lone `'\r'`, are copied unchanged. The output is
/// zero-terminated.
///
/// \param[out] dst  Destination buffer; asserted to be non-null
/// \param[in]  src  Zero-terminated source string; asserted to be non-null
///
/// \return Number of characters written to `dst`, not counting the terminating zero
///
template <class T>
inline size_t crlf2nl(_Out_writes_z_(strlen(src)) T* dst, _In_z_ const T* src)
{
    assert(dst);
    assert(src);
    size_t written = 0;
    size_t read = 0;
    while (src[read]) {
        if (src[read] == '\r' && src[read + 1] == '\n') {
            // Collapse the two-character sequence into one line feed.
            dst[written++] = '\n';
            read += 2;
        }
        else
            dst[written++] = src[read++];
    }
    dst[written] = 0;
    return written;
}
489
///
/// Internal worker: parses the magnitude of an integer into an unsigned accumulator
///
/// Skips leading whitespace, reads an optional `+`/`-` sign, handles an optional `0x`/`0X`
/// prefix (radix 16) or autodetects the radix (radix 0: `0x` => 16, leading `0` => 8,
/// otherwise 10), then accumulates digits until the first character that is not a valid
/// digit for the radix, the end of `count`, or a zero character.
///
/// \param[in]  str    Text to parse; may be null only when `count` is 0
/// \param[in]  count  Maximum number of characters of `str` to examine
/// \param[out] end    When non-null, receives the index at which parsing stopped
/// \param[in]  radix  Number base: 0 to autodetect, or 2..36
/// \param[out] flags  Bit 0x01 is set when a `-` sign was read; bit 0x02 is set when the
///                    magnitude did not fit into `T_bin`
///
/// \return Parsed magnitude. After an overflow it holds the last value accumulated
///         before the overflowing digit.
///
template <class T, class T_bin>
inline T_bin strtoint(
    _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
    _Out_opt_ size_t* end,
    _In_ int radix,
    _Out_ uint8_t& flags)
{
    assert(str || !count);
    assert(radix == 0 || 2 <= radix && radix <= 36);

    size_t i = 0;
    T_bin value = 0, digit,
        max_ui = (T_bin)-1,
        max_ui_pre1, max_ui_pre2;

    flags = 0;

    // Skip leading spaces.
    // NOTE(review): isspace() is specified only for values representable as unsigned char
    // (or EOF); wide or negative code units fall outside that contract - confirm callers.
    for (;; ++i) {
        if (i >= count || !str[i]) goto error;
        if (!isspace(str[i])) break;
    }

    // Read the sign.
    if (str[i] == '+') {
        flags &= ~0x01;
        ++i;
        if (i >= count || !str[i]) goto error;
    }
    else if (str[i] == '-') {
        flags |= 0x01;
        ++i;
        if (i >= count || !str[i]) goto error;
    }

    if (radix == 16) {
        // On hexadecimal, allow leading 0x.
        if (str[i] == '0' && i + 1 < count && (str[i + 1] == 'x' || str[i + 1] == 'X')) {
            i += 2;
            if (i >= count || !str[i]) goto error;
        }
    }
    else if (!radix) {
        // Autodetect radix.
        if (str[i] == '0') {
            ++i;
            if (i >= count || !str[i]) goto error;
            if (str[i] == 'x' || str[i] == 'X') {
                radix = 16;
                ++i;
                if (i >= count || !str[i]) goto error;
            }
            else
                radix = 8;
        }
        else
            radix = 10;
    }

    // We have the radix. Precompute the largest value that can still take another digit.
    max_ui_pre1 = max_ui / (T_bin)radix;
    max_ui_pre2 = max_ui % (T_bin)radix;
    for (;;) {
        if ('0' <= str[i] && str[i] <= '9')
            digit = (T_bin)str[i] - '0';
        else if ('A' <= str[i] && str[i] <= 'Z')
            digit = (T_bin)str[i] - 'A' + '\x0a';
        else if ('a' <= str[i] && str[i] <= 'z')
            digit = (T_bin)str[i] - 'a' + '\x0a';
        else
            goto error;
        if (digit >= (T_bin)radix)
            goto error;

        if (value < max_ui_pre1 || // Neither multiplication nor addition will overflow.
            value == max_ui_pre1 && digit <= max_ui_pre2) // Small digits will not overflow.
            value = value * (T_bin)radix + digit;
        else {
            // Overflow! Keep consuming digits so that end lands after the whole number.
            flags |= 0x02;
        }

        ++i;
        if (i >= count || !str[i])
            goto error;
    }

error:
    // Common exit for every way parsing can stop, not only for failures.
    if (end) *end = i;
    return value;
}

///
/// Parses a signed integer from a string
///
/// The magnitude is parsed into the unsigned type of the same size as `T_bin`. When it
/// does not fit, or when a negative number has the sign bit set in its magnitude, the
/// result saturates to the smallest (negative) or largest (positive) value of `T_bin`.
///
/// NOTE(review): a positive number whose magnitude has the sign bit set (e.g. "40000" for
/// a 16-bit `T_bin`) is not treated as an overflow and is returned reinterpreted as
/// negative - confirm this is intended.
///
/// \param[in]  str    Text to parse; may be null only when `count` is 0
/// \param[in]  count  Maximum number of characters of `str` to examine
/// \param[out] end    When non-null, receives the index at which parsing stopped
/// \param[in]  radix  Number base: 0 to autodetect, or 2..36
///
/// \return Parsed value
///
/// \throws std::invalid_argument when `sizeof(T_bin)` is not 1, 2, 4 or 8
///
template <class T, class T_bin>
T_bin strtoint(
    _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
    _Out_opt_ size_t* end,
    _In_ int radix)
{
    uint8_t flags;
    T_bin value;

    switch (sizeof(T_bin)) {
    case 1:
        value = (T_bin)strtoint<T, uint8_t>(str, count, end, radix, flags);
        if ((flags & 0x01) && (value & 0x80)) {
            // Sign bit is 1 => overflow.
            flags |= 0x02;
        }
        return (flags & 0x02) ?
            (flags & 0x01) ? (T_bin)0x80 : (T_bin)0x7f :
            (flags & 0x01) ? -value : value;

    case 2:
        // The 16-bit magnitude is parsed as uint16_t, matching the other widths.
        value = (T_bin)strtoint<T, uint16_t>(str, count, end, radix, flags);
        if ((flags & 0x01) && (value & 0x8000)) {
            // Sign bit is 1 => overflow.
            flags |= 0x02;
        }
        return (flags & 0x02) ?
            (flags & 0x01) ? (T_bin)0x8000 : (T_bin)0x7fff :
            (flags & 0x01) ? -value : value;

    case 4:
        value = (T_bin)strtoint<T, uint32_t>(str, count, end, radix, flags);
        if ((flags & 0x01) && (value & 0x80000000)) {
            // Sign bit is 1 => overflow.
            flags |= 0x02;
        }
        return (flags & 0x02) ?
            (flags & 0x01) ? (T_bin)0x80000000 : (T_bin)0x7fffffff :
            (flags & 0x01) ? -value : value;

    case 8:
        value = (T_bin)strtoint<T, uint64_t>(str, count, end, radix, flags);
        if ((flags & 0x01) && (value & 0x8000000000000000)) {
            // Sign bit is 1 => overflow.
            flags |= 0x02;
        }
        return (flags & 0x02) ?
            (flags & 0x01) ? (T_bin)0x8000000000000000 : (T_bin)0x7fffffffffffffff :
            (flags & 0x01) ? -value : value;

    default:
        throw std::invalid_argument("Unsupported bit length");
    }
}
648
659 template <class T, class T_bin>
660 inline T_bin strtouint(
661 _In_reads_or_z_opt_(count) const T* str,
662 _In_ size_t count,
663 _Out_opt_ size_t* end,
664 _In_ int radix)
665 {
666 uint8_t flags;
667 T_bin value;
668
669 switch (sizeof(T_bin)) {
670 case 1: value = (T_bin)strtoint<T, uint8_t>(str, count, end, radix, flags); break;
671 case 2: value = (T_bin)strtoint<T, uint16_t>(str, count, end, radix, flags); break;
672 case 4: value = (T_bin)strtoint<T, uint32_t>(str, count, end, radix, flags); break;
673 case 8: value = (T_bin)strtoint<T, uint64_t>(str, count, end, radix, flags); break;
674 default: throw std::invalid_argument("Unsupported bit length");
675 }
676
677 return (flags & 0x02) ?
678 (flags & 0x01) ? (T_bin)0 : (T_bin)-1 :
679 (flags & 0x01) ? ~value : value;
680 }
681
692 template <class T>
693 inline int32_t strto32(
694 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
695 _Out_opt_ size_t* end,
696 _In_ int radix)
697 {
698 return strtoint<T, int32_t>(str, count, end, radix);
699 }
700
711 template <class T>
712 inline int64_t strto64(
713 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
714 _Out_opt_ size_t* end,
715 _In_ int radix)
716 {
717 return strtoint<T, int64_t>(str, count, end, radix);
718 }
719
///
/// Parses a signed pointer-sized integer from a string
///
/// Forwards to strto64() when `_WIN64` or `__LP64__` is defined and to strto32()
/// otherwise.
///
/// \param[in]  str    Text to parse
/// \param[in]  count  Maximum number of characters of `str` to examine
/// \param[out] end    Optional; passed through to the selected parser
/// \param[in]  radix  Number base; passed through to the selected parser
///
/// \return Parsed value converted to `intptr_t`
///
template <class T>
inline intptr_t strtoi(
    _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
    _Out_opt_ size_t* end,
    _In_ int radix)
{
#if defined(_WIN64) || defined(__LP64__)
    const auto parsed = strto64(str, count, end, radix);
#else
    const auto parsed = strto32(str, count, end, radix);
#endif
    return static_cast<intptr_t>(parsed);
}
743
754 template <class T>
755 inline uint32_t strtou32(
756 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
757 _Out_opt_ size_t* end,
758 _In_ int radix)
759 {
760 return strtouint<T, uint32_t>(str, count, end, radix);
761 }
762
773 template <class T>
774 inline uint64_t strtou64(
775 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
776 _Out_opt_ size_t* end,
777 _In_ int radix)
778 {
779 return strtouint<T, uint64_t>(str, count, end, radix);
780 }
781
///
/// Parses an unsigned pointer-sized integer from a string
///
/// Forwards to strtou64() when `_WIN64` or `__LP64__` is defined and to strtou32()
/// otherwise.
///
/// \param[in]  str    Text to parse
/// \param[in]  count  Maximum number of characters of `str` to examine
/// \param[out] end    Optional; passed through to the selected parser
/// \param[in]  radix  Number base; passed through to the selected parser
///
/// \return Parsed value converted to `size_t`
///
template <class T>
inline size_t strtoui(
    _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
    _Out_opt_ size_t* end,
    _In_ int radix)
{
#if defined(_WIN64) || defined(__LP64__)
    const auto parsed = strtou64(str, count, end, radix);
#else
    const auto parsed = strtou32(str, count, end, radix);
#endif
    return static_cast<size_t>(parsed);
}
805}