stdex
Additional custom algorithms not covered by Standard C++
Loading...
Searching...
No Matches
string.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2016-2023 Amebis
4*/
5
6#pragma once
7
#include "sal.hpp"
#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdexcept>
13
14namespace stdex
15{
/// Locale handle type: `_locale_t` on Windows builds, the global `::locale_t` elsewhere.
#if defined(_WIN32)
using locale_t = _locale_t;
#else
using locale_t = ::locale_t;
#endif

/// UTF-16 code unit type: `wchar_t` on Windows builds, `char16_t` elsewhere.
#if defined(_WIN32)
using utf16_t = wchar_t;
#else
using utf16_t = char16_t;
#endif
30
36 inline bool is_high_surrogate(_In_ utf16_t chr)
37 {
38 return 0xd800 < chr && chr < 0xdc00;
39 }
40
46 inline bool is_low_surrogate(_In_ utf16_t chr)
47 {
48 return 0xdc00 < chr && chr < 0xe000;
49 }
50
56 inline bool is_surrogate_pair(_In_reads_(2) const utf16_t* str)
57 {
58 return is_high_surrogate(str[0]) && is_low_surrogate(str[1]);
59 }
60
66 inline char32_t surrogate_pair_to_ucs4(_In_reads_(2) const utf16_t* str)
67 {
68 assert(is_surrogate_pair(str));
69 return
70 ((char32_t)(str[0] - 0xd800) << 10) +
71 (char32_t)(str[1] - 0xdc00) +
72 0x10000;
73 }
74
80 inline void ucs4_to_surrogate_pair(_Out_writes_(2) utf16_t* str, _In_ char32_t chr)
81 {
82 assert(chr >= 0x10000);
83 chr -= 0x10000;
84 str[0] = 0xd800 + (char32_t)((chr >> 10) & 0x3ff);
85 str[1] = 0xdc00 + (char32_t)(chr & 0x3ff);
86 }
87
93 inline bool iscombining(_In_ char32_t chr)
94 {
95 return
96 0x0300 <= chr && chr < 0x0370 ||
97 0x1dc0 <= chr && chr < 0x1e00 ||
98 0x20d0 <= chr && chr < 0x2100 ||
99 0xfe20 <= chr && chr < 0xfe30;
100 }
101
107 template <class T>
108 inline size_t islbreak(_In_ T chr)
109 {
110 return chr == '\n' || chr == '\r';
111 }
112
119 template <class T>
120 inline size_t islbreak(_In_reads_or_z_opt_(count) const T* chr, _In_ size_t count)
121 {
122 _Analysis_assume_(chr || !count);
123 if (count >= 2 && (chr[0] == '\r' && chr[1] == '\n' || chr[0] == '\n' && chr[1] == '\r'))
124 return 2;
125 if (count > 1 && (chr[0] == '\n' || chr[0] == '\r'))
126 return 1;
127 return 0;
128 }
129
136 inline size_t glyphlen(_In_reads_or_z_opt_(count) const wchar_t* glyph, size_t count)
137 {
138 _Analysis_assume_(glyph || !count);
139 if (count) {
140#ifdef _WIN32
141 size_t i = count < 2 || !is_surrogate_pair(glyph) ? 1 : 2;
142#else
143 size_t i = 1;
144#endif
145 for (; i < count && iscombining(glyph[i]); ++i);
146 return i;
147 }
148 return 0;
149 }
150
158 template <class T>
159 inline size_t strlen(_In_z_ const T* str)
160 {
161 assert(str);
162 size_t i;
163 for (i = 0; str[i]; ++i);
164 return i;
165 }
166
175 template <class T>
176 inline size_t strnlen(_In_reads_or_z_opt_(count) const T* str, _In_ size_t count)
177 {
178 assert(str);
179 size_t i;
180 for (i = 0; i < count && str[i]; ++i);
181 return i;
182 }
183
/// Largest size_t value; returned by the search functions below when nothing is found.
constexpr size_t npos = static_cast<size_t>(-1);
185
195 template <class T>
196 inline size_t strnchr(
197 _In_reads_or_z_opt_(count) const T* str,
198 _In_ size_t count,
199 _In_ T chr)
200 {
201 assert(str || !count);
202 for (size_t i = 0; i < count && str[i]; ++i)
203 if (str[i] == chr) return i;
204 return npos;
205 }
206
216 template <class T>
217 inline size_t strrnchr(
218 _In_reads_or_z_opt_(count) const T* str,
219 _In_ size_t count,
220 _In_ T chr)
221 {
222 assert(str || !count);
223 size_t z = npos;
224 for (size_t i = 0; i < count && str[i]; ++i)
225 if (str[i] == chr) z = i;
226 return z;
227 }
228
238 template <class T>
239 inline size_t strnichr(
240 _In_reads_or_z_opt_(count) const T* str,
241 _In_ size_t count,
242 _In_ T chr,
243 _In_ const std::locale& locale)
244 {
245 assert(str || !count);
246 const auto& ctype = std::use_facet<std::ctype<T>>(locale);
247 chr = ctype.tolower(chr);
248 for (size_t i = 0; i < count && str[i]; ++i)
249 if (ctype.tolower(str[i]) == chr) return i;
250 return npos;
251 }
252
262 template <class T>
263 inline size_t strrnichr(
264 _In_reads_or_z_opt_(count) const T* str,
265 _In_ size_t count,
266 _In_ T chr,
267 _In_ const std::locale& locale)
268 {
269 assert(str || !count);
270 const auto& ctype = std::use_facet<std::ctype<T>>(locale);
271 chr = ctype.tolower(chr);
272 size_t z = npos;
273 for (size_t i = 0; i < count && str[i]; ++i)
274 if (ctype.tolower(str[i]) == chr) z = i;
275 return z;
276 }
277
288 template <class T1, class T2>
289 inline int strncmp(
290 _In_reads_or_z_opt_(count1) const T1* str1, _In_ size_t count1,
291 _In_reads_or_z_opt_(count2) const T2* str2, _In_ size_t count2)
292 {
293 assert(str1 || !count1);
294 assert(str2 || !count2);
295 size_t i; T1 a; T2 b;
296 for (i = 0; i < count1 && i < count2 && ((a = str1[i]) | (b = str2[i])); ++i) {
297 if (a > b) return +1;
298 if (a < b) return -1;
299 }
300 if (i < count1 && str1[i]) return +1;
301 if (i < count2 && str2[i]) return -1;
302 return 0;
303 }
304
315 template <class T>
316 inline int strncoll(
317 _In_reads_or_z_opt_(count1) const T* str1, _In_ size_t count1,
318 _In_reads_or_z_opt_(count2) const T* str2, _In_ size_t count2,
319 _In_ const std::locale& locale)
320 {
321 assert(str1 || !count1);
322 assert(str2 || !count2);
323 auto& collate = std::use_facet<std::collate<T>>(locale);
324 return collate.compare(str1, str1 + count1, str2, str2 + count2);
325 }
326
337 template <class T1, class T2>
338 inline int strnicmp(
339 _In_reads_or_z_opt_(count1) const T1* str1, _In_ size_t count1,
340 _In_reads_or_z_opt_(count2) const T2* str2, _In_ size_t count2,
341 _In_ const std::locale& locale)
342 {
343 assert(str1 || !count1);
344 assert(str2 || !count2);
345 size_t i; T1 a; T2 b;
346 const auto& ctype1 = std::use_facet<std::ctype<T1>>(locale);
347 const auto& ctype2 = std::use_facet<std::ctype<T2>>(locale);
348 for (i = 0; i < count1 && i < count2 && ((a = ctype1.tolower(str1[i])) | (b = ctype2.tolower(str2[i]))); i++) {
349 if (a > b) return +1;
350 if (a < b) return -1;
351 }
352 if (i < count1 && str1[i]) return +1;
353 if (i < count2 && str2[i]) return -1;
354 return 0;
355 }
356
366 template <class T1, class T2>
367 inline size_t strnstr(
368 _In_reads_or_z_opt_(count) const T1* str,
369 _In_ size_t count,
370 _In_z_ const T2* sample)
371 {
372 assert(str || !count);
373 assert(sample);
374 for (size_t offset = 0;; ++offset) {
375 for (size_t i = offset, j = 0;; ++i, ++j) {
376 if (!sample[j])
377 return offset;
378 if (i >= count || !str[i])
379 return npos;
380 if (str[i] != sample[j])
381 break;
382 }
383 }
384 }
385
395 template <class T1, class T2>
396 inline size_t strnistr(
397 _In_reads_or_z_opt_(count) const T1* str,
398 _In_ size_t count,
399 _In_z_ const T2* sample,
400 _In_ const std::locale& locale)
401 {
402 assert(str || !count);
403 assert(sample);
404 const auto& ctype1 = std::use_facet<std::ctype<T1>>(locale);
405 const auto& ctype2 = std::use_facet<std::ctype<T2>>(locale);
406 for (size_t offset = 0;; ++offset) {
407 for (size_t i = offset, j = 0;; ++i, ++j) {
408 if (!sample[j])
409 return offset;
410 if (i >= count || !str[i])
411 return npos;
412 if (ctype1.tolower(str[i]) != ctype2.tolower(sample[j]))
413 break;
414 }
415 }
416 }
417
427 template <class T1, class T2>
428 inline size_t strncpy(
429 _Out_writes_(count) _Post_maybez_ T1* dst,
430 _In_reads_or_z_opt_(count) const T2* src, _In_ size_t count)
431 {
432 assert(dst && src || !count);
433 for (size_t i = 0; ; ++i) {
434 if (i >= count)
435 return i;
436 if ((dst[i] = src[i]) == 0)
437 return i;
438 }
439 }
440
451 template <class T1, class T2>
452 inline size_t strncpy(
453 _Out_writes_(count_dst) _Post_maybez_ T1* dst, _In_ size_t count_dst,
454 _In_reads_or_z_opt_(count_src) const T2* src, _In_ size_t count_src)
455 {
456 assert(dst || !count_dst);
457 assert(src || !count_src);
458 for (size_t i = 0; ; ++i)
459 {
460 if (i > count_dst)
461 return i;
462 if (i > count_src) {
463 dst[i] = 0;
464 return i;
465 }
466 if ((dst[i] = src[i]) == 0)
467 return i;
468 }
469 }
470
480 template <class T>
481 inline size_t crlf2nl(_Out_writes_z_(strlen(src)) T* dst, _In_z_ const T* src)
482 {
483 assert(dst);
484 assert(src);
485 size_t i, j;
486 for (i = j = 0; src[j];) {
487 if (src[j] != '\r' || src[j + 1] != '\n')
488 dst[i++] = src[j++];
489 else {
490 dst[i++] = '\n';
491 j += 2;
492 }
493 }
494 dst[i] = 0;
495 return i;
496 }
497
499 template <class T, class T_bin>
500 inline T_bin strtoint(
501 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
502 _Out_opt_ size_t* end,
503 _In_ int radix,
504 _Out_ uint8_t& flags)
505 {
506 assert(str || !count);
507 assert(radix == 0 || 2 <= radix && radix <= 36);
508
509 size_t i = 0;
510 T_bin value = 0, digit,
511 max_ui = (T_bin)-1,
512 max_ui_pre1, max_ui_pre2;
513
514 flags = 0;
515
516 // Skip leading spaces.
517 for (;; ++i) {
518 if (i >= count || !str[i]) goto error;
519 if (!isspace(str[i])) break;
520 }
521
522 // Read the sign.
523 if (str[i] == '+') {
524 flags &= ~0x01;
525 ++i;
526 if (i >= count || !str[i]) goto error;
527 }
528 else if (str[i] == '-') {
529 flags |= 0x01;
530 ++i;
531 if (i >= count || !str[i]) goto error;
532 }
533
534 if (radix == 16) {
535 // On hexadecimal, allow leading 0x.
536 if (str[i] == '0' && i + 1 < count && (str[i + 1] == 'x' || str[i + 1] == 'X')) {
537 i += 2;
538 if (i >= count || !str[i]) goto error;
539 }
540 }
541 else if (!radix) {
542 // Autodetect radix.
543 if (str[i] == '0') {
544 ++i;
545 if (i >= count || !str[i]) goto error;
546 if (str[i] == 'x' || str[i] == 'X') {
547 radix = 16;
548 ++i;
549 if (i >= count || !str[i]) goto error;
550 }
551 else
552 radix = 8;
553 }
554 else
555 radix = 10;
556 }
557
558 // We have the radix.
559 max_ui_pre1 = max_ui / (T_bin)radix;
560 max_ui_pre2 = max_ui % (T_bin)radix;
561 for (;;) {
562 if ('0' <= str[i] && str[i] <= '9')
563 digit = (T_bin)str[i] - '0';
564 else if ('A' <= str[i] && str[i] <= 'Z')
565 digit = (T_bin)str[i] - 'A' + '\x0a';
566 else if ('a' <= str[i] && str[i] <= 'z')
567 digit = (T_bin)str[i] - 'a' + '\x0a';
568 else
569 goto error;
570 if (digit >= (T_bin)radix)
571 goto error;
572
573 if (value < max_ui_pre1 || // Multiplication nor addition will not overflow.
574 value == max_ui_pre1 && digit <= max_ui_pre2) // Small digits will not overflow.
575 value = value * (T_bin)radix + digit;
576 else {
577 // Overflow!
578 flags |= 0x02;
579 }
580
581 ++i;
582 if (i >= count || !str[i])
583 goto error;
584 }
585
586 error:
587 if (end) *end = i;
588 return value;
589 }
591
602 template <class T, class T_bin>
603 T_bin strtoint(
604 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
605 _Out_opt_ size_t* end,
606 _In_ int radix)
607 {
608 uint8_t flags;
609 T_bin value;
610
611 switch (sizeof(T_bin)) {
612 case 1:
613 value = (T_bin)strtoint<T, uint8_t>(str, count, end, radix, flags);
614 if ((flags & 0x01) && (value & 0x80)) {
615 // Sign bit is 1 => overflow.
616 flags |= 0x02;
617 }
618 return (flags & 0x02) ?
619 (flags & 0x01) ? (T_bin)0x80 : (T_bin)0x7f :
620 (flags & 0x01) ? -value : value;
621
622 case 2:
623 value = (T_bin)strtoint<T, T_U2>(str, count, end, radix, flags);
624 if ((flags & 0x01) && (value & 0x8000)) {
625 // Sign bit is 1 => overflow.
626 flags |= 0x02;
627 }
628 return (flags & 0x02) ?
629 (flags & 0x01) ? (T_bin)0x8000 : (T_bin)0x7fff :
630 (flags & 0x01) ? -value : value;
631
632 case 4:
633 value = (T_bin)strtoint<T, uint32_t>(str, count, end, radix, flags);
634 if ((flags & 0x01) && (value & 0x80000000)) {
635 // Sign bit is 1 => overflow.
636 flags |= 0x02;
637 }
638 return (flags & 0x02) ?
639 (flags & 0x01) ? (T_bin)0x80000000 : (T_bin)0x7fffffff :
640 (flags & 0x01) ? -value : value;
641
642 case 8:
643 value = (T_bin)strtoint<T, uint64_t>(str, count, end, radix, flags);
644 if ((flags & 0x01) && (value & 0x8000000000000000)) {
645 // Sign bit is 1 => overflow.
646 flags |= 0x02;
647 }
648 return (flags & 0x02) ?
649 (flags & 0x01) ? (T_bin)0x8000000000000000 : (T_bin)0x7fffffffffffffff :
650 (flags & 0x01) ? -value : value;
651
652 default:
653 throw std::invalid_argument("Unsupported bit length");
654 }
655 }
656
667 template <class T, class T_bin>
668 inline T_bin strtouint(
669 _In_reads_or_z_opt_(count) const T* str,
670 _In_ size_t count,
671 _Out_opt_ size_t* end,
672 _In_ int radix)
673 {
674 uint8_t flags;
675 T_bin value;
676
677 switch (sizeof(T_bin)) {
678 case 1: value = (T_bin)strtoint<T, uint8_t>(str, count, end, radix, flags); break;
679 case 2: value = (T_bin)strtoint<T, uint16_t>(str, count, end, radix, flags); break;
680 case 4: value = (T_bin)strtoint<T, uint32_t>(str, count, end, radix, flags); break;
681 case 8: value = (T_bin)strtoint<T, uint64_t>(str, count, end, radix, flags); break;
682 default: throw std::invalid_argument("Unsupported bit length");
683 }
684
685 return (flags & 0x02) ?
686 (flags & 0x01) ? (T_bin)0 : (T_bin)-1 :
687 (flags & 0x01) ? ~value : value;
688 }
689
700 template <class T>
701 inline int32_t strto32(
702 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
703 _Out_opt_ size_t* end,
704 _In_ int radix)
705 {
706 return strtoint<T, int32_t>(str, count, end, radix);
707 }
708
719 template <class T>
720 inline int64_t strto64(
721 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
722 _Out_opt_ size_t* end,
723 _In_ int radix)
724 {
725 return strtoint<T, int64_t>(str, count, end, radix);
726 }
727
739 template <class T>
740 inline intptr_t strtoi(
741 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
742 _Out_opt_ size_t* end,
743 _In_ int radix)
744 {
745#if defined(_WIN64) || defined(__LP64__)
746 return (intptr_t)strto64(str, count, end, radix);
747#else
748 return (intptr_t)strto32(str, count, end, radix);
749#endif
750 }
751
762 template <class T>
763 inline uint32_t strtou32(
764 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
765 _Out_opt_ size_t* end,
766 _In_ int radix)
767 {
768 return strtouint<T, uint32_t>(str, count, end, radix);
769 }
770
781 template <class T>
782 inline uint64_t strtou64(
783 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
784 _Out_opt_ size_t* end,
785 _In_ int radix)
786 {
787 return strtouint<T, uint64_t>(str, count, end, radix);
788 }
789
801 template <class T>
802 inline size_t strtoui(
803 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
804 _Out_opt_ size_t* end,
805 _In_ int radix)
806 {
807#if defined(_WIN64) || defined(__LP64__)
808 return (size_t)strtou64(str, count, end, radix);
809#else
810 return (size_t)strtou32(str, count, end, radix);
811#endif
812 }
813
815 inline int vsnprintf(_Out_z_cap_(capacity) char *str, _In_ size_t capacity, _In_z_ _Printf_format_string_ const char *format, _In_opt_ locale_t locale, _In_ va_list arg)
816 {
817 int r;
818#ifdef _WIN32
819 // Don't use _vsnprintf_s(). It terminates the string even if we want to print to the edge of the buffer.
820#pragma warning(suppress: 4996)
821 r = _vsnprintf_l(str, capacity, format, locale, arg);
822#else
823 r = vsnprintf(str, capacity, format, arg);
824#endif
825 if (r == -1 && strnlen(str, capacity) == capacity) {
826 // Buffer overrun. Estimate buffer size for the next iteration.
827 capacity += std::max<size_t>(capacity / 8, 0x80);
828 if (capacity > INT_MAX)
829 throw std::invalid_argument("string too big");
830 return (int)capacity;
831 }
832 return r;
833 }
834
835 inline int vsnprintf(_Out_z_cap_(capacity) wchar_t *str, _In_ size_t capacity, _In_z_ _Printf_format_string_ const wchar_t *format, _In_opt_ locale_t locale, _In_ va_list arg)
836 {
837 int r;
838
839#ifdef _WIN32
840 // Don't use _vsnwprintf_s(). It terminates the string even if we want to print to the edge of the buffer.
841#pragma warning(suppress: 4996)
842 r = _vsnwprintf_l(str, capacity, format, locale, arg);
843#else
844 r = vswprintf(str, capacity, format, arg);
845#endif
846 if (r == -1 && strnlen(str, capacity) == capacity) {
847 // Buffer overrun. Estimate buffer size for the next iteration.
848 capacity += std::max<size_t>(capacity / 8, 0x80);
849 if (capacity > INT_MAX)
850 throw std::invalid_argument("string too big");
851 return (int)capacity;
852 }
853 return r;
854 }
856
865 template<class _Elem, class _Traits, class _Ax>
866 inline void vappendf(_Inout_ std::basic_string<_Elem, _Traits, _Ax> &str, _In_z_ _Printf_format_string_ const _Elem *format, _In_opt_ locale_t locale, _In_ va_list arg)
867 {
868 _Elem buf[1024/sizeof(_Elem)];
869
870 // Try with stack buffer first.
871 int count = vsnprintf(buf, _countof(buf) - 1, format, locale, arg);
872 if (count >= 0) {
873 // Copy from stack.
874 str.append(buf, count);
875 } else {
876 for (size_t capacity = 2*1024/sizeof(_Elem);; capacity *= 2) {
877 // Allocate on heap and retry.
878 auto buf_dyn = std::make_unique<_Elem[]>(capacity);
879 count = vsnprintf(buf_dyn.get(), capacity - 1, format, locale, arg);
880 if (count >= 0) {
881 str.append(buf_dyn.get(), count);
882 break;
883 }
884 }
885 }
886 }
887
895 template<class _Elem, class _Traits, class _Ax>
896 inline void appendf(_Inout_ std::basic_string<_Elem, _Traits, _Ax> &str, _In_z_ _Printf_format_string_ const _Elem *format, _In_opt_ locale_t locale, ...)
897 {
898 va_list arg;
899 va_start(arg, locale);
900 vappendf(str, format, locale, arg);
901 va_end(arg);
902 }
903
912 template<class _Elem, class _Traits, class _Ax>
913 inline void vsprintf(_Inout_ std::basic_string<_Elem, _Traits, _Ax> &str, _In_z_ _Printf_format_string_ const _Elem *format, _In_opt_ locale_t locale, _In_ va_list arg)
914 {
915 str.clear();
916 appendf(str, format, locale, arg);
917 }
918
926 template<class _Elem, class _Traits, class _Ax>
927 inline void sprintf(_Inout_ std::basic_string<_Elem, _Traits, _Ax> &str, _In_z_ _Printf_format_string_ const _Elem *format, _In_opt_ locale_t locale, ...)
928 {
929 va_list arg;
930 va_start(arg, locale);
931 vsprintf(str, format, locale, arg);
932 va_end(arg);
933 }
934}