stdex
Additional custom algorithms, and algorithms not covered by Standard C++
Loading...
Searching...
No Matches
string.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2016-2023 Amebis
4*/
5
6#pragma once
7
#include "sal.hpp"
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <wchar.h>
#include <algorithm>
#include <locale>
#include <memory>
#include <stdexcept>
#include <string>
14
15namespace stdex
16{
/// Locale handle type taken by the formatting helpers in this namespace.
///
/// Aliases the global `::locale_t`, except where `_WIN32` is defined, in which
/// case it aliases `_locale_t`.
#ifndef _WIN32
using locale_t = ::locale_t;
#else
using locale_t = _locale_t;
#endif
22
/// UTF-16 code unit type: `wchar_t` where `_WIN32` is defined, `char16_t` elsewhere.
#ifdef _WIN32
using utf16_t = wchar_t;
#else
using utf16_t = char16_t;
#endif

/// Tests whether a UTF-16 code unit is a high (leading) surrogate.
///
/// \param[in] chr  Code unit to test
///
/// \return `true` when `chr` is in 0xD800..0xDBFF (both ends included); `false` otherwise
inline bool is_high_surrogate(_In_ utf16_t chr)
{
    // The lower bound is inclusive: 0xD800 is the first high surrogate and is
    // the lead unit of every code point in U+10000..U+103FF.
    return 0xd800 <= chr && chr < 0xdc00;
}

/// Tests whether a UTF-16 code unit is a low (trailing) surrogate.
///
/// \param[in] chr  Code unit to test
///
/// \return `true` when `chr` is in 0xDC00..0xDFFF (both ends included); `false` otherwise
inline bool is_low_surrogate(_In_ utf16_t chr)
{
    // The lower bound is inclusive: 0xDC00 is the first low surrogate.
    return 0xdc00 <= chr && chr < 0xe000;
}

/// Tests whether two consecutive UTF-16 code units form a surrogate pair.
///
/// \param[in] str  Pointer to two readable code units
///
/// \return `true` when `str[0]` is a high surrogate and `str[1]` is a low surrogate
inline bool is_surrogate_pair(_In_reads_(2) const utf16_t* str)
{
    if (!is_high_surrogate(str[0]))
        return false;
    return is_low_surrogate(str[1]);
}
61
67 inline char32_t surrogate_pair_to_ucs4(_In_reads_(2) const utf16_t* str)
68 {
69 assert(is_surrogate_pair(str));
70 return
71 ((char32_t)(str[0] - 0xd800) << 10) +
72 (char32_t)(str[1] - 0xdc00) +
73 0x10000;
74 }
75
81 inline void ucs4_to_surrogate_pair(_Out_writes_(2) utf16_t* str, _In_ char32_t chr)
82 {
83 assert(chr >= 0x10000);
84 chr -= 0x10000;
85 str[0] = 0xd800 + (char32_t)((chr >> 10) & 0x3ff);
86 str[1] = 0xdc00 + (char32_t)(chr & 0x3ff);
87 }
88
/// Tests whether a code point falls into one of the ranges this library treats
/// as combining marks.
///
/// \param[in] chr  Code point to test
///
/// \return `true` for 0x0300..0x036F, 0x1DC0..0x1DFF, 0x20D0..0x20FF and
///         0xFE20..0xFE2F; `false` for everything else
inline bool iscombining(_In_ char32_t chr)
{
    if (0x0300 <= chr && chr < 0x0370)
        return true;
    if (0x1dc0 <= chr && chr < 0x1e00)
        return true;
    if (0x20d0 <= chr && chr < 0x2100)
        return true;
    if (0xfe20 <= chr && chr < 0xfe30)
        return true;
    return false;
}
102
/// Tests whether a character is a line-break character.
///
/// \param[in] chr  Character to test
///
/// \return 1 when `chr` is `'\n'` or `'\r'`; 0 otherwise
template <class T>
inline size_t islbreak(_In_ T chr)
{
    if (chr == '\n')
        return 1;
    if (chr == '\r')
        return 1;
    return 0;
}
113
/// Measures the line break found at the start of a string.
///
/// \param[in] chr    Pointer to the characters to inspect; may be null when `count` is 0
/// \param[in] count  Number of readable characters at `chr`
///
/// \return 2 when the string starts with `"\r\n"` or `"\n\r"`, 1 when it starts
///         with a lone `'\n'` or `'\r'`, 0 otherwise (including an empty string)
template <class T>
inline size_t islbreak(_In_reads_or_z_opt_(count) const T* chr, _In_ size_t count)
{
    _Analysis_assume_(chr || !count);
    if (!count)
        return 0;

    const bool starts_cr = chr[0] == '\r';
    const bool starts_lf = chr[0] == '\n';

    // A two-character break needs two readable characters.
    if (count >= 2 && ((starts_cr && chr[1] == '\n') || (starts_lf && chr[1] == '\r')))
        return 2;

    // One readable character is enough to recognise a lone line break.
    return (starts_cr || starts_lf) ? 1 : 0;
}
130
137 inline size_t glyphlen(_In_reads_or_z_opt_(count) const wchar_t* glyph, size_t count)
138 {
139 _Analysis_assume_(glyph || !count);
140 if (count) {
141#ifdef _WIN32
142 size_t i = count < 2 || !is_surrogate_pair(glyph) ? 1 : 2;
143#else
144 size_t i = 1;
145#endif
146 for (; i < count && iscombining(glyph[i]); ++i);
147 return i;
148 }
149 return 0;
150 }
151
/// Counts the characters of a zero-terminated string.
///
/// \param[in] str  Zero-terminated string; asserted to be non-null
///
/// \return Number of characters before the first zero element
template <class T>
inline size_t strlen(_In_z_ const T* str)
{
    assert(str);
    const T* cursor = str;
    while (*cursor)
        ++cursor;
    return static_cast<size_t>(cursor - str);
}
167
/// Counts the characters of a string, reading at most `count` of them.
///
/// \param[in] str    String to measure; may be null when `count` is 0
/// \param[in] count  Maximum number of characters to inspect
///
/// \return Index of the first zero element, or `count` when none is found
///         within the first `count` characters
template <class T>
inline size_t strnlen(_In_reads_or_z_opt_(count) const T* str, _In_ size_t count)
{
    // Same precondition as the other counted helpers in this file: a null
    // pointer is acceptable as long as nothing is read through it.
    assert(str || !count);
    size_t i;
    for (i = 0; i < count && str[i]; ++i);
    return i;
}
184
/// Sentinel index returned by the search helpers when nothing is found:
/// the largest value a `size_t` can hold.
constexpr size_t npos = static_cast<size_t>(-1);
186
196 template <class T>
197 inline size_t strnchr(
198 _In_reads_or_z_opt_(count) const T* str,
199 _In_ size_t count,
200 _In_ T chr)
201 {
202 assert(str || !count);
203 for (size_t i = 0; i < count && str[i]; ++i)
204 if (str[i] == chr) return i;
205 return npos;
206 }
207
217 template <class T>
218 inline size_t strrnchr(
219 _In_reads_or_z_opt_(count) const T* str,
220 _In_ size_t count,
221 _In_ T chr)
222 {
223 assert(str || !count);
224 size_t z = npos;
225 for (size_t i = 0; i < count && str[i]; ++i)
226 if (str[i] == chr) z = i;
227 return z;
228 }
229
239 template <class T>
240 inline size_t strnichr(
241 _In_reads_or_z_opt_(count) const T* str,
242 _In_ size_t count,
243 _In_ T chr,
244 _In_ const std::locale& locale)
245 {
246 assert(str || !count);
247 const auto& ctype = std::use_facet<std::ctype<T>>(locale);
248 chr = ctype.tolower(chr);
249 for (size_t i = 0; i < count && str[i]; ++i)
250 if (ctype.tolower(str[i]) == chr) return i;
251 return npos;
252 }
253
263 template <class T>
264 inline size_t strrnichr(
265 _In_reads_or_z_opt_(count) const T* str,
266 _In_ size_t count,
267 _In_ T chr,
268 _In_ const std::locale& locale)
269 {
270 assert(str || !count);
271 const auto& ctype = std::use_facet<std::ctype<T>>(locale);
272 chr = ctype.tolower(chr);
273 size_t z = npos;
274 for (size_t i = 0; i < count && str[i]; ++i)
275 if (ctype.tolower(str[i]) == chr) z = i;
276 return z;
277 }
278
/// Compares two strings element by element.
///
/// Each string ends at its first zero element or at its count, whichever
/// comes first. Elements are compared with the built-in `<` and `>` operators.
///
/// \param[in] str1    First string; may be null when `count1` is 0
/// \param[in] count1  Maximum number of characters to read from `str1`
/// \param[in] str2    Second string; may be null when `count2` is 0
/// \param[in] count2  Maximum number of characters to read from `str2`
///
/// \return +1 when `str1` orders after `str2`, -1 when it orders before, 0 when equal
template <class T1, class T2>
inline int strncmp(
    _In_reads_or_z_opt_(count1) const T1* str1, _In_ size_t count1,
    _In_reads_or_z_opt_(count2) const T2* str2, _In_ size_t count2)
{
    assert(str1 || !count1);
    assert(str2 || !count2);

    size_t pos = 0;
    while (pos < count1 && pos < count2) {
        const T1 a = str1[pos];
        const T2 b = str2[pos];
        // Both strings hit their terminator at the same index.
        if (!(a | b))
            break;
        if (a > b) return +1;
        if (a < b) return -1;
        ++pos;
    }

    // Whichever string still has characters left is the greater one.
    if (pos < count1 && str1[pos]) return +1;
    if (pos < count2 && str2[pos]) return -1;
    return 0;
}
305
/// Compares two strings using the collation rules of a locale.
///
/// The full ranges `[str1, str1 + count1)` and `[str2, str2 + count2)` are
/// handed to the `std::collate<T>` facet of `locale`.
///
/// \param[in] str1    First string; may be null when `count1` is 0
/// \param[in] count1  Number of characters in `str1`
/// \param[in] str2    Second string; may be null when `count2` is 0
/// \param[in] count2  Number of characters in `str2`
/// \param[in] locale  Locale supplying the collation
///
/// \return The result of `std::collate<T>::compare()` for the two ranges
template <class T>
inline int strncoll(
    _In_reads_or_z_opt_(count1) const T* str1, _In_ size_t count1,
    _In_reads_or_z_opt_(count2) const T* str2, _In_ size_t count2,
    _In_ const std::locale& locale)
{
    assert(str1 || !count1);
    assert(str2 || !count2);
    const T* const end1 = str1 + count1;
    const T* const end2 = str2 + count2;
    const std::collate<T>& facet = std::use_facet<std::collate<T>>(locale);
    return facet.compare(str1, end1, str2, end2);
}
327
/// Compares two strings element by element, ignoring case.
///
/// Each element is lower-cased with the `std::ctype` facet of `locale` for its
/// own character type before the comparison. Each string ends at its first
/// zero element or at its count, whichever comes first.
///
/// \param[in] str1    First string; may be null when `count1` is 0
/// \param[in] count1  Maximum number of characters to read from `str1`
/// \param[in] str2    Second string; may be null when `count2` is 0
/// \param[in] count2  Maximum number of characters to read from `str2`
/// \param[in] locale  Locale supplying the case mapping
///
/// \return +1 when `str1` orders after `str2`, -1 when it orders before, 0 when equal
template <class T1, class T2>
inline int strnicmp(
    _In_reads_or_z_opt_(count1) const T1* str1, _In_ size_t count1,
    _In_reads_or_z_opt_(count2) const T2* str2, _In_ size_t count2,
    _In_ const std::locale& locale)
{
    assert(str1 || !count1);
    assert(str2 || !count2);
    const auto& facet1 = std::use_facet<std::ctype<T1>>(locale);
    const auto& facet2 = std::use_facet<std::ctype<T2>>(locale);

    size_t pos = 0;
    while (pos < count1 && pos < count2) {
        const T1 a = facet1.tolower(str1[pos]);
        const T2 b = facet2.tolower(str2[pos]);
        // Both strings hit their terminator at the same index.
        if (!(a | b))
            break;
        if (a > b) return +1;
        if (a < b) return -1;
        pos++;
    }

    // Whichever string still has characters left is the greater one.
    if (pos < count1 && str1[pos]) return +1;
    if (pos < count2 && str2[pos]) return -1;
    return 0;
}
357
367 template <class T1, class T2>
368 inline size_t strnstr(
369 _In_reads_or_z_opt_(count) const T1* str,
370 _In_ size_t count,
371 _In_z_ const T2* sample)
372 {
373 assert(str || !count);
374 assert(sample);
375 for (size_t offset = 0;; ++offset) {
376 for (size_t i = offset, j = 0;; ++i, ++j) {
377 if (!sample[j])
378 return offset;
379 if (i >= count || !str[i])
380 return npos;
381 if (str[i] != sample[j])
382 break;
383 }
384 }
385 }
386
396 template <class T1, class T2>
397 inline size_t strnistr(
398 _In_reads_or_z_opt_(count) const T1* str,
399 _In_ size_t count,
400 _In_z_ const T2* sample,
401 _In_ const std::locale& locale)
402 {
403 assert(str || !count);
404 assert(sample);
405 const auto& ctype1 = std::use_facet<std::ctype<T1>>(locale);
406 const auto& ctype2 = std::use_facet<std::ctype<T2>>(locale);
407 for (size_t offset = 0;; ++offset) {
408 for (size_t i = offset, j = 0;; ++i, ++j) {
409 if (!sample[j])
410 return offset;
411 if (i >= count || !str[i])
412 return npos;
413 if (ctype1.tolower(str[i]) != ctype2.tolower(sample[j]))
414 break;
415 }
416 }
417 }
418
/// Copies at most `count` characters from one string to another.
///
/// Copying stops after the zero terminator has been copied or after `count`
/// characters. When the limit is reached first, no terminator is written.
///
/// \param[out] dst    Destination buffer with room for `count` characters
/// \param[in]  src    Source string
/// \param[in]  count  Maximum number of characters to copy
///
/// \return Index of the copied zero terminator, or `count` when none was copied
template <class T1, class T2>
inline size_t strncpy(
    _Out_writes_(count) _Post_maybez_ T1* dst,
    _In_reads_or_z_opt_(count) const T2* src, _In_ size_t count)
{
    assert((dst && src) || !count);
    size_t pos = 0;
    while (pos < count) {
        dst[pos] = src[pos];
        if (dst[pos] == 0)
            break;
        ++pos;
    }
    return pos;
}
441
/// Copies a string, bounded by both the destination and the source size.
///
/// Copying stops at the first of: `count_dst` characters written, the source
/// zero terminator copied, or `count_src` characters read. In the last case a
/// zero terminator is appended. When the destination limit is reached first,
/// the result is not terminated.
///
/// \param[out] dst        Destination buffer; may be null when `count_dst` is 0
/// \param[in]  count_dst  Capacity of `dst` in characters
/// \param[in]  src        Source string; may be null when `count_src` is 0
/// \param[in]  count_src  Maximum number of characters to read from `src`
///
/// \return Number of characters copied, not counting the zero terminator
template <class T1, class T2>
inline size_t strncpy(
    _Out_writes_(count_dst) _Post_maybez_ T1* dst, _In_ size_t count_dst,
    _In_reads_or_z_opt_(count_src) const T2* src, _In_ size_t count_src)
{
    assert(dst || !count_dst);
    assert(src || !count_src);
    for (size_t i = 0; ; ++i)
    {
        // dst[count_dst] is already past the end of the destination.
        if (i >= count_dst)
            return i;
        // src[count_src] must not be read; terminate the copy instead.
        if (i >= count_src) {
            dst[i] = 0;
            return i;
        }
        if ((dst[i] = src[i]) == 0)
            return i;
    }
}
471
/// Copies a zero-terminated string, replacing every `"\r\n"` with `"\n"`.
///
/// The output is never longer than the input; the write position never gets
/// ahead of the read position.
///
/// \param[out] dst  Destination buffer; asserted to be non-null
/// \param[in]  src  Zero-terminated source string; asserted to be non-null
///
/// \return Number of characters written to `dst`, not counting the zero terminator
template <class T>
inline size_t crlf2nl(_Out_writes_z_(strlen(src)) T* dst, _In_z_ const T* src)
{
    assert(dst);
    assert(src);
    const T* in = src;
    T* out = dst;
    while (*in) {
        if (in[0] == '\r' && in[1] == '\n') {
            // Collapse the pair into a single newline.
            *out++ = '\n';
            in += 2;
        }
        else
            *out++ = *in++;
    }
    *out = 0;
    return static_cast<size_t>(out - dst);
}
498
/// Parses the magnitude of an integer from a string.
///
/// Leading whitespace is skipped, an optional `+` or `-` is consumed, and then
/// digits are accumulated in the given radix. With `radix` 16 a leading
/// `0x`/`0X` is accepted; with `radix` 0 the radix is detected from the prefix
/// (`0x`/`0X` selects 16, a leading `0` selects 8, anything else 10).
///
/// `T_bin` is used as an unsigned accumulator: `(T_bin)-1` is taken as its
/// largest value.
///
/// \param[in]  str    String to parse; may be null when `count` is 0
/// \param[in]  count  Maximum number of characters to read
/// \param[out] end    When non-null, receives the index at which parsing stopped
/// \param[in]  radix  0 for auto-detection, otherwise 2..36
/// \param[out] flags  Bit 0x01 is set when a minus sign was read; bit 0x02 is
///                    set when a digit could not be accumulated without
///                    exceeding `(T_bin)-1`
///
/// \return The accumulated magnitude; after an overflow, the value reached
///         before the overflowing digit
template <class T, class T_bin>
inline T_bin strtoint(
    _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
    _Out_opt_ size_t* end,
    _In_ int radix,
    _Out_ uint8_t& flags)
{
    assert(str || !count);
    assert(radix == 0 || (2 <= radix && radix <= 36));

    size_t i = 0;
    T_bin value = 0, digit,
        max_ui = (T_bin)-1,
        max_ui_pre1, max_ui_pre2;

    flags = 0;

    // Skip leading spaces.
    for (;; ++i) {
        if (i >= count || !str[i]) goto error;
        // isspace() is only defined for values representable as unsigned char.
        // Map single-byte characters through unsigned char and treat any wider
        // code unit outside that range as a non-space.
        const unsigned long long code = sizeof(T) == 1 ?
            (unsigned long long)(unsigned char)str[i] :
            (unsigned long long)str[i];
        if (code > 0xff || !isspace((int)code)) break;
    }

    // Read the sign.
    if (str[i] == '+') {
        flags &= ~0x01;
        ++i;
        if (i >= count || !str[i]) goto error;
    }
    else if (str[i] == '-') {
        flags |= 0x01;
        ++i;
        if (i >= count || !str[i]) goto error;
    }

    if (radix == 16) {
        // On hexadecimal, allow leading 0x.
        if (str[i] == '0' && i + 1 < count && (str[i + 1] == 'x' || str[i + 1] == 'X')) {
            i += 2;
            if (i >= count || !str[i]) goto error;
        }
    }
    else if (!radix) {
        // Autodetect radix.
        if (str[i] == '0') {
            ++i;
            if (i >= count || !str[i]) goto error;
            if (str[i] == 'x' || str[i] == 'X') {
                radix = 16;
                ++i;
                if (i >= count || !str[i]) goto error;
            }
            else
                radix = 8;
        }
        else
            radix = 10;
    }

    // We have the radix. Precompute the largest value that can still be
    // multiplied by the radix, and the largest digit that may follow it.
    max_ui_pre1 = max_ui / (T_bin)radix;
    max_ui_pre2 = max_ui % (T_bin)radix;
    for (;;) {
        if ('0' <= str[i] && str[i] <= '9')
            digit = (T_bin)str[i] - '0';
        else if ('A' <= str[i] && str[i] <= 'Z')
            digit = (T_bin)str[i] - 'A' + '\x0a';
        else if ('a' <= str[i] && str[i] <= 'z')
            digit = (T_bin)str[i] - 'a' + '\x0a';
        else
            goto error;
        if (digit >= (T_bin)radix)
            goto error;

        if (value < max_ui_pre1 || // Neither multiplication nor addition will overflow.
            (value == max_ui_pre1 && digit <= max_ui_pre2)) // Small digits will not overflow.
            value = value * (T_bin)radix + digit;
        else {
            // Overflow! Keep consuming digits so that *end lands after the number.
            flags |= 0x02;
        }

        ++i;
        if (i >= count || !str[i])
            goto error;
    }

error:
    // Common exit: also reached when parsing simply ran out of input.
    if (end) *end = i;
    return value;
}
592
603 template <class T, class T_bin>
604 T_bin strtoint(
605 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
606 _Out_opt_ size_t* end,
607 _In_ int radix)
608 {
609 uint8_t flags;
610 T_bin value;
611
612 switch (sizeof(T_bin)) {
613 case 1:
614 value = (T_bin)strtoint<T, uint8_t>(str, count, end, radix, flags);
615 if ((flags & 0x01) && (value & 0x80)) {
616 // Sign bit is 1 => overflow.
617 flags |= 0x02;
618 }
619 return (flags & 0x02) ?
620 (flags & 0x01) ? (T_bin)0x80 : (T_bin)0x7f :
621 (flags & 0x01) ? -value : value;
622
623 case 2:
624 value = (T_bin)strtoint<T, T_U2>(str, count, end, radix, flags);
625 if ((flags & 0x01) && (value & 0x8000)) {
626 // Sign bit is 1 => overflow.
627 flags |= 0x02;
628 }
629 return (flags & 0x02) ?
630 (flags & 0x01) ? (T_bin)0x8000 : (T_bin)0x7fff :
631 (flags & 0x01) ? -value : value;
632
633 case 4:
634 value = (T_bin)strtoint<T, uint32_t>(str, count, end, radix, flags);
635 if ((flags & 0x01) && (value & 0x80000000)) {
636 // Sign bit is 1 => overflow.
637 flags |= 0x02;
638 }
639 return (flags & 0x02) ?
640 (flags & 0x01) ? (T_bin)0x80000000 : (T_bin)0x7fffffff :
641 (flags & 0x01) ? -value : value;
642
643 case 8:
644 value = (T_bin)strtoint<T, uint64_t>(str, count, end, radix, flags);
645 if ((flags & 0x01) && (value & 0x8000000000000000)) {
646 // Sign bit is 1 => overflow.
647 flags |= 0x02;
648 }
649 return (flags & 0x02) ?
650 (flags & 0x01) ? (T_bin)0x8000000000000000 : (T_bin)0x7fffffffffffffff :
651 (flags & 0x01) ? -value : value;
652
653 default:
654 throw std::invalid_argument("Unsupported bit length");
655 }
656 }
657
668 template <class T, class T_bin>
669 inline T_bin strtouint(
670 _In_reads_or_z_opt_(count) const T* str,
671 _In_ size_t count,
672 _Out_opt_ size_t* end,
673 _In_ int radix)
674 {
675 uint8_t flags;
676 T_bin value;
677
678 switch (sizeof(T_bin)) {
679 case 1: value = (T_bin)strtoint<T, uint8_t>(str, count, end, radix, flags); break;
680 case 2: value = (T_bin)strtoint<T, uint16_t>(str, count, end, radix, flags); break;
681 case 4: value = (T_bin)strtoint<T, uint32_t>(str, count, end, radix, flags); break;
682 case 8: value = (T_bin)strtoint<T, uint64_t>(str, count, end, radix, flags); break;
683 default: throw std::invalid_argument("Unsupported bit length");
684 }
685
686 return (flags & 0x02) ?
687 (flags & 0x01) ? (T_bin)0 : (T_bin)-1 :
688 (flags & 0x01) ? ~value : value;
689 }
690
701 template <class T>
702 inline int32_t strto32(
703 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
704 _Out_opt_ size_t* end,
705 _In_ int radix)
706 {
707 return strtoint<T, int32_t>(str, count, end, radix);
708 }
709
720 template <class T>
721 inline int64_t strto64(
722 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
723 _Out_opt_ size_t* end,
724 _In_ int radix)
725 {
726 return strtoint<T, int64_t>(str, count, end, radix);
727 }
728
740 template <class T>
741 inline intptr_t strtoi(
742 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
743 _Out_opt_ size_t* end,
744 _In_ int radix)
745 {
746#if defined(_WIN64) || defined(__LP64__)
747 return (intptr_t)strto64(str, count, end, radix);
748#else
749 return (intptr_t)strto32(str, count, end, radix);
750#endif
751 }
752
763 template <class T>
764 inline uint32_t strtou32(
765 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
766 _Out_opt_ size_t* end,
767 _In_ int radix)
768 {
769 return strtouint<T, uint32_t>(str, count, end, radix);
770 }
771
782 template <class T>
783 inline uint64_t strtou64(
784 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
785 _Out_opt_ size_t* end,
786 _In_ int radix)
787 {
788 return strtouint<T, uint64_t>(str, count, end, radix);
789 }
790
802 template <class T>
803 inline size_t strtoui(
804 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
805 _Out_opt_ size_t* end,
806 _In_ int radix)
807 {
808#if defined(_WIN64) || defined(__LP64__)
809 return (size_t)strtou64(str, count, end, radix);
810#else
811 return (size_t)strtou32(str, count, end, radix);
812#endif
813 }
814
/// Formats a narrow string into a caller-supplied buffer.
///
/// Calls `_vsnprintf_l()` where `_WIN32` is defined and the C library's
/// `vsnprintf()` elsewhere; `locale` is only passed on in the `_WIN32` build.
///
/// \param[out] str       Destination buffer
/// \param[in]  capacity  Size of `str` in characters
/// \param[in]  format    printf-style format string
/// \param[in]  locale    Locale handle
/// \param[in]  arg       Arguments for `format`
///
/// \return The value returned by the underlying formatter. The one exception:
///         when that value is -1 and the buffer holds no zero terminator within
///         `capacity` characters, a suggested larger capacity is returned instead.
///
/// \throws std::invalid_argument when the suggested capacity exceeds `INT_MAX`
inline int vsnprintf(_Out_z_cap_(capacity) char *str, _In_ size_t capacity, _In_z_ _Printf_format_string_params_(2) const char *format, _In_opt_ locale_t locale, _In_ va_list arg)
{
    int r;
#ifdef _WIN32
    // Don't use _vsnprintf_s(). It terminates the string even if we want to print to the edge of the buffer.
#pragma warning(suppress: 4996)
    r = _vsnprintf_l(str, capacity, format, locale, arg);
#else
    // Qualified on purpose: inside this namespace the unqualified name finds
    // only this wrapper's overloads, none of which take four arguments.
    r = ::vsnprintf(str, capacity, format, arg);
#endif
    if (r == -1 && strnlen(str, capacity) == capacity) {
        // Buffer overrun. Estimate buffer size for the next iteration.
        capacity += std::max<size_t>(capacity / 8, 0x80);
        if (capacity > INT_MAX)
            throw std::invalid_argument("string too big");
        return (int)capacity;
    }
    return r;
}
835
836 inline int vsnprintf(_Out_z_cap_(capacity) wchar_t *str, _In_ size_t capacity, _In_z_ _Printf_format_string_params_(2) const wchar_t *format, _In_opt_ locale_t locale, _In_ va_list arg)
837 {
838 int r;
839
840#ifdef _WIN32
841 // Don't use _vsnwprintf_s(). It terminates the string even if we want to print to the edge of the buffer.
842#pragma warning(suppress: 4996)
843 r = _vsnwprintf_l(str, capacity, format, locale, arg);
844#else
845 r = vswprintf(str, capacity, format, arg);
846#endif
847 if (r == -1 && strnlen(str, capacity) == capacity) {
848 // Buffer overrun. Estimate buffer size for the next iteration.
849 capacity += std::max<size_t>(capacity / 8, 0x80);
850 if (capacity > INT_MAX)
851 throw std::invalid_argument("string too big");
852 return (int)capacity;
853 }
854 return r;
855 }
857
866 template<class _Elem, class _Traits, class _Ax>
867 inline void vappendf(_Inout_ std::basic_string<_Elem, _Traits, _Ax> &str, _In_z_ _Printf_format_string_params_(2) const _Elem *format, _In_opt_ locale_t locale, _In_ va_list arg)
868 {
869 _Elem buf[1024/sizeof(_Elem)];
870
871 // Try with stack buffer first.
872 int count = vsnprintf(buf, _countof(buf) - 1, format, locale, arg);
873 if (count >= 0) {
874 // Copy from stack.
875 str.append(buf, count);
876 } else {
877 for (size_t capacity = 2*1024/sizeof(_Elem);; capacity *= 2) {
878 // Allocate on heap and retry.
879 auto buf_dyn = std::make_unique<_Elem[]>(capacity);
880 count = vsnprintf(buf_dyn.get(), capacity - 1, format, locale, arg);
881 if (count >= 0) {
882 str.append(buf_dyn.get(), count);
883 break;
884 }
885 }
886 }
887 }
888
896 template<class _Elem, class _Traits, class _Ax>
897 inline void appendf(_Inout_ std::basic_string<_Elem, _Traits, _Ax> &str, _In_z_ _Printf_format_string_params_(2) const _Elem *format, _In_opt_ locale_t locale, ...)
898 {
899 va_list arg;
900 va_start(arg, locale);
901 vappendf(str, format, locale, arg);
902 va_end(arg);
903 }
904
913 template<class _Elem, class _Traits, class _Ax>
914 inline void vsprintf(_Inout_ std::basic_string<_Elem, _Traits, _Ax> &str, _In_z_ _Printf_format_string_params_(2) const _Elem *format, _In_opt_ locale_t locale, _In_ va_list arg)
915 {
916 str.clear();
917 vappendf(str, format, locale, arg);
918 }
919
927 template<class _Elem, class _Traits, class _Ax>
928 inline void sprintf(_Inout_ std::basic_string<_Elem, _Traits, _Ax> &str, _In_z_ _Printf_format_string_params_(2) const _Elem *format, _In_opt_ locale_t locale, ...)
929 {
930 va_list arg;
931 va_start(arg, locale);
932 vsprintf(str, format, locale, arg);
933 va_end(arg);
934 }
935
945 template<class _Elem, class _Traits = std::char_traits<_Elem>, class _Ax = std::allocator<_Elem>>
946 inline std::basic_string<_Elem, _Traits, _Ax> vsprintf(_In_z_ _Printf_format_string_params_(2) const _Elem *format, _In_opt_ locale_t locale, _In_ va_list arg)
947 {
948 std::basic_string<_Elem, _Traits, _Ax> str;
949 vappendf(str, format, locale, arg);
950 return str;
951 }
952
961 template<class _Elem, class _Traits = std::char_traits<_Elem>, class _Ax = std::allocator<_Elem>>
962 inline std::basic_string<_Elem, _Traits, _Ax> sprintf(_In_z_ _Printf_format_string_params_(2) const _Elem *format, _In_opt_ locale_t locale, ...)
963 {
964 va_list arg;
965 va_start(arg, locale);
966 auto str = vsprintf(format, locale, arg);
967 va_end(arg);
968 return str;
969 }
970}