stdex
Additional custom algorithms and algorithms not covered by Standard C++
Loading...
Searching...
No Matches
string.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2016-2023 Amebis
4*/
5
6#pragma once
7
#include "sal.hpp"
#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdint.h>
#include <wchar.h>
#include <stdexcept>
13
14namespace stdex
15{
/// UTF-16 code unit type: `wchar_t` when `_WIN32` is defined, `char16_t` otherwise.
#if defined(_WIN32)
using utf16_t = wchar_t;
#else
using utf16_t = char16_t;
#endif
24
///
/// Tests whether a UTF-16 code unit is a high (leading) surrogate
///
/// High surrogates occupy the inclusive range U+D800..U+DBFF.
///
/// \param[in] chr  Code unit to test
///
/// \return true if chr lies in [0xd800, 0xdc00); false otherwise
///
inline bool is_high_surrogate(_In_ utf16_t chr)
{
    // The lower bound is inclusive: U+D800 itself is a high surrogate.
    return 0xd800 <= chr && chr < 0xdc00;
}
34
///
/// Tests whether a UTF-16 code unit is a low (trailing) surrogate
///
/// Low surrogates occupy the inclusive range U+DC00..U+DFFF.
///
/// \param[in] chr  Code unit to test
///
/// \return true if chr lies in [0xdc00, 0xe000); false otherwise
///
inline bool is_low_surrogate(_In_ utf16_t chr)
{
    // The lower bound is inclusive: U+DC00 itself is a low surrogate.
    return 0xdc00 <= chr && chr < 0xe000;
}
44
50 inline bool is_surrogate_pair(_In_reads_(2) const utf16_t* str)
51 {
52 return is_high_surrogate(str[0]) && is_low_surrogate(str[1]);
53 }
54
60 inline char32_t surrogate_pair_to_ucs4(_In_reads_(2) const utf16_t* str)
61 {
62 assert(is_surrogate_pair(str));
63 return
64 ((char32_t)(str[0] - 0xd800) << 10) +
65 (char32_t)(str[1] - 0xdc00) +
66 0x10000;
67 }
68
///
/// Splits a UCS-4 code point into a UTF-16 surrogate pair
///
/// \param[out] str  Receives the high surrogate in str[0] and the low surrogate in str[1]
/// \param[in]  chr  Code point to encode; asserted to be 0x10000 or above
///
inline void ucs4_to_surrogate_pair(_Out_writes_(2) utf16_t* str, _In_ char32_t chr)
{
    assert(chr >= 0x10000);
    const char32_t payload = chr - 0x10000;
    // Upper 10 payload bits go to the high surrogate, lower 10 bits to the low one.
    const char32_t upper = (payload >> 10) & 0x3ff;
    const char32_t lower = payload & 0x3ff;
    str[0] = 0xd800 + upper;
    str[1] = 0xdc00 + lower;
}
81
///
/// Tests whether a code point falls in one of the combining-mark ranges known to this function
///
/// The ranges checked are [0x0300, 0x0370), [0x1dc0, 0x1e00), [0x20d0, 0x2100) and [0xfe20, 0xfe30).
///
/// \param[in] chr  Code point to test
///
/// \return true if chr lies inside any of the ranges above
///
inline bool iscombining(_In_ char32_t chr)
{
    // Half-open ranges: { first, one-past-last }.
    static const char32_t ranges[][2] = {
        { 0x0300, 0x0370 },
        { 0x1dc0, 0x1e00 },
        { 0x20d0, 0x2100 },
        { 0xfe20, 0xfe30 },
    };
    for (const auto& range : ranges) {
        if (range[0] <= chr && chr < range[1])
            return true;
    }
    return false;
}
95
///
/// Tests whether a character is a line break
///
/// \param[in] chr  Character to test
///
/// \return 1 if chr is '\n' or '\r'; 0 otherwise
///
template <class T>
inline size_t islbreak(_In_ T chr)
{
    const bool is_break = (chr == '\r') || (chr == '\n');
    return is_break ? 1 : 0;
}
106
///
/// Measures the line break at the start of a string
///
/// A "\r\n" or "\n\r" sequence counts as one two-character break.
///
/// \param[in] chr    Pointer to the text to examine; may be null only when count is 0
/// \param[in] count  Number of characters available at chr
///
/// \return 2 for a CR LF or LF CR pair, 1 for a lone '\n' or '\r', 0 when the text does not start with a line break
///
template <class T>
inline size_t islbreak(_In_reads_or_z_opt_(count) const T* chr, _In_ size_t count)
{
    _Analysis_assume_(chr || !count);
    if (!count)
        return 0;
    const bool starts_with_cr = chr[0] == '\r';
    const bool starts_with_lf = chr[0] == '\n';
    if (!starts_with_cr && !starts_with_lf)
        return 0;
    // The second character is only looked at when the buffer really holds one.
    if (count >= 2 && ((starts_with_cr && chr[1] == '\n') || (starts_with_lf && chr[1] == '\r')))
        return 2;
    // A single available character that is a line break must still be reported.
    return 1;
}
123
130 inline size_t glyphlen(_In_reads_or_z_opt_(count) const wchar_t* glyph, size_t count)
131 {
132 _Analysis_assume_(glyph || !count);
133 if (count) {
134#ifdef _WIN32
135 size_t i = count < 2 || !is_surrogate_pair(glyph) ? 1 : 2;
136#else
137 size_t i = 1;
138#endif
139 for (; i < count && iscombining(glyph[i]); ++i);
140 return i;
141 }
142 return 0;
143 }
144
///
/// Calculates the length of a zero-terminated string
///
/// \param[in] str  Zero-terminated string; asserted to be non-null
///
/// \return Number of characters preceding the zero terminator
///
template <class T>
inline size_t strlen(_In_z_ const T* str)
{
    assert(str);
    const T* cursor = str;
    while (*cursor)
        ++cursor;
    return static_cast<size_t>(cursor - str);
}
160
///
/// Calculates the length of a string, examining at most `count` characters
///
/// \param[in] str    String to measure; may be null only when count is 0
/// \param[in] count  Maximum number of characters to examine
///
/// \return Number of characters preceding the zero terminator, or count if no terminator was found
///
template <class T>
inline size_t strnlen(_In_reads_or_z_opt_(count) const T* str, _In_ size_t count)
{
    // A null pointer is acceptable for an empty range, matching the _opt_ annotation.
    assert(str || !count);
    size_t i;
    for (i = 0; i < count && str[i]; ++i);
    return i;
}
177
/// Largest `size_t` value; returned by the search functions in this file when nothing was found.
constexpr size_t npos = static_cast<size_t>(-1);
179
///
/// Finds the first occurrence of a character in a string
///
/// \param[in] str    String to search; may be null only when count is 0
/// \param[in] count  Maximum number of characters to examine; the search also stops at a zero terminator
/// \param[in] chr    Character to look for
///
/// \return Index of the first match, or npos if chr was not found
///
template <class T>
inline size_t strnchr(
    _In_reads_or_z_opt_(count) const T* str,
    _In_ size_t count,
    _In_ T chr)
{
    assert(str || !count);
    size_t pos = 0;
    while (pos < count && str[pos]) {
        if (str[pos] == chr)
            return pos;
        ++pos;
    }
    return npos;
}
200
///
/// Finds the last occurrence of a character in a string
///
/// \param[in] str    String to search; may be null only when count is 0
/// \param[in] count  Maximum number of characters to examine; the search also stops at a zero terminator
/// \param[in] chr    Character to look for
///
/// \return Index of the last match, or npos if chr was not found
///
template <class T>
inline size_t strrnchr(
    _In_reads_or_z_opt_(count) const T* str,
    _In_ size_t count,
    _In_ T chr)
{
    assert(str || !count);
    // Establish how much of the buffer is text, then walk it backwards.
    size_t length = 0;
    while (length < count && str[length])
        ++length;
    for (size_t pos = length; pos-- > 0;) {
        if (str[pos] == chr)
            return pos;
    }
    return npos;
}
222
///
/// Finds the first occurrence of a character in a string, ignoring case
///
/// Both the string characters and chr are lower-cased with the locale's `std::ctype<T>` facet before comparing.
///
/// \param[in] str     String to search; may be null only when count is 0
/// \param[in] count   Maximum number of characters to examine; the search also stops at a zero terminator
/// \param[in] chr     Character to look for
/// \param[in] locale  Locale supplying the case conversion
///
/// \return Index of the first match, or npos if chr was not found
///
template <class T>
inline size_t strnichr(
    _In_reads_or_z_opt_(count) const T* str,
    _In_ size_t count,
    _In_ T chr,
    _In_ const std::locale& locale)
{
    assert(str || !count);
    const std::ctype<T>& facet = std::use_facet<std::ctype<T>>(locale);
    const T needle = facet.tolower(chr);
    size_t pos = 0;
    while (pos < count && str[pos]) {
        if (facet.tolower(str[pos]) == needle)
            return pos;
        ++pos;
    }
    return npos;
}
246
///
/// Finds the last occurrence of a character in a string, ignoring case
///
/// Both the string characters and chr are lower-cased with the locale's `std::ctype<T>` facet before comparing.
///
/// \param[in] str     String to search; may be null only when count is 0
/// \param[in] count   Maximum number of characters to examine; the search also stops at a zero terminator
/// \param[in] chr     Character to look for
/// \param[in] locale  Locale supplying the case conversion
///
/// \return Index of the last match, or npos if chr was not found
///
template <class T>
inline size_t strrnichr(
    _In_reads_or_z_opt_(count) const T* str,
    _In_ size_t count,
    _In_ T chr,
    _In_ const std::locale& locale)
{
    assert(str || !count);
    const std::ctype<T>& facet = std::use_facet<std::ctype<T>>(locale);
    const T needle = facet.tolower(chr);
    // Establish how much of the buffer is text, then walk it backwards.
    size_t length = 0;
    while (length < count && str[length])
        ++length;
    for (size_t pos = length; pos-- > 0;) {
        if (facet.tolower(str[pos]) == needle)
            return pos;
    }
    return npos;
}
271
///
/// Compares two strings character by character
///
/// \param[in] str1    First string; may be null only when count1 is 0
/// \param[in] count1  Maximum number of characters of str1 to compare
/// \param[in] str2    Second string; may be null only when count2 is 0
/// \param[in] count2  Maximum number of characters of str2 to compare
///
/// \return +1 if str1 orders after str2, -1 if it orders before, 0 if they compare equal
///
template <class T1, class T2>
inline int strncmp(
    _In_reads_or_z_opt_(count1) const T1* str1, _In_ size_t count1,
    _In_reads_or_z_opt_(count2) const T2* str2, _In_ size_t count2)
{
    assert(str1 || !count1);
    assert(str2 || !count2);
    size_t pos = 0;
    while (pos < count1 && pos < count2) {
        const T1 left = str1[pos];
        const T2 right = str2[pos];
        // Both strings terminated at the same position.
        if (!(left | right))
            break;
        if (left > right) return +1;
        if (left < right) return -1;
        ++pos;
    }
    // The common part is equal; whichever string still has characters left is the greater one.
    if (pos < count1 && str1[pos]) return +1;
    if (pos < count2 && str2[pos]) return -1;
    return 0;
}
298
///
/// Compares two strings using the collation rules of a locale
///
/// The ranges [str1, str1 + count1) and [str2, str2 + count2) are handed to `std::collate<T>::compare` as they are.
///
/// \param[in] str1    First string; may be null only when count1 is 0
/// \param[in] count1  Number of characters in str1
/// \param[in] str2    Second string; may be null only when count2 is 0
/// \param[in] count2  Number of characters in str2
/// \param[in] locale  Locale supplying the `std::collate<T>` facet
///
/// \return Result of `std::collate<T>::compare` for the two ranges
///
template <class T>
inline int strncoll(
    _In_reads_or_z_opt_(count1) const T* str1, _In_ size_t count1,
    _In_reads_or_z_opt_(count2) const T* str2, _In_ size_t count2,
    _In_ const std::locale& locale)
{
    assert(str1 || !count1);
    assert(str2 || !count2);
    const T* const end1 = str1 + count1;
    const T* const end2 = str2 + count2;
    const std::collate<T>& facet = std::use_facet<std::collate<T>>(locale);
    return facet.compare(str1, end1, str2, end2);
}
320
///
/// Compares two strings character by character, ignoring case
///
/// Characters are lower-cased with the locale's `std::ctype` facet of the respective character type before comparing.
///
/// \param[in] str1    First string; may be null only when count1 is 0
/// \param[in] count1  Maximum number of characters of str1 to compare
/// \param[in] str2    Second string; may be null only when count2 is 0
/// \param[in] count2  Maximum number of characters of str2 to compare
/// \param[in] locale  Locale supplying the case conversion
///
/// \return +1 if str1 orders after str2, -1 if it orders before, 0 if they compare equal
///
template <class T1, class T2>
inline int strnicmp(
    _In_reads_or_z_opt_(count1) const T1* str1, _In_ size_t count1,
    _In_reads_or_z_opt_(count2) const T2* str2, _In_ size_t count2,
    _In_ const std::locale& locale)
{
    assert(str1 || !count1);
    assert(str2 || !count2);
    const std::ctype<T1>& facet1 = std::use_facet<std::ctype<T1>>(locale);
    const std::ctype<T2>& facet2 = std::use_facet<std::ctype<T2>>(locale);
    size_t pos = 0;
    while (pos < count1 && pos < count2) {
        const T1 left = facet1.tolower(str1[pos]);
        const T2 right = facet2.tolower(str2[pos]);
        // Both strings terminated at the same position.
        if (!(left | right))
            break;
        if (left > right) return +1;
        if (left < right) return -1;
        ++pos;
    }
    // The common part is equal; whichever string still has characters left is the greater one.
    if (pos < count1 && str1[pos]) return +1;
    if (pos < count2 && str2[pos]) return -1;
    return 0;
}
350
///
/// Searches a string for the first occurrence of a zero-terminated sample
///
/// \param[in] str     String to search; may be null only when count is 0
/// \param[in] count   Maximum number of characters of str to examine; the search also stops at a zero terminator
/// \param[in] sample  Zero-terminated text to look for; asserted to be non-null
///
/// \return Index in str where sample starts, or npos if it was not found; an empty sample matches at index 0
///
template <class T1, class T2>
inline size_t strnstr(
    _In_reads_or_z_opt_(count) const T1* str,
    _In_ size_t count,
    _In_z_ const T2* sample)
{
    assert(str || !count);
    assert(sample);
    size_t start = 0;
    for (;;) {
        // Try to match the whole sample at the current start position.
        size_t matched = 0;
        for (;;) {
            if (!sample[matched])
                return start;
            const size_t at = start + matched;
            if (at >= count || !str[at])
                return npos;
            if (str[at] != sample[matched])
                break;
            ++matched;
        }
        ++start;
    }
}
379
///
/// Searches a string for the first occurrence of a zero-terminated sample, ignoring case
///
/// Characters are lower-cased with the locale's `std::ctype` facet of the respective character type before comparing.
///
/// \param[in] str     String to search; may be null only when count is 0
/// \param[in] count   Maximum number of characters of str to examine; the search also stops at a zero terminator
/// \param[in] sample  Zero-terminated text to look for; asserted to be non-null
/// \param[in] locale  Locale supplying the case conversion
///
/// \return Index in str where sample starts, or npos if it was not found; an empty sample matches at index 0
///
template <class T1, class T2>
inline size_t strnistr(
    _In_reads_or_z_opt_(count) const T1* str,
    _In_ size_t count,
    _In_z_ const T2* sample,
    _In_ const std::locale& locale)
{
    assert(str || !count);
    assert(sample);
    const std::ctype<T1>& facet1 = std::use_facet<std::ctype<T1>>(locale);
    const std::ctype<T2>& facet2 = std::use_facet<std::ctype<T2>>(locale);
    size_t start = 0;
    for (;;) {
        // Try to match the whole sample at the current start position.
        size_t matched = 0;
        for (;;) {
            if (!sample[matched])
                return start;
            const size_t at = start + matched;
            if (at >= count || !str[at])
                return npos;
            if (facet1.tolower(str[at]) != facet2.tolower(sample[matched]))
                break;
            ++matched;
        }
        ++start;
    }
}
411
///
/// Copies a string, writing at most `count` characters
///
/// \param[out] dst    Destination buffer; left unterminated when src holds count or more characters
/// \param[in]  src    Source string; read up to its zero terminator or count characters
/// \param[in]  count  Capacity shared by both buffers, in characters
///
/// \return Number of characters copied, not counting the zero terminator
///
template <class T1, class T2>
inline size_t strncpy(
    _Out_writes_(count) _Post_maybez_ T1* dst,
    _In_reads_or_z_opt_(count) const T2* src, _In_ size_t count)
{
    assert((dst && src) || !count);
    size_t copied = 0;
    while (copied < count) {
        dst[copied] = src[copied];
        // The terminator is copied too, but not counted.
        if (dst[copied] == 0)
            break;
        ++copied;
    }
    return copied;
}
434
///
/// Copies a string between two separately sized buffers
///
/// \param[out] dst        Destination buffer; left unterminated when it fills up completely
/// \param[in]  count_dst  Capacity of dst in characters
/// \param[in]  src        Source string; may be null only when count_src is 0
/// \param[in]  count_src  Maximum number of characters to read from src; reading also stops at a zero terminator
///
/// \return Number of characters copied, not counting the zero terminator
///
template <class T1, class T2>
inline size_t strncpy(
    _Out_writes_(count_dst) _Post_maybez_ T1* dst, _In_ size_t count_dst,
    _In_reads_or_z_opt_(count_src) const T2* src, _In_ size_t count_src)
{
    assert(dst || !count_dst);
    assert(src || !count_src);
    for (size_t i = 0; ; ++i)
    {
        // dst[count_dst] is already out of bounds: stop before touching it.
        if (i >= count_dst)
            return i;
        // src[count_src] must not be read: terminate the copy here instead.
        if (i >= count_src) {
            dst[i] = 0;
            return i;
        }
        if ((dst[i] = src[i]) == 0)
            return i;
    }
}
464
///
/// Converts every "\r\n" sequence of a string to a single '\n'
///
/// Lone '\r' and '\n' characters are copied unchanged.
///
/// \param[out] dst  Destination buffer; receives the converted, zero-terminated text
/// \param[in]  src  Zero-terminated source text
///
/// \return Number of characters written to dst, not counting the zero terminator
///
template <class T>
inline size_t crlf2nl(_Out_writes_z_(strlen(src)) T* dst, _In_z_ const T* src)
{
    assert(dst);
    assert(src);
    const T* in = src;
    T* out = dst;
    while (*in) {
        if (in[0] == '\r' && in[1] == '\n') {
            // Collapse the pair into one line feed.
            *out++ = '\n';
            in += 2;
        }
        else
            *out++ = *in++;
    }
    *out = 0;
    return static_cast<size_t>(out - dst);
}
491
///
/// Parses the magnitude of an integer from a string
///
/// Leading white space (as classified by `isspace`) and one optional '+' or '-' sign are skipped first.
/// Parsing stops at the first character that is not a digit of the radix, at the zero terminator,
/// or after count characters.
///
/// \param[in]  str    String to parse; may be null only when count is 0
/// \param[in]  count  Maximum number of characters to examine
/// \param[out] end    When not null, receives the index at which parsing stopped
/// \param[in]  radix  Number base in 2..36, or 0 to detect it: a "0x"/"0X" prefix selects 16, a leading "0" selects 8, anything else 10
/// \param[out] flags  Bit 0x01 is set when a '-' sign was read; bit 0x02 is set when a digit could not be added without overflowing T_bin
///
/// \return Magnitude accumulated in T_bin; digits that would overflow are skipped rather than accumulated
///
template <class T, class T_bin>
inline T_bin strtoint(
    _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
    _Out_opt_ size_t* end,
    _In_ int radix,
    _Out_ uint8_t& flags)
{
    assert(str || !count);
    assert(radix == 0 || (2 <= radix && radix <= 36));

    size_t pos = 0;
    T_bin result = 0;
    flags = 0;

    // True when the cursor ran out of the buffer or reached the zero terminator.
    const auto exhausted = [&]() -> bool { return pos >= count || !str[pos]; };
    // Common exit: report the stop position and hand back what was accumulated so far.
    const auto done = [&]() -> T_bin {
        if (end) *end = pos;
        return result;
    };

    // Skip leading spaces.
    while (!exhausted() && isspace(str[pos]))
        ++pos;
    if (exhausted())
        return done();

    // Read the sign.
    if (str[pos] == '+' || str[pos] == '-') {
        if (str[pos] == '-')
            flags |= 0x01;
        ++pos;
        if (exhausted())
            return done();
    }

    if (radix == 16) {
        // On hexadecimal, allow leading 0x.
        const bool has_prefix =
            str[pos] == '0' && pos + 1 < count &&
            (str[pos + 1] == 'x' || str[pos + 1] == 'X');
        if (has_prefix) {
            pos += 2;
            if (exhausted())
                return done();
        }
    }
    else if (radix == 0) {
        // Autodetect radix.
        if (str[pos] != '0')
            radix = 10;
        else {
            ++pos;
            if (exhausted())
                return done();
            if (str[pos] == 'x' || str[pos] == 'X') {
                radix = 16;
                ++pos;
                if (exhausted())
                    return done();
            }
            else
                radix = 8;
        }
    }

    // We have the radix. Precompute the largest value that can still take another digit.
    const T_bin base = (T_bin)radix;
    const T_bin limit = (T_bin)-1;
    const T_bin safe_prefix = limit / base;
    const T_bin safe_digit = limit % base;

    do {
        const T c = str[pos];
        T_bin digit;
        if ('0' <= c && c <= '9')
            digit = (T_bin)c - '0';
        else if ('A' <= c && c <= 'Z')
            digit = (T_bin)c - 'A' + '\x0a';
        else if ('a' <= c && c <= 'z')
            digit = (T_bin)c - 'a' + '\x0a';
        else
            break;
        if (digit >= base)
            break;

        if (result < safe_prefix || // Neither multiplication nor addition will overflow.
            (result == safe_prefix && digit <= safe_digit)) // Small digits will not overflow.
            result = result * base + digit;
        else {
            // Overflow!
            flags |= 0x02;
        }

        ++pos;
    } while (!exhausted());

    return done();
}
585
596 template <class T, class T_bin>
597 T_bin strtoint(
598 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
599 _Out_opt_ size_t* end,
600 _In_ int radix)
601 {
602 uint8_t flags;
603 T_bin value;
604
605 switch (sizeof(T_bin)) {
606 case 1:
607 value = (T_bin)strtoint<T, uint8_t>(str, count, end, radix, flags);
608 if ((flags & 0x01) && (value & 0x80)) {
609 // Sign bit is 1 => overflow.
610 flags |= 0x02;
611 }
612 return (flags & 0x02) ?
613 (flags & 0x01) ? (T_bin)0x80 : (T_bin)0x7f :
614 (flags & 0x01) ? -value : value;
615
616 case 2:
617 value = (T_bin)strtoint<T, T_U2>(str, count, end, radix, flags);
618 if ((flags & 0x01) && (value & 0x8000)) {
619 // Sign bit is 1 => overflow.
620 flags |= 0x02;
621 }
622 return (flags & 0x02) ?
623 (flags & 0x01) ? (T_bin)0x8000 : (T_bin)0x7fff :
624 (flags & 0x01) ? -value : value;
625
626 case 4:
627 value = (T_bin)strtoint<T, uint32_t>(str, count, end, radix, flags);
628 if ((flags & 0x01) && (value & 0x80000000)) {
629 // Sign bit is 1 => overflow.
630 flags |= 0x02;
631 }
632 return (flags & 0x02) ?
633 (flags & 0x01) ? (T_bin)0x80000000 : (T_bin)0x7fffffff :
634 (flags & 0x01) ? -value : value;
635
636 case 8:
637 value = (T_bin)strtoint<T, uint64_t>(str, count, end, radix, flags);
638 if ((flags & 0x01) && (value & 0x8000000000000000)) {
639 // Sign bit is 1 => overflow.
640 flags |= 0x02;
641 }
642 return (flags & 0x02) ?
643 (flags & 0x01) ? (T_bin)0x8000000000000000 : (T_bin)0x7fffffffffffffff :
644 (flags & 0x01) ? -value : value;
645
646 default:
647 throw std::invalid_argument("Unsupported bit length");
648 }
649 }
650
661 template <class T, class T_bin>
662 inline T_bin strtouint(
663 _In_reads_or_z_opt_(count) const T* str,
664 _In_ size_t count,
665 _Out_opt_ size_t* end,
666 _In_ int radix)
667 {
668 uint8_t flags;
669 T_bin value;
670
671 switch (sizeof(T_bin)) {
672 case 1: value = (T_bin)strtoint<T, uint8_t>(str, count, end, radix, flags); break;
673 case 2: value = (T_bin)strtoint<T, uint16_t>(str, count, end, radix, flags); break;
674 case 4: value = (T_bin)strtoint<T, uint32_t>(str, count, end, radix, flags); break;
675 case 8: value = (T_bin)strtoint<T, uint64_t>(str, count, end, radix, flags); break;
676 default: throw std::invalid_argument("Unsupported bit length");
677 }
678
679 return (flags & 0x02) ?
680 (flags & 0x01) ? (T_bin)0 : (T_bin)-1 :
681 (flags & 0x01) ? ~value : value;
682 }
683
694 template <class T>
695 inline int32_t strto32(
696 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
697 _Out_opt_ size_t* end,
698 _In_ int radix)
699 {
700 return strtoint<T, int32_t>(str, count, end, radix);
701 }
702
713 template <class T>
714 inline int64_t strto64(
715 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
716 _Out_opt_ size_t* end,
717 _In_ int radix)
718 {
719 return strtoint<T, int64_t>(str, count, end, radix);
720 }
721
733 template <class T>
734 inline intptr_t strtoi(
735 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
736 _Out_opt_ size_t* end,
737 _In_ int radix)
738 {
739#if defined(_WIN64) || defined(__LP64__)
740 return (intptr_t)strto64(str, count, end, radix);
741#else
742 return (intptr_t)strto32(str, count, end, radix);
743#endif
744 }
745
756 template <class T>
757 inline uint32_t strtou32(
758 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
759 _Out_opt_ size_t* end,
760 _In_ int radix)
761 {
762 return strtouint<T, uint32_t>(str, count, end, radix);
763 }
764
775 template <class T>
776 inline uint64_t strtou64(
777 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
778 _Out_opt_ size_t* end,
779 _In_ int radix)
780 {
781 return strtouint<T, uint64_t>(str, count, end, radix);
782 }
783
795 template <class T>
796 inline size_t strtoui(
797 _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
798 _Out_opt_ size_t* end,
799 _In_ int radix)
800 {
801#if defined(_WIN64) || defined(__LP64__)
802 return (size_t)strtou64(str, count, end, radix);
803#else
804 return (size_t)strtou32(str, count, end, radix);
805#endif
806 }
807
809 inline int vsnprintf(_Out_z_cap_(capacity) char *str, _In_ size_t capacity, _In_z_ _Printf_format_string_ const char *format, _In_ va_list arg)
810 {
811#if _MSC_VER <= 1600
812#pragma warning(suppress: 4996)
813 return _vsnprintf(str, capacity, format, arg);
814#else
815#pragma warning(suppress: 4996)
816 return ::vsnprintf(str, capacity, format, arg);
817#endif
818 }
819
///
/// Formats text into a wide-character buffer using a `va_list`
///
/// Forwards to `_vsnwprintf` when `_WIN32` is defined and to the standard `vswprintf` otherwise.
///
/// \param[out] str       Destination buffer
/// \param[in]  capacity  Capacity passed on to the runtime function
/// \param[in]  format    printf-style format string
/// \param[in]  arg       Arguments for the format string
///
/// \return Value returned by the underlying runtime function
///
inline int vsnprintf(_Out_z_cap_(capacity) wchar_t *str, _In_ size_t capacity, _In_z_ _Printf_format_string_ const wchar_t *format, _In_ va_list arg) noexcept
{
#ifdef _WIN32
#pragma warning(suppress: 4996)
    return _vsnwprintf(str, capacity, format, arg);
#else
    // _vsnwprintf is a Microsoft extension; vswprintf is the portable counterpart.
    return ::vswprintf(str, capacity, format, arg);
#endif
}
826
///
/// Formats text and appends it to a string, using a `va_list`
///
/// Formatting is tried in a stack buffer first. When the result does not fit, a heap buffer
/// is allocated and formatting is retried until it succeeds.
///
/// \param[in,out] str     String to append the formatted text to
/// \param[in]     format  printf-style format string
/// \param[in]     arg     Arguments for the format string; left usable by the caller
///
template<class _Elem, class _Traits, class _Ax>
inline void vappendf(_Inout_ std::basic_string<_Elem, _Traits, _Ax> &str, _In_z_ _Printf_format_string_ const _Elem *format, _In_ va_list arg)
{
    _Elem buf[1024/sizeof(_Elem)];
    constexpr size_t stack_capacity = sizeof(buf)/sizeof(buf[0]);

    // A va_list may be consumed by the call that reads it, so every formatting
    // attempt works on its own copy.
    va_list arg_copy;

    // Try with stack buffer first.
    va_copy(arg_copy, arg);
    int count = vsnprintf(buf, stack_capacity - 1, format, arg_copy);
    va_end(arg_copy);

    // Runtime functions disagree on how truncation is reported: some return a
    // negative value, others return the length that would have been needed.
    // Only a non-negative count below the offered capacity is a complete result.
    if (count >= 0 && static_cast<size_t>(count) < stack_capacity - 1) {
        // Copy from stack.
        str.append(buf, static_cast<size_t>(count));
        return;
    }

    for (size_t capacity = 2*1024/sizeof(_Elem);; capacity *= 2) {
        if (count >= 0) {
            // The required length is known: make sure the next buffer is big enough at once.
            const size_t required = static_cast<size_t>(count) + 2;
            if (capacity < required)
                capacity = required;
        }

        // Allocate on heap and retry.
        auto buf_dyn = std::make_unique<_Elem[]>(capacity);
        va_copy(arg_copy, arg);
        count = vsnprintf(buf_dyn.get(), capacity - 1, format, arg_copy);
        va_end(arg_copy);
        if (count >= 0 && static_cast<size_t>(count) < capacity - 1) {
            str.append(buf_dyn.get(), static_cast<size_t>(count));
            break;
        }
    }
}
856
863 template<class _Elem, class _Traits, class _Ax>
864 inline void appendf(_Inout_ std::basic_string<_Elem, _Traits, _Ax> &str, _In_z_ _Printf_format_string_ const _Elem *format, ...)
865 {
866 va_list arg;
867 va_start(arg, format);
868 vappendf(str, format, arg);
869 va_end(arg);
870 }
871
879 template<class _Elem, class _Traits, class _Ax>
880 inline void vsprintf(_Inout_ std::basic_string<_Elem, _Traits, _Ax> &str, _In_z_ _Printf_format_string_ const _Elem *format, _In_ va_list arg)
881 {
882 str.clear();
883 appendf(str, format, arg);
884 }
885
892 template<class _Elem, class _Traits, class _Ax>
893 inline void sprintf(_Inout_ std::basic_string<_Elem, _Traits, _Ax> &str, _In_z_ _Printf_format_string_ const _Elem *format, ...)
894 {
895 va_list arg;
896 va_start(arg, format);
897 vsprintf(str, format, arg);
898 va_end(arg);
899 }
900}