stdex
Additional custom algorithms and algorithms not covered by Standard C++
Loading...
Searching...
No Matches
string.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2016-2023 Amebis
4*/
5
6#pragma once
7
8#include "sal.hpp"
9#include <assert.h>
10#include <ctype.h>
11#include <stdint.h>
12#include <stdexcept>
13
14namespace stdex
15{
/// UTF-16 code unit type: `wchar_t` when `_WIN32` is defined, `char16_t` otherwise.
#ifdef _WIN32
using utf16_t = wchar_t;
#else
using utf16_t = char16_t;
#endif
24
/// Tests whether a UTF-16 code unit is a high (leading) surrogate.
///
/// \param[in] chr  UTF-16 code unit to test
///
/// \return `true` when `chr` lies in 0xD800..0xDBFF (both bounds inclusive); `false` otherwise
inline bool is_high_surrogate(_In_ utf16_t chr)
{
    // The lower bound must be inclusive: 0xD800 is itself a high surrogate.
    return 0xd800 <= chr && chr < 0xdc00;
}
34
/// Tests whether a UTF-16 code unit is a low (trailing) surrogate.
///
/// \param[in] chr  UTF-16 code unit to test
///
/// \return `true` when `chr` lies in 0xDC00..0xDFFF (both bounds inclusive); `false` otherwise
inline bool is_low_surrogate(_In_ utf16_t chr)
{
    // The lower bound must be inclusive: 0xDC00 is itself a low surrogate.
    return 0xdc00 <= chr && chr < 0xe000;
}
44
50 inline bool is_surrogate_pair(_In_reads_(2) const utf16_t* str)
51 {
52 return is_high_surrogate(str[0]) && is_low_surrogate(str[1]);
53 }
54
60 inline char32_t surrogate_pair_to_ucs4(_In_reads_(2) const utf16_t* str)
61 {
62 assert(is_surrogate_pair(str));
63 return
64 ((char32_t)(str[0] - 0xd800) << 10) +
65 (char32_t)(str[1] - 0xdc00) +
66 0x10000;
67 }
68
/// Splits a UCS-4 code point into a UTF-16 surrogate pair.
///
/// \param[out] str  Receives two UTF-16 code units: high surrogate in `str[0]`, low surrogate in `str[1]`
/// \param[in]  chr  Code point to encode; asserted (debug builds) to be 0x10000 or above
inline void ucs4_to_surrogate_pair(_Out_writes_(2) utf16_t* str, _In_ char32_t chr)
{
    assert(chr >= 0x10000);
    // Remove the supplementary-plane offset; the remainder is split into two 10-bit halves.
    const char32_t payload = chr - 0x10000;
    const char32_t upper = (payload >> 10) & 0x3ff;
    const char32_t lower = payload & 0x3ff;
    str[0] = (utf16_t)(0xd800 + upper);
    str[1] = (utf16_t)(0xdc00 + lower);
}
81
/// Tests whether a code point falls in one of four combining-mark ranges.
///
/// \param[in] chr  Code point to test
///
/// \return `true` when `chr` is in 0x0300..0x036F, 0x1DC0..0x1DFF, 0x20D0..0x20FF or 0xFE20..0xFE2F
inline bool iscombining(_In_ char32_t chr)
{
    if (0x0300 <= chr && chr < 0x0370)
        return true;
    if (0x1dc0 <= chr && chr < 0x1e00)
        return true;
    if (0x20d0 <= chr && chr < 0x2100)
        return true;
    return 0xfe20 <= chr && chr < 0xfe30;
}
95
/// Tests whether a single character is a line-break character.
///
/// \param[in] chr  Character to test
///
/// \return 1 when `chr` is `'\n'` or `'\r'`; 0 otherwise
template <class T>
inline size_t islbreak(_In_ T chr)
{
    if (chr == '\n')
        return 1;
    return chr == '\r' ? 1 : 0;
}
106
/// Measures the line break at the start of a string.
///
/// CR+LF and LF+CR are each treated as one two-character line break.
///
/// \param[in] chr    Pointer to the characters to examine
/// \param[in] count  Number of characters available at `chr`
///
/// \return 2 for a two-character line break, 1 for a lone `'\n'` or `'\r'`,
///         0 when the string is empty or does not start with a line break
template <class T>
inline size_t islbreak(_In_reads_or_z_(count) const T* chr, _In_ size_t count)
{
    if (!count)
        return 0;

    const bool starts_cr = chr[0] == '\r';
    const bool starts_lf = chr[0] == '\n';
    if (!starts_cr && !starts_lf)
        return 0;

    // chr[1] is inspected only when a second character is available.
    if (count >= 2 && ((starts_cr && chr[1] == '\n') || (starts_lf && chr[1] == '\r')))
        return 2;

    // A lone line-break character counts even when it is the only character
    // available (count == 1); the previous `count > 1` test rejected that case.
    return 1;
}
122
129 inline size_t glyphlen(_In_reads_or_z_(count) const wchar_t* glyph, size_t count)
130 {
131 if (count) {
132#ifdef _WIN32
133 size_t i = count < 2 || !is_surrogate_pair(glyph) ? 1 : 2;
134#else
135 size_t i = 1;
136#endif
137 for (; i < count && iscombining(glyph[i]); ++i);
138 return i;
139 }
140 return 0;
141 }
142
/// Counts the characters of a zero-terminated string.
///
/// \param[in] str  Zero-terminated string; must not be null (asserted in debug builds)
///
/// \return Number of characters preceding the zero terminator
template <class T>
inline size_t strlen(_In_z_ const T* str)
{
    assert(str);
    const T* cursor = str;
    while (*cursor)
        ++cursor;
    return (size_t)(cursor - str);
}
158
/// Counts the characters of a string, examining at most `count` characters.
///
/// \param[in] str    String to measure; must not be null (asserted in debug builds)
/// \param[in] count  Maximum number of characters to examine
///
/// \return Number of characters before the first zero character, or `count` if none was found
template <class T>
inline size_t strnlen(_In_z_ const T* str, _In_ size_t count)
{
    assert(str);
    size_t len = 0;
    while (len < count && str[len])
        ++len;
    return len;
}
175
/// Finds the first occurrence of a character in a string.
///
/// The search stops at the first zero character or after `count` characters,
/// whichever comes first; the zero terminator itself is never matched.
///
/// \param[in] str    String to search; may be null only when `count` is 0
/// \param[in] chr    Character to look for
/// \param[in] count  Maximum number of characters to examine
///
/// \return Pointer to the first matching character, or `nullptr` when not found
template <class T>
inline const T* strnchr(
    _In_reads_or_z_(count) const T* str,
    _In_ T chr,
    _In_ size_t count)
{
    assert(str || !count);
    for (size_t pos = 0; pos < count; ++pos) {
        const T current = str[pos];
        if (!current)
            break;
        if (current == chr)
            return &str[pos];
    }
    return nullptr;
}
196
/// Finds the first occurrence of a character in a string, ignoring case.
///
/// Both the searched character and each string character are lower-cased with the
/// `std::ctype<T>` facet of `locale` before comparison. The search stops at the
/// first zero character or after `count` characters, whichever comes first.
///
/// \param[in] str     String to search; may be null only when `count` is 0
/// \param[in] chr     Character to look for
/// \param[in] count   Maximum number of characters to examine
/// \param[in] locale  Locale supplying the case conversion
///
/// \return Pointer to the first matching character, or `nullptr` when not found
template <class T>
inline const T* strnichr(
    _In_reads_or_z_(count) const T* str,
    _In_ T chr,
    _In_ size_t count,
    _In_ const std::locale& locale)
{
    assert(str || !count);
    const auto& facet = std::use_facet<std::ctype<T>>(locale);
    const T wanted = facet.tolower(chr);
    for (size_t pos = 0; pos < count; ++pos) {
        if (!str[pos])
            break;
        if (facet.tolower(str[pos]) == wanted)
            return &str[pos];
    }
    return nullptr;
}
220
/// Compares two strings character by character.
///
/// Each string ends at its first zero character or at its own count limit,
/// whichever comes first.
///
/// \param[in] str1    First string; may be null only when `count1` is 0
/// \param[in] count1  Maximum number of characters of `str1` to examine
/// \param[in] str2    Second string; may be null only when `count2` is 0
/// \param[in] count2  Maximum number of characters of `str2` to examine
///
/// \return +1 when `str1` orders after `str2`, -1 when it orders before, 0 when equal
template <class T1, class T2>
inline int strncmp(
    _In_reads_or_z_(count1) const T1* str1, _In_ size_t count1,
    _In_reads_or_z_(count2) const T2* str2, _In_ size_t count2)
{
    assert(str1 || !count1);
    assert(str2 || !count2);

    size_t pos = 0;
    for (; pos < count1 && pos < count2; ++pos) {
        const T1 lhs = str1[pos];
        const T2 rhs = str2[pos];
        // Both strings terminated at the same position.
        if (!lhs && !rhs)
            break;
        if (lhs > rhs) return +1;
        if (lhs < rhs) return -1;
    }

    // One side ran into its count limit: the string that still has characters is greater.
    if (pos < count1 && str1[pos]) return +1;
    if (pos < count2 && str2[pos]) return -1;
    return 0;
}
247
/// Compares two strings character by character, ignoring case.
///
/// Characters are lower-cased with the `std::ctype` facets of `locale` before
/// comparison. Each string ends at its first zero character or at its own count
/// limit, whichever comes first.
///
/// \param[in] str1    First string; may be null only when `count1` is 0
/// \param[in] count1  Maximum number of characters of `str1` to examine
/// \param[in] str2    Second string; may be null only when `count2` is 0
/// \param[in] count2  Maximum number of characters of `str2` to examine
/// \param[in] locale  Locale supplying the case conversion
///
/// \return +1 when `str1` orders after `str2`, -1 when it orders before, 0 when equal
template <class T1, class T2>
inline int strnicmp(
    _In_reads_or_z_(count1) const T1* str1, _In_ size_t count1,
    _In_reads_or_z_(count2) const T2* str2, _In_ size_t count2,
    _In_ const std::locale& locale)
{
    assert(str1 || !count1);
    assert(str2 || !count2);

    const auto& facet1 = std::use_facet<std::ctype<T1>>(locale);
    const auto& facet2 = std::use_facet<std::ctype<T2>>(locale);

    size_t pos = 0;
    for (; pos < count1 && pos < count2; ++pos) {
        const T1 lhs = facet1.tolower(str1[pos]);
        const T2 rhs = facet2.tolower(str2[pos]);
        // Both strings terminated at the same position.
        if (!lhs && !rhs)
            break;
        if (lhs > rhs) return +1;
        if (lhs < rhs) return -1;
    }

    // One side ran into its count limit: the string that still has characters is greater.
    if (pos < count1 && str1[pos]) return +1;
    if (pos < count2 && str2[pos]) return -1;
    return 0;
}
277
/// Finds the first occurrence of a zero-terminated sample in a string.
///
/// The searched string ends at its first zero character or after `count`
/// characters, whichever comes first. An empty sample matches at the start.
///
/// \param[in] str     String to search; may be null only when `count` is 0
/// \param[in] sample  Zero-terminated substring to look for; must not be null
/// \param[in] count   Maximum number of characters of `str` to examine
///
/// \return Pointer to the start of the first match, or `nullptr` when not found
template <class T1, class T2>
const T1* strnstr(
    _In_reads_or_z_(count) const T1* str,
    _In_z_ const T2* sample,
    _In_ size_t count)
{
    assert(str || !count);
    assert(sample);
    for (size_t start = 0;; ++start) {
        size_t matched = 0;
        for (;;) {
            // Reached the end of the sample: everything matched.
            if (!sample[matched])
                return str + start;
            const size_t pos = start + matched;
            // Ran out of string before the sample ended: no later start can match either.
            if (pos >= count || !str[pos])
                return nullptr;
            if (str[pos] != sample[matched])
                break;
            ++matched;
        }
    }
}
306
/// Finds the first occurrence of a zero-terminated sample in a string, ignoring case.
///
/// Characters of both strings are lower-cased with the `std::ctype` facets of
/// `locale` before comparison. The searched string ends at its first zero
/// character or after `count` characters, whichever comes first. An empty sample
/// matches at the start.
///
/// \param[in] str     String to search; may be null only when `count` is 0
/// \param[in] sample  Zero-terminated substring to look for; must not be null
/// \param[in] count   Maximum number of characters of `str` to examine
/// \param[in] locale  Locale supplying the case conversion
///
/// \return Pointer to the start of the first match, or `nullptr` when not found
template <class T1, class T2>
inline const T1* strnistr(
    _In_reads_or_z_(count) const T1* str,
    _In_z_ const T2* sample,
    _In_ size_t count,
    _In_ const std::locale& locale)
{
    assert(str || !count);
    assert(sample);
    const auto& facet1 = std::use_facet<std::ctype<T1>>(locale);
    const auto& facet2 = std::use_facet<std::ctype<T2>>(locale);
    for (size_t start = 0;; ++start) {
        size_t matched = 0;
        for (;;) {
            // Reached the end of the sample: everything matched.
            if (!sample[matched])
                return str + start;
            const size_t pos = start + matched;
            // Ran out of string before the sample ended: no later start can match either.
            if (pos >= count || !str[pos])
                return nullptr;
            if (facet1.tolower(str[pos]) != facet2.tolower(sample[matched]))
                break;
            ++matched;
        }
    }
}
338
/// Copies a zero-terminated string, replacing every CR+LF pair with a single LF.
///
/// A `'\r'` that is not immediately followed by `'\n'` is copied unchanged.
/// The output is never longer than the input, and the write position never
/// passes the read position.
///
/// \param[out] dst  Destination buffer; must hold `strlen(src) + 1` characters
///                  (the copied text plus the zero terminator)
/// \param[in]  src  Zero-terminated source string
///
/// \return Number of characters written to `dst`, not counting the zero terminator
template <class T>
inline size_t crlf2nl(_Out_writes_z_(strlen(src) + 1) T* dst, _In_z_ const T* src)
{
    assert(dst);
    assert(src);
    size_t written = 0;
    for (size_t read = 0; src[read];) {
        // src[read] is non-zero here, so src[read + 1] is at worst the terminator.
        if (src[read] == '\r' && src[read + 1] == '\n') {
            dst[written++] = '\n';
            read += 2;
        }
        else
            dst[written++] = src[read++];
    }
    dst[written] = 0;
    return written;
}
365
/// Parses the magnitude of an integer from a string (worker shared by the typed parsers).
///
/// Leading white space is skipped, then an optional `+` or `-` sign is read, then
/// digits in the given radix. With radix 16 an optional `0x`/`0X` prefix is accepted.
/// With radix 0 the radix is detected from the prefix: `0x`/`0X` selects 16, a
/// leading `0` selects 8, anything else selects 10.
///
/// \param[in]  str    String to parse; may be null only when `count` is 0
/// \param[in]  count  Maximum number of characters of `str` to examine
/// \param[out] end    Optional; receives the index at which parsing stopped
/// \param[in]  radix  0 for auto-detection, or a value in 2..36
/// \param[out] flags  Bit 0x01 is set when a `-` sign was read;
///                    bit 0x02 is set when the digits did not fit into `T_bin`
///
/// \return Magnitude accumulated from the digits that fitted into `T_bin`. The sign is
///         NOT applied; callers combine the value with `flags`.
template <class T, class T_bin>
inline T_bin strtoint(
    _In_reads_or_z_(count) const T* str, _In_ size_t count,
    _Out_opt_ size_t* end,
    _In_ int radix,
    _Out_ uint8_t& flags)
{
    assert(str || !count);
    assert(radix == 0 || (2 <= radix && radix <= 36));

    // All locals are declared up front so that `goto finish` never jumps over an initialization.
    size_t i = 0;
    T_bin value = 0, digit,
        max_ui = (T_bin)-1,
        max_ui_pre1, max_ui_pre2;

    flags = 0;

    // Skip leading spaces.
    for (;; ++i) {
        if (i >= count || !str[i]) goto finish;
        // isspace() is only defined for arguments representable as unsigned char.
        // Narrow characters are therefore converted through unsigned char, and wide
        // characters outside 0..0xff are treated as non-space.
        const unsigned long long code = sizeof(T) == 1 ?
            (unsigned long long)(unsigned char)str[i] :
            (unsigned long long)str[i];
        if (code > 0xff || !isspace((int)code)) break;
    }

    // Read the sign.
    if (str[i] == '+') {
        ++i;
        if (i >= count || !str[i]) goto finish;
    }
    else if (str[i] == '-') {
        flags |= 0x01;
        ++i;
        if (i >= count || !str[i]) goto finish;
    }

    if (radix == 16) {
        // On hexadecimal, allow leading 0x.
        if (str[i] == '0' && i + 1 < count && (str[i + 1] == 'x' || str[i + 1] == 'X')) {
            i += 2;
            if (i >= count || !str[i]) goto finish;
        }
    }
    else if (!radix) {
        // Autodetect radix.
        if (str[i] == '0') {
            ++i;
            if (i >= count || !str[i]) goto finish;
            if (str[i] == 'x' || str[i] == 'X') {
                radix = 16;
                ++i;
                if (i >= count || !str[i]) goto finish;
            }
            else
                radix = 8;
        }
        else
            radix = 10;
    }

    // We have the radix. Precompute the largest value that can still take another digit.
    max_ui_pre1 = max_ui / (T_bin)radix;
    max_ui_pre2 = max_ui % (T_bin)radix;
    for (;;) {
        if ('0' <= str[i] && str[i] <= '9')
            digit = (T_bin)(str[i] - '0');
        else if ('A' <= str[i] && str[i] <= 'Z')
            digit = (T_bin)(str[i] - 'A' + 10);
        else if ('a' <= str[i] && str[i] <= 'z')
            digit = (T_bin)(str[i] - 'a' + 10);
        else
            goto finish;
        if (digit >= (T_bin)radix)
            goto finish;

        if (value < max_ui_pre1 || // Neither multiplication nor addition will overflow.
            (value == max_ui_pre1 && digit <= max_ui_pre2)) // Small digits will not overflow.
            value = (T_bin)(value * (T_bin)radix + digit);
        else {
            // Overflow! Keep consuming digits so that `end` lands after the whole number.
            flags |= 0x02;
        }

        ++i;
        if (i >= count || !str[i])
            goto finish;
    }

finish:
    if (end) *end = i;
    return value;
}

/// Parses a signed integer from a string, saturating on overflow.
///
/// The magnitude is parsed as the unsigned type of the same width as `T_bin`;
/// the sign is then applied. Values below the minimum of `T_bin` yield the
/// minimum; values above the maximum yield the maximum.
///
/// \param[in]  str    String to parse; may be null only when `count` is 0
/// \param[in]  count  Maximum number of characters of `str` to examine
/// \param[out] end    Optional; receives the index at which parsing stopped
/// \param[in]  radix  0 for auto-detection, or a value in 2..36
///
/// \return Parsed value, clamped to the range of `T_bin`
///
/// \throws std::invalid_argument when `sizeof(T_bin)` is not 1, 2, 4 or 8
template <class T, class T_bin>
T_bin strtoint(
    _In_reads_or_z_(count) const T* str, _In_ size_t count,
    _Out_opt_ size_t* end,
    _In_ int radix)
{
    uint8_t flags;
    uint64_t magnitude;

    switch (sizeof(T_bin)) {
    case 1: magnitude = strtoint<T, uint8_t>(str, count, end, radix, flags); break;
    case 2: magnitude = strtoint<T, uint16_t>(str, count, end, radix, flags); break;
    case 4: magnitude = strtoint<T, uint32_t>(str, count, end, radix, flags); break;
    case 8: magnitude = strtoint<T, uint64_t>(str, count, end, radix, flags); break;
    default: throw std::invalid_argument("Unsupported bit length");
    }

    const bool negative = (flags & 0x01) != 0;
    // Value of the sign bit == magnitude of the most negative representable value.
    const uint64_t sign_bit = (uint64_t)1 << (sizeof(T_bin) * 8 - 1);
    // Largest magnitude that fits: 2^(n-1) for negative numbers, 2^(n-1)-1 for positive.
    const uint64_t limit = negative ? sign_bit : sign_bit - 1;

    if ((flags & 0x02) || magnitude > limit)
        return negative ? (T_bin)sign_bit : (T_bin)(sign_bit - 1);

    // Negate in unsigned arithmetic to avoid signed overflow on the minimum value.
    return negative ? (T_bin)(0 - magnitude) : (T_bin)magnitude;
}
524
535 template <class T, class T_bin>
536 inline T_bin strtouint(
537 _In_reads_or_z_(count) const T* str,
538 _In_ size_t count,
539 _Out_opt_ size_t* end,
540 _In_ int radix)
541 {
542 uint8_t flags;
543 T_bin value;
544
545 switch (sizeof(T_bin)) {
546 case 1: value = (T_bin)strtoint<T, uint8_t>(str, count, end, radix, flags); break;
547 case 2: value = (T_bin)strtoint<T, uint16_t>(str, count, end, radix, flags); break;
548 case 4: value = (T_bin)strtoint<T, uint32_t>(str, count, end, radix, flags); break;
549 case 8: value = (T_bin)strtoint<T, uint64_t>(str, count, end, radix, flags); break;
550 default: throw std::invalid_argument("Unsupported bit length");
551 }
552
553 return (flags & 0x02) ?
554 (flags & 0x01) ? (T_bin)0 : (T_bin)-1 :
555 (flags & 0x01) ? ~value : value;
556 }
557
568 template <class T>
569 inline int32_t strto32(
570 _In_reads_or_z_(count) const T* str, _In_ size_t count,
571 _Out_opt_ size_t* end,
572 _In_ int radix)
573 {
574 return strtoint<T, int32_t>(str, count, end, radix);
575 }
576
587 template <class T>
588 inline int64_t strto64(
589 _In_reads_or_z_(count) const T* str, _In_ size_t count,
590 _Out_opt_ size_t* end,
591 _In_ int radix)
592 {
593 return strtoint<T, int64_t>(str, count, end, radix);
594 }
595
/// Parses a pointer-sized signed integer from a string.
///
/// Uses `strto64()` when `_WIN64` or `__LP64__` is defined, `strto32()` otherwise.
///
/// \param[in]  str    String to parse
/// \param[in]  count  Maximum number of characters of `str` to examine
/// \param[out] end    Optional; receives the index at which parsing stopped
/// \param[in]  radix  Number base passed through to the underlying parser
///
/// \return Parsed value
template <class T>
inline intptr_t strtoi(
    _In_reads_or_z_(count) const T* str, _In_ size_t count,
    _Out_opt_ size_t* end,
    _In_ int radix)
{
#if defined(_WIN64) || defined(__LP64__)
    const int64_t parsed = strto64(str, count, end, radix);
#else
    const int32_t parsed = strto32(str, count, end, radix);
#endif
    return (intptr_t)parsed;
}
619
630 template <class T>
631 inline uint32_t strtou32(
632 _In_reads_or_z_(count) const T* str, _In_ size_t count,
633 _Out_opt_ size_t* end,
634 _In_ int radix)
635 {
636 return strtouint<T, uint32_t>(str, count, end, radix);
637 }
638
649 template <class T>
650 inline uint64_t strtou64(
651 _In_reads_or_z_(count) const T* str, _In_ size_t count,
652 _Out_opt_ size_t* end,
653 _In_ int radix)
654 {
655 return strtouint<T, uint64_t>(str, count, end, radix);
656 }
657
/// Parses a pointer-sized unsigned integer from a string.
///
/// Uses `strtou64()` when `_WIN64` or `__LP64__` is defined, `strtou32()` otherwise.
///
/// \param[in]  str    String to parse
/// \param[in]  count  Maximum number of characters of `str` to examine
/// \param[out] end    Optional; receives the index at which parsing stopped
/// \param[in]  radix  Number base passed through to the underlying parser
///
/// \return Parsed value
template <class T>
inline size_t strtoui(
    _In_reads_or_z_(count) const T* str, _In_ size_t count,
    _Out_opt_ size_t* end,
    _In_ int radix)
{
#if defined(_WIN64) || defined(__LP64__)
    const uint64_t parsed = strtou64(str, count, end, radix);
#else
    const uint32_t parsed = strtou32(str, count, end, radix);
#endif
    return (size_t)parsed;
}
681}