stdex
Additional custom algorithms and algorithms not covered by Standard C++
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "interval.hpp"
10#include "memory.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include <stdarg.h>
14#include <stdint.h>
15#include <math.h>
16#if defined(_WIN32)
17#include <winsock2.h>
18#if _MSC_VER >= 1300
19#include <ws2ipdef.h>
20#endif
21#include <ws2tcpip.h>
22#elif defined(__APPLE__)
23#include <netinet/in.h>
24#else
25#include <inaddr.h>
26#include <in6addr.h>
27#endif
28#include <limits>
29#include <list>
30#include <locale>
31#include <memory>
32#include <set>
33#include <string>
34
35#ifdef _MSC_VER
36#pragma warning(push)
37#pragma warning(disable: 4100)
38#endif
39
// ENUM_FLAG_OPERATOR(T, X)
//   Defines the binary operator X and the compound assignment X= for the scoped
//   enumeration T. Each operator is provided for (T, T) and for mixes of T with
//   its underlying integer type; the result is always converted back to T.
//   The non-assigning forms are constexpr so that flag combinations can be used
//   in constant expressions (they consist of a single return statement).
#define ENUM_FLAG_OPERATOR(T,X) \
inline constexpr T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline constexpr T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
inline constexpr T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T& operator X= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
inline T& operator X= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }

// ENUM_FLAGS(T, type)
//   Forward-declares `enum class T : type`, defines ~, |, ^, & (and |=, ^=, &=)
//   for it, and ends with the enum head again so that the enumerator list can
//   follow the macro invocation directly:
//       ENUM_FLAGS(my_flags, int) { a = 0x1, b = 0x2 };
#define ENUM_FLAGS(T, type) \
enum class T : type; \
inline constexpr T operator ~ (const T t) { return static_cast<T>(~static_cast<std::underlying_type_t<T>>(t)); } \
ENUM_FLAG_OPERATOR(T,|) \
ENUM_FLAG_OPERATOR(T,^) \
ENUM_FLAG_OPERATOR(T,&) \
enum class T : type
53
54#if defined(_WIN32)
55#elif defined(__APPLE__)
56#define s6_words __u6_addr.__u6_addr16
57#else
58#error Unsupported platform
59#endif
60
61namespace stdex
62{
63 namespace parser
64 {
// Bit flags accepted by the `flags` argument of the parsers' search()/match().
constexpr int match_default          = 0;      // no flag set
constexpr int match_case_insensitive = 1 << 0; // compare characters after lower-casing them
constexpr int match_multiline        = 1 << 1; // let line breaks count as white space
71
75 template <class T>
77 {
78 public:
79 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
80 virtual ~basic_parser() {}
81
82 bool search(
83 _In_reads_or_z_(end) const T* text,
84 _In_ size_t start = 0,
85 _In_ size_t end = (size_t)-1,
86 _In_ int flags = match_default)
87 {
88 for (size_t i = start; i < end && text[i]; i++)
89 if (match(text, i, end, flags))
90 return true;
91 return false;
92 }
93
94 virtual bool match(
95 _In_reads_or_z_(end) const T* text,
96 _In_ size_t start = 0,
97 _In_ size_t end = (size_t)-1,
98 _In_ int flags = match_default) = 0;
99
100 template<class _Traits, class _Ax>
101 inline bool match(
102 const std::basic_string<T, _Traits, _Ax>& text,
103 _In_ size_t start = 0,
104 _In_ size_t end = (size_t)-1,
105 _In_ int flags = match_default)
106 {
107 return match(text.c_str(), start, std::min<size_t>(end, text.size()), flags);
108 }
109
110 virtual void invalidate()
111 {
112 this->interval.start = 1;
113 this->interval.end = 0;
114 }
115
116 protected:
118 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
119 {
120 if (text[start] == '&') {
121 // Potential entity start
122 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
123 for (chr_end = start + 1;; chr_end++) {
124 if (chr_end >= end || text[chr_end] == 0) {
125 // Unterminated entity
126 break;
127 }
128 if (text[chr_end] == ';') {
129 // Entity end
130 size_t n = chr_end - start - 1;
131 if (n >= 2 && text[start + 1] == '#') {
132 // Numerical entity
133 char32_t unicode;
134 if (text[start + 2] == 'x' || text[start + 2] == 'X')
135 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
136 else
137 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
138#ifdef _WIN32
139 if (unicode < 0x10000) {
140 buf[0] = (wchar_t)unicode;
141 buf[1] = 0;
142 }
143 else {
144 ucs4_to_surrogate_pair(buf, unicode);
145 buf[2] = 0;
146 }
147#else
148 buf[0] = (wchar_t)unicode;
149 buf[1] = 0;
150#endif
151 chr_end++;
152 return buf;
153 }
154 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
155 if (entity_w) {
156 chr_end++;
157 return entity_w;
158 }
159 // Unknown entity.
160 break;
161 }
162 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
163 // This char cannot possibly be a part of entity.
164 break;
165 }
166 }
167 }
168 buf[0] = text[start];
169 buf[1] = 0;
170 chr_end = start + 1;
171 return buf;
172 }
174
175 public:
177
178 protected:
179 std::locale m_locale;
180 };
181
184#ifdef _UNICODE
185 using tparser = wparser;
186#else
187 using tparser = parser;
188#endif
190
194 template <class T>
195 class basic_noop : public basic_parser<T>
196 {
197 public:
198 virtual bool match(
199 _In_reads_or_z_(end) const T* text,
200 _In_ size_t start = 0,
201 _In_ size_t end = (size_t)-1,
202 _In_ int flags = match_default)
203 {
204 _Assume_(text || start >= end);
205 if (start < end && text[start]) {
206 this->interval.start = this->interval.end = start;
207 return true;
208 }
209 this->interval.start = (this->interval.end = start) + 1;
210 return false;
211 }
212 };
213
214 using noop = basic_noop<char>;
216#ifdef _UNICODE
217 using tnoop = wnoop;
218#else
219 using tnoop = noop;
220#endif
222
226 template <class T>
227 class basic_any_cu : public basic_parser<T>
228 {
229 public:
230 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
231
232 virtual bool match(
233 _In_reads_or_z_(end) const T* text,
234 _In_ size_t start = 0,
235 _In_ size_t end = (size_t)-1,
236 _In_ int flags = match_default)
237 {
238 _Assume_(text || start >= end);
239 if (start < end && text[start]) {
240 this->interval.end = (this->interval.start = start) + 1;
241 return true;
242 }
243 this->interval.start = (this->interval.end = start) + 1;
244 return false;
245 }
246 };
247
250#ifdef _UNICODE
251 using tany_cu = wany_cu;
252#else
253 using tany_cu = any_cu;
254#endif
255
259 class sgml_any_cp : public basic_any_cu<char>
260 {
261 public:
262 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
263
264 virtual bool match(
265 _In_reads_or_z_(end) const char* text,
266 _In_ size_t start = 0,
267 _In_ size_t end = (size_t)-1,
268 _In_ int flags = match_default)
269 {
270 _Assume_(text || start >= end);
271 if (start < end && text[start]) {
272 if (text[start] == '&') {
273 // SGML entity
274 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
275 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
276 if (text[this->interval.end] == ';') {
277 this->interval.end++;
278 this->interval.start = start;
279 return true;
280 }
281 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
282 break;
283 // Unterminated entity
284 }
285 this->interval.end = (this->interval.start = start) + 1;
286 return true;
287 }
288 this->interval.start = (this->interval.end = start) + 1;
289 return false;
290 }
291 };
292
296 template <class T>
297 class basic_cu : public basic_parser<T>
298 {
299 public:
300 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
301 basic_parser<T>(locale),
302 m_chr(chr),
303 m_invert(invert)
304 {}
305
306 virtual bool match(
307 _In_reads_or_z_(end) const T* text,
308 _In_ size_t start = 0,
309 _In_ size_t end = (size_t)-1,
310 _In_ int flags = match_default)
311 {
312 _Assume_(text || start >= end);
313 if (start < end && text[start]) {
314 bool r;
315 if (flags & match_case_insensitive) {
316 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
317 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
318 }
319 else
320 r = text[start] == m_chr;
321 if ((r && !m_invert) || (!r && m_invert)) {
322 this->interval.end = (this->interval.start = start) + 1;
323 return true;
324 }
325 }
326 this->interval.start = (this->interval.end = start) + 1;
327 return false;
328 }
329
330 protected:
331 T m_chr;
332 bool m_invert;
333 };
334
335 using cu = basic_cu<char>;
336 using wcu = basic_cu<wchar_t>;
337#ifdef _UNICODE
338 using tcu = wcu;
339#else
340 using tcu = cu;
341#endif
342
346 class sgml_cp : public sgml_parser
347 {
348 public:
349 sgml_cp(const char* chr, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
350 sgml_parser(locale),
351 m_invert(invert)
352 {
353 _Assume_(chr || !count);
354 wchar_t buf[3];
355 size_t chr_end;
356 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
357 }
358
359 virtual bool match(
360 _In_reads_or_z_(end) const char* text,
361 _In_ size_t start = 0,
362 _In_ size_t end = (size_t)-1,
363 _In_ int flags = match_default)
364 {
365 _Assume_(text || start >= end);
366 if (start < end && text[start]) {
367 wchar_t buf[3];
368 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
369 bool r = ((flags & match_case_insensitive) ?
370 stdex::strnicmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size(), m_locale) :
371 stdex::strncmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size())) == 0;
372 if ((r && !m_invert) || (!r && m_invert)) {
373 this->interval.start = start;
374 return true;
375 }
376 }
377 this->interval.start = (this->interval.end = start) + 1;
378 return false;
379 }
380
381 protected:
382 std::wstring m_chr;
383 bool m_invert;
384 };
385
389 template <class T>
390 class basic_space_cu : public basic_parser<T>
391 {
392 public:
393 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
394 basic_parser<T>(locale),
395 m_invert(invert)
396 {}
397
398 virtual bool match(
399 _In_reads_or_z_(end) const T* text,
400 _In_ size_t start = 0,
401 _In_ size_t end = (size_t)-1,
402 _In_ int flags = match_default)
403 {
404 _Assume_(text || start >= end);
405 if (start < end && text[start]) {
406 bool r =
407 ((flags & match_multiline) || !islbreak(text[start])) &&
408 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
409 if ((r && !m_invert) || (!r && m_invert)) {
410 this->interval.end = (this->interval.start = start) + 1;
411 return true;
412 }
413 }
414 this->interval.start = (this->interval.end = start) + 1;
415 return false;
416 }
417
418 protected:
419 bool m_invert;
420 };
421
424#ifdef _UNICODE
425 using tspace_cu = wspace_cu;
426#else
427 using tspace_cu = space_cu;
428#endif
429
433 class sgml_space_cp : public basic_space_cu<char>
434 {
435 public:
436 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
438 {}
439
440 virtual bool match(
441 _In_reads_or_z_(end) const char* text,
442 _In_ size_t start = 0,
443 _In_ size_t end = (size_t)-1,
444 _In_ int flags = match_default)
445 {
446 _Assume_(text || start >= end);
447 if (start < end && text[start]) {
448 wchar_t buf[3];
449 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
450 const wchar_t* chr_end = chr + stdex::strlen(chr);
451 bool r =
452 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
453 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
454 if ((r && !m_invert) || (!r && m_invert)) {
455 this->interval.start = start;
456 return true;
457 }
458 }
459
460 this->interval.start = (this->interval.end = start) + 1;
461 return false;
462 }
463 };
464
468 template <class T>
469 class basic_punct_cu : public basic_parser<T>
470 {
471 public:
472 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
473 basic_parser<T>(locale),
474 m_invert(invert)
475 {}
476
477 virtual bool match(
478 _In_reads_or_z_(end) const T* text,
479 _In_ size_t start = 0,
480 _In_ size_t end = (size_t)-1,
481 _In_ int flags = match_default)
482 {
483 _Assume_(text || start >= end);
484 if (start < end && text[start]) {
485 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
486 if ((r && !m_invert) || (!r && m_invert)) {
487 this->interval.end = (this->interval.start = start) + 1;
488 return true;
489 }
490 }
491 this->interval.start = (this->interval.end = start) + 1;
492 return false;
493 }
494
495 protected:
496 bool m_invert;
497 };
498
501#ifdef _UNICODE
502 using tpunct_cu = wpunct_cu;
503#else
504 using tpunct_cu = punct_cu;
505#endif
506
510 class sgml_punct_cp : public basic_punct_cu<char>
511 {
512 public:
513 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
515 {}
516
517 virtual bool match(
518 _In_reads_or_z_(end) const char* text,
519 _In_ size_t start = 0,
520 _In_ size_t end = (size_t)-1,
521 _In_ int flags = match_default)
522 {
523 _Assume_(text || start >= end);
524 if (start < end && text[start]) {
525 wchar_t buf[3];
526 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
527 const wchar_t* chr_end = chr + stdex::strlen(chr);
528 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
529 if ((r && !m_invert) || (!r && m_invert)) {
530 this->interval.start = start;
531 return true;
532 }
533 }
534 this->interval.start = (this->interval.end = start) + 1;
535 return false;
536 }
537 };
538
542 template <class T>
544 {
545 public:
546 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
547 basic_parser<T>(locale),
548 m_invert(invert)
549 {}
550
551 virtual bool match(
552 _In_reads_or_z_(end) const T* text,
553 _In_ size_t start = 0,
554 _In_ size_t end = (size_t)-1,
555 _In_ int flags = match_default)
556 {
557 _Assume_(text || start >= end);
558 if (start < end && text[start]) {
559 bool r =
560 ((flags & match_multiline) || !islbreak(text[start])) &&
561 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
562 if ((r && !m_invert) || (!r && m_invert)) {
563 this->interval.end = (this->interval.start = start) + 1;
564 return true;
565 }
566 }
567 this->interval.start = (this->interval.end = start) + 1;
568 return false;
569 }
570
571 protected:
572 bool m_invert;
573 };
574
577#ifdef _UNICODE
579#else
581#endif
582
587 {
588 public:
589 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
591 {}
592
593 virtual bool match(
594 _In_reads_or_z_(end) const char* text,
595 _In_ size_t start = 0,
596 _In_ size_t end = (size_t)-1,
597 _In_ int flags = match_default)
598 {
599 _Assume_(text || start >= end);
600 if (start < end && text[start]) {
601 wchar_t buf[3];
602 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
603 const wchar_t* chr_end = chr + stdex::strlen(chr);
604 bool r =
605 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
606 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
607 if ((r && !m_invert) || (!r && m_invert)) {
608 this->interval.start = start;
609 return true;
610 }
611 }
612 this->interval.start = (this->interval.end = start) + 1;
613 return false;
614 }
615 };
616
620 template <class T>
621 class basic_bol : public basic_parser<T>
622 {
623 public:
624 basic_bol(bool invert = false) : m_invert(invert) {}
625
626 virtual bool match(
627 _In_reads_or_z_(end) const T* text,
628 _In_ size_t start = 0,
629 _In_ size_t end = (size_t)-1,
630 _In_ int flags = match_default)
631 {
632 _Assume_(text || start >= end);
633 bool r = start == 0 || (start <= end && islbreak(text[start - 1]));
634 if ((r && !m_invert) || (!r && m_invert)) {
635 this->interval.end = this->interval.start = start;
636 return true;
637 }
638 this->interval.start = (this->interval.end = start) + 1;
639 return false;
640 }
641
642 protected:
643 bool m_invert;
644 };
645
646 using bol = basic_bol<char>;
647 using wbol = basic_bol<wchar_t>;
648#ifdef _UNICODE
649 using tbol = wbol;
650#else
651 using tbol = bol;
652#endif
654
658 template <class T>
659 class basic_eol : public basic_parser<T>
660 {
661 public:
662 basic_eol(bool invert = false) : m_invert(invert) {}
663
664 virtual bool match(
665 _In_reads_or_z_(end) const T* text,
666 _In_ size_t start = 0,
667 _In_ size_t end = (size_t)-1,
668 _In_ int flags = match_default)
669 {
670 _Assume_(text || start >= end);
671 bool r = islbreak(text[start]);
672 if ((r && !m_invert) || (!r && m_invert)) {
673 this->interval.end = this->interval.start = start;
674 return true;
675 }
676 this->interval.start = (this->interval.end = start) + 1;
677 return false;
678 }
679
680 protected:
681 bool m_invert;
682 };
683
684 using eol = basic_eol<char>;
685 using weol = basic_eol<wchar_t>;
686#ifdef _UNICODE
687 using teol = weol;
688#else
689 using teol = eol;
690#endif
692
693 template <class T>
694 class basic_set : public basic_parser<T>
695 {
696 public:
697 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
698 basic_parser<T>(locale),
699 hit_offset((size_t)-1),
700 m_invert(invert)
701 {}
702
703 virtual bool match(
704 _In_reads_or_z_(end) const T* text,
705 _In_ size_t start = 0,
706 _In_ size_t end = (size_t)-1,
707 _In_ int flags = match_default) = 0;
708
709 virtual void invalidate()
710 {
711 hit_offset = (size_t)-1;
713 }
714
715 public:
716 size_t hit_offset;
717
718 protected:
719 bool m_invert;
720 };
721
725 template <class T>
726 class basic_cu_set : public basic_set<T>
727 {
728 public:
730 _In_reads_or_z_(count) const T* set,
731 _In_ size_t count = (size_t)-1,
732 _In_ bool invert = false,
733 _In_ const std::locale& locale = std::locale()) :
734 basic_set<T>(invert, locale)
735 {
736 if (set)
737 m_set.assign(set, set + stdex::strnlen(set, count));
738 }
739
740 virtual bool match(
741 _In_reads_or_z_(end) const T* text,
742 _In_ size_t start = 0,
743 _In_ size_t end = (size_t)-1,
744 _In_ int flags = match_default)
745 {
746 _Assume_(text || start >= end);
747 if (start < end && text[start]) {
748 const T* set = m_set.c_str();
749 size_t r = (flags & match_case_insensitive) ?
750 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
751 stdex::strnchr(set, m_set.size(), text[start]);
752 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
753 this->hit_offset = r;
754 this->interval.end = (this->interval.start = start) + 1;
755 return true;
756 }
757 }
758 this->hit_offset = (size_t)-1;
759 this->interval.start = (this->interval.end = start) + 1;
760 return false;
761 }
762
763 protected:
764 std::basic_string<T> m_set;
765 };
766
769#ifdef _UNICODE
770 using tcu_set = wcu_set;
771#else
772 using tcu_set = cu_set;
773#endif
774
778 class sgml_cp_set : public basic_set<char>
779 {
780 public:
781 sgml_cp_set(const char* set, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
782 basic_set<char>(invert, locale)
783 {
784 if (set)
785 m_set = sgml2wstr(set, count);
786 }
787
788 virtual bool match(
789 _In_reads_or_z_(end) const char* text,
790 _In_ size_t start = 0,
791 _In_ size_t end = (size_t)-1,
792 _In_ int flags = match_default)
793 {
794 _Assume_(text || start >= end);
795 if (start < end && text[start]) {
796 wchar_t buf[3];
797 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
798 const wchar_t* set = m_set.c_str();
799 size_t r = (flags & match_case_insensitive) ?
800 stdex::strnistr(set, m_set.size(), chr, m_locale) :
801 stdex::strnstr(set, m_set.size(), chr);
802 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
803 hit_offset = r;
804 this->interval.start = start;
805 return true;
806 }
807 }
808 hit_offset = (size_t)-1;
809 this->interval.start = (this->interval.end = start) + 1;
810 return false;
811 }
812
813 protected:
814 std::wstring m_set;
815 };
816
820 template <class T>
821 class basic_string : public basic_parser<T>
822 {
823 public:
825 _In_reads_or_z_(count) const T* str,
826 _In_ size_t count = (size_t)-1,
827 _In_ const std::locale& locale = std::locale()) :
828 basic_parser<T>(locale),
829 m_str(str, str + stdex::strnlen(str, count))
830 {}
831
832 virtual bool match(
833 _In_reads_or_z_(end) const T* text,
834 _In_ size_t start = 0,
835 _In_ size_t end = (size_t)-1,
836 _In_ int flags = match_default)
837 {
838 _Assume_(text || start >= end);
839 size_t
840 m = m_str.size(),
841 n = std::min<size_t>(end - start, m);
842 bool r = ((flags & match_case_insensitive) ?
843 stdex::strnicmp(text + start, n, m_str.c_str(), m, this->m_locale) :
844 stdex::strncmp(text + start, n, m_str.c_str(), m)) == 0;
845 if (r) {
846 this->interval.end = (this->interval.start = start) + n;
847 return true;
848 }
849 this->interval.start = (this->interval.end = start) + 1;
850 return false;
851 }
852
853 protected:
854 std::basic_string<T> m_str;
855 };
856
859#ifdef _UNICODE
860 using tstring = wstring;
861#else
862 using tstring = string;
863#endif
864
869 {
870 public:
871 sgml_string(const char* str, size_t count = (size_t)-1, _In_ const std::locale& locale = std::locale()) :
872 sgml_parser(locale),
873 m_str(sgml2wstr(str, count))
874 {}
875
876 virtual bool match(
877 _In_reads_or_z_(end) const char* text,
878 _In_ size_t start = 0,
879 _In_ size_t end = (size_t)-1,
880 _In_ int flags = match_default)
881 {
882 _Assume_(text || start >= end);
883 const wchar_t* str = m_str.c_str();
884 const bool case_insensitive = flags & match_case_insensitive ? true : false;
885 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
886 for (this->interval.end = start;;) {
887 if (!*str) {
888 this->interval.start = start;
889 return true;
890 }
891 if (this->interval.end >= end || !text[this->interval.end]) {
892 this->interval.start = (this->interval.end = start) + 1;
893 return false;
894 }
895 wchar_t buf[3];
896 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
897 for (; *chr; ++str, ++chr) {
898 if (!*str ||
899 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
900 {
901 this->interval.start = (this->interval.end = start) + 1;
902 return false;
903 }
904 }
905 }
906 }
907
908 protected:
909 std::wstring m_str;
910 };
911
915 template <class T>
917 {
918 public:
919 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = (size_t)-1, bool greedy = true) :
920 m_el(el),
924 {}
925
926 virtual bool match(
927 _In_reads_or_z_(end) const T* text,
928 _In_ size_t start = 0,
929 _In_ size_t end = (size_t)-1,
930 _In_ int flags = match_default)
931 {
932 _Assume_(text || start >= end);
933 this->interval.start = this->interval.end = start;
934 for (size_t i = 0; ; i++) {
935 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
936 return true;
937 if (!m_el->match(text, this->interval.end, end, flags)) {
938 if (i >= m_min_iterations)
939 return true;
940 break;
941 }
942 if (m_el->interval.end == this->interval.end) {
943 // Element did match, but the matching interval was empty. Quit instead of spinning.
944 return true;
945 }
946 this->interval.end = m_el->interval.end;
947 }
948 this->interval.start = (this->interval.end = start) + 1;
949 return false;
950 }
951
952 protected:
953 std::shared_ptr<basic_parser<T>> m_el;
956 bool m_greedy;
957 };
958
961#ifdef _UNICODE
962 using titerations = witerations;
963#else
964 using titerations = iterations;
965#endif
967
971 template <class T>
973 {
974 protected:
975 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
976
977 public:
979 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
980 _In_ size_t count,
981 _In_ const std::locale& locale = std::locale()) :
982 basic_parser<T>(locale)
983 {
984 _Assume_(el || !count);
985 m_collection.reserve(count);
986 for (size_t i = 0; i < count; i++)
987 m_collection.push_back(el[i]);
988 }
989
991 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
992 _In_ const std::locale& locale = std::locale()) :
993 basic_parser<T>(locale),
994 m_collection(std::move(collection))
995 {}
996
997 virtual void invalidate()
998 {
999 for (auto& el: m_collection)
1000 el->invalidate();
1002 }
1003
1004 protected:
1005 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
1006 };
1007
1011 template <class T>
1013 {
1014 public:
1016 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1017 _In_ size_t count = 0,
1018 _In_ const std::locale& locale = std::locale()) :
1019 parser_collection<T>(el, count, locale)
1020 {}
1021
1023 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1024 _In_ const std::locale& locale = std::locale()) :
1025 parser_collection<T>(std::move(collection), locale)
1026 {}
1027
1028 virtual bool match(
1029 _In_reads_or_z_(end) const T* text,
1030 _In_ size_t start = 0,
1031 _In_ size_t end = (size_t)-1,
1032 _In_ int flags = match_default)
1033 {
1034 _Assume_(text || start >= end);
1035 this->interval.end = start;
1036 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1037 if (!(*i)->match(text, this->interval.end, end, flags)) {
1038 for (++i; i != this->m_collection.end(); ++i)
1039 (*i)->invalidate();
1040 this->interval.start = (this->interval.end = start) + 1;
1041 return false;
1042 }
1043 this->interval.end = (*i)->interval.end;
1044 }
1045 this->interval.start = start;
1046 return true;
1047 }
1048 };
1049
1052#ifdef _UNICODE
1053 using tsequence = wsequence;
1054#else
1055 using tsequence = sequence;
1056#endif
1058
1062 template <class T>
1064 {
1065 protected:
1066 basic_branch(_In_ const std::locale& locale) :
1067 parser_collection<T>(locale),
1068 hit_offset((size_t)-1)
1069 {}
1070
1071 public:
1073 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1074 _In_ size_t count = 0,
1075 _In_ const std::locale& locale = std::locale()) :
1076 parser_collection<T>(el, count, locale),
1077 hit_offset((size_t)-1)
1078 {}
1079
1081 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1082 _In_ const std::locale& locale = std::locale()) :
1083 parser_collection<T>(std::move(collection), locale),
1084 hit_offset((size_t)-1)
1085 {}
1086
1087 virtual bool match(
1088 _In_reads_or_z_(end) const T* text,
1089 _In_ size_t start = 0,
1090 _In_ size_t end = (size_t)-1,
1091 _In_ int flags = match_default)
1092 {
1093 _Assume_(text || start >= end);
1094 hit_offset = 0;
1095 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1096 if ((*i)->match(text, start, end, flags)) {
1097 this->interval = (*i)->interval;
1098 for (++i; i != this->m_collection.end(); ++i)
1099 (*i)->invalidate();
1100 return true;
1101 }
1102 }
1103 hit_offset = (size_t)-1;
1104 this->interval.start = (this->interval.end = start) + 1;
1105 return false;
1106 }
1107
1108 virtual void invalidate()
1109 {
1110 hit_offset = (size_t)-1;
1112 }
1113
1114 public:
1115 size_t hit_offset;
1116 };
1117
1118 using branch = basic_branch<char>;
1120#ifdef _UNICODE
1121 using tbranch = wbranch;
1122#else
1123 using tbranch = branch;
1124#endif
1126
1130 template <class T, class T_parser = basic_string<T>>
1132 {
1133 public:
1134 inline basic_string_branch(
1135 _In_reads_(count) const T* str_z = nullptr,
1136 _In_ size_t count = 0,
1137 _In_ const std::locale& locale = std::locale()) :
1138 basic_branch<T>(locale)
1139 {
1140 build(str_z, count);
1141 }
1142
1143 inline basic_string_branch(_In_z_ const T* str, ...) :
1144 basic_branch<T>(std::locale())
1145 {
1146 va_list params;
1147 va_start(params, str);
1148 build(str, params);
1149 va_end(params);
1150 }
1151
1152 inline basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1153 basic_branch<T>(locale)
1154 {
1155 va_list params;
1156 va_start(params, str);
1157 build(str, params);
1158 va_end(params);
1159 }
1160
1161 protected:
1162 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1163 {
1164 _Assume_(str_z || !count);
1165 if (count) {
1166 size_t offset, n;
1167 for (
1168 offset = n = 0;
1169 offset < count && str_z[offset];
1170 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1171 this->m_collection.reserve(n);
1172 for (
1173 offset = 0;
1174 offset < count && str_z[offset];
1175 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1176 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1177 }
1178 }
1179
1180 void build(_In_z_ const T* str, _In_ va_list params)
1181 {
1182 const T* p;
1183 for (
1184 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, (size_t)-1, this->m_locale)));
1185 (p = va_arg(params, const T*)) != nullptr;
1186 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, (size_t)-1, this->m_locale))));
1187 }
1188 };
1189
1192#ifdef _UNICODE
1194#else
1196#endif
1198
1202 template <class T>
1204 {
1205 public:
1207 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1208 _In_ size_t count = 0,
1209 _In_ const std::locale& locale = std::locale()) :
1210 parser_collection<T>(el, count, locale)
1211 {}
1212
1214 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1215 _In_ const std::locale& locale = std::locale()) :
1216 parser_collection<T>(std::move(collection), locale)
1217 {}
1218
1219 virtual bool match(
1220 _In_reads_or_z_(end) const T* text,
1221 _In_ size_t start = 0,
1222 _In_ size_t end = (size_t)-1,
1223 _In_ int flags = match_default)
1224 {
1225 _Assume_(text || start >= end);
1226 for (auto& el: this->m_collection)
1227 el->invalidate();
1228 if (match_recursively(text, start, end, flags)) {
1229 this->interval.start = start;
1230 return true;
1231 }
1232 this->interval.start = (this->interval.end = start) + 1;
1233 return false;
1234 }
1235
1236 protected:
1237 bool match_recursively(
1238 _In_reads_or_z_(end) const T* text,
1239 _In_ size_t start = 0,
1240 _In_ size_t end = (size_t)-1,
1241 _In_ int flags = match_default)
1242 {
1243 bool all_matched = true;
1244 for (auto& el: this->m_collection) {
1245 if (!el->interval) {
1246 // Element was not matched in permutatuion yet.
1247 all_matched = false;
1248 if (el->match(text, start, end, flags)) {
1249 // Element matched for the first time.
1250 if (match_recursively(text, el->interval.end, end, flags)) {
1251 // Rest of the elements matched too.
1252 return true;
1253 }
1254 el->invalidate();
1255 }
1256 }
1257 }
1258 if (all_matched) {
1259 this->interval.end = start;
1260 return true;
1261 }
1262 return false;
1263 }
1264 };
1265
1268#ifdef _UNICODE
1269 using tpermutation = wpermutation;
1270#else
1271 using tpermutation = permutation;
1272#endif
1274
1278 template <class T>
1279 class basic_integer : public basic_parser<T>
1280 {
1281 public:
1282 basic_integer(_In_ const std::locale& locale = std::locale()) :
1283 basic_parser<T>(locale),
1284 value(0)
1285 {}
1286
1287 virtual void invalidate()
1288 {
1289 value = 0;
1291 }
1292
1293 public:
1294 size_t value;
1295 };
1296
1300 template <class T>
1302 {
1303 public:
1305 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1306 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1307 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1308 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1309 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1310 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1311 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1312 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1313 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1314 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1315 _In_ const std::locale& locale = std::locale()) :
1316 basic_integer<T>(locale),
1317 m_digit_0(digit_0),
1318 m_digit_1(digit_1),
1319 m_digit_2(digit_2),
1320 m_digit_3(digit_3),
1321 m_digit_4(digit_4),
1322 m_digit_5(digit_5),
1323 m_digit_6(digit_6),
1324 m_digit_7(digit_7),
1325 m_digit_8(digit_8),
1326 m_digit_9(digit_9)
1327 {}
1328
1329 virtual bool match(
1330 _In_reads_or_z_(end) const T* text,
1331 _In_ size_t start = 0,
1332 _In_ size_t end = (size_t)-1,
1333 _In_ int flags = match_default)
1334 {
1335 _Assume_(text || start >= end);
1336 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1337 size_t dig;
1338 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1339 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1340 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1341 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1342 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1343 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1344 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1345 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1346 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1347 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1348 else break;
1349 this->value = this->value * 10 + dig;
1350 }
1352 this->interval.start = start;
1353 return true;
1354 }
1355 this->interval.start = (this->interval.end = start) + 1;
1356 return false;
1357 }
1358
1359 protected:
1360 std::shared_ptr<basic_parser<T>>
1361 m_digit_0,
1362 m_digit_1,
1363 m_digit_2,
1364 m_digit_3,
1365 m_digit_4,
1366 m_digit_5,
1367 m_digit_6,
1368 m_digit_7,
1369 m_digit_8,
1370 m_digit_9;
1371 };
1372
1375#ifdef _UNICODE
1376 using tinteger10 = winteger10;
1377#else
1378 using tinteger10 = integer10;
1379#endif
1381
1385 template <class T>
1387 {
1388 public:
1390 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1391 _In_ const std::shared_ptr<basic_set<T>>& separator,
1392 _In_ const std::locale& locale = std::locale()) :
1393 basic_integer<T>(locale),
1394 digit_count(0),
1395 has_separators(false),
1396 m_digits(digits),
1397 m_separator(separator)
1398 {}
1399
1400 virtual bool match(
1401 _In_reads_or_z_(end) const T* text,
1402 _In_ size_t start = 0,
1403 _In_ size_t end = (size_t)-1,
1404 _In_ int flags = match_default)
1405 {
1406 _Assume_(text || start >= end);
1407 if (m_digits->match(text, start, end, flags)) {
1408 // Leading part match.
1409 this->value = m_digits->value;
1410 digit_count = m_digits->interval.size();
1411 has_separators = false;
1412 this->interval.start = start;
1413 this->interval.end = m_digits->interval.end;
1414 if (m_digits->interval.size() <= 3) {
1415 // Maybe separated with thousand separators?
1416 size_t hit_offset = (size_t)-1;
1417 while (m_separator->match(text, this->interval.end, end, flags) &&
1418 (hit_offset == (size_t)-1 || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1419 m_digits->match(text, m_separator->interval.end, end, flags) &&
1420 m_digits->interval.size() == 3)
1421 {
1422 // Thousand separator and three-digit integer followed.
1423 this->value = this->value * 1000 + m_digits->value;
1424 digit_count += 3;
1425 has_separators = true;
1426 this->interval.end = m_digits->interval.end;
1427 hit_offset = m_separator->hit_offset;
1428 }
1429 }
1430
1431 return true;
1432 }
1433 this->value = 0;
1434 this->interval.start = (this->interval.end = start) + 1;
1435 return false;
1436 }
1437
1438 virtual void invalidate()
1439 {
1440 digit_count = 0;
1441 has_separators = false;
1443 }
1444
1445 public:
1448
1449 protected:
1450 std::shared_ptr<basic_integer10<T>> m_digits;
1451 std::shared_ptr<basic_set<T>> m_separator;
1452 };
1453
1456#ifdef _UNICODE
1457 using tinteger10ts = winteger10ts;
1458#else
1459 using tinteger10ts = integer10ts;
1460#endif
1462
1466 template <class T>
1468 {
1469 public:
1471 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1472 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1473 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1474 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1475 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1476 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1477 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1478 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1479 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1480 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1481 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1482 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1483 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1484 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1485 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1486 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1487 _In_ const std::locale& locale = std::locale()) :
1488 basic_integer<T>(locale),
1489 m_digit_0(digit_0),
1490 m_digit_1(digit_1),
1491 m_digit_2(digit_2),
1492 m_digit_3(digit_3),
1493 m_digit_4(digit_4),
1494 m_digit_5(digit_5),
1495 m_digit_6(digit_6),
1496 m_digit_7(digit_7),
1497 m_digit_8(digit_8),
1498 m_digit_9(digit_9),
1499 m_digit_10(digit_10),
1500 m_digit_11(digit_11),
1501 m_digit_12(digit_12),
1502 m_digit_13(digit_13),
1503 m_digit_14(digit_14),
1504 m_digit_15(digit_15)
1505 {}
1506
1507 virtual bool match(
1508 _In_reads_or_z_(end) const T* text,
1509 _In_ size_t start = 0,
1510 _In_ size_t end = (size_t)-1,
1511 _In_ int flags = match_default)
1512 {
1513 _Assume_(text || start >= end);
1514 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1515 size_t dig;
1516 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1517 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1518 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1519 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1520 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1521 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1522 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1523 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1524 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1525 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1526 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1527 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1528 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1529 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1530 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1531 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
1532 else break;
1533 this->value = this->value * 16 + dig;
1534 }
1536 this->interval.start = start;
1537 return true;
1538 }
1539 this->interval.start = (this->interval.end = start) + 1;
1540 return false;
1541 }
1542
1543 protected:
1544 std::shared_ptr<basic_parser<T>>
1545 m_digit_0,
1546 m_digit_1,
1547 m_digit_2,
1548 m_digit_3,
1549 m_digit_4,
1550 m_digit_5,
1551 m_digit_6,
1552 m_digit_7,
1553 m_digit_8,
1554 m_digit_9,
1555 m_digit_10,
1556 m_digit_11,
1557 m_digit_12,
1558 m_digit_13,
1559 m_digit_14,
1560 m_digit_15;
1561 };
1562
1565#ifdef _UNICODE
1566 using tinteger16 = winteger16;
1567#else
1568 using tinteger16 = integer16;
1569#endif
1571
1575 template <class T>
1577 {
1578 public:
1580 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1581 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1582 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1583 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1584 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1585 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1586 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1587 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1588 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1589 _In_ const std::locale& locale = std::locale()) :
1590 basic_integer<T>(locale),
1591 m_digit_1(digit_1),
1592 m_digit_5(digit_5),
1593 m_digit_10(digit_10),
1594 m_digit_50(digit_50),
1595 m_digit_100(digit_100),
1596 m_digit_500(digit_500),
1597 m_digit_1000(digit_1000),
1598 m_digit_5000(digit_5000),
1599 m_digit_10000(digit_10000)
1600 {}
1601
1602 virtual bool match(
1603 _In_reads_or_z_(end) const T* text,
1604 _In_ size_t start = 0,
1605 _In_ size_t end = (size_t)-1,
1606 _In_ int flags = match_default)
1607 {
1608 _Assume_(text || start >= end);
1609 size_t
1610 dig[5] = { (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1 },
1611 end2;
1612
1613 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
1614 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1615 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1616 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1617 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1618 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1619 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1620 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1621 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1622 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1623 else break;
1624
1625 // Store first digit.
1626 if (dig[4] == (size_t)-1) dig[4] = dig[0];
1627
1628 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1629 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1630 break;
1631 }
1632 if (dig[0] <= dig[1]) {
1633 // Digit is less or equal previous one: add.
1634 this->value += dig[0];
1635 }
1636 else if (
1637 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1638 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1639 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1640 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1641 {
1642 // Digit is up to two orders bigger than previous one: subtract. But...
1643 if (dig[2] < dig[0]) {
1644 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1645 break;
1646 }
1647 this->value -= dig[1]; // Cancel addition in the previous step.
1648 dig[0] -= dig[1]; // Combine last two digits.
1649 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1650 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1651 this->value += dig[0]; // Add combined value.
1652 }
1653 else {
1654 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1655 break;
1656 }
1657 }
1658 if (this->value) {
1659 this->interval.start = start;
1660 return true;
1661 }
1662 this->interval.start = (this->interval.end = start) + 1;
1663 return false;
1664 }
1665
1666 protected:
1667 std::shared_ptr<basic_parser<T>>
1668 m_digit_1,
1669 m_digit_5,
1670 m_digit_10,
1671 m_digit_50,
1672 m_digit_100,
1673 m_digit_500,
1674 m_digit_1000,
1675 m_digit_5000,
1676 m_digit_10000;
1677 };
1678
1681#ifdef _UNICODE
1683#else
1685#endif
1687
1691 template <class T>
1693 {
1694 public:
1696 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1697 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1698 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1699 _In_ const std::locale& locale = std::locale()) :
1700 basic_parser<T>(locale),
1701 numerator(_numerator),
1702 fraction_line(_fraction_line),
1703 denominator(_denominator)
1704 {}
1705
1706 virtual bool match(
1707 _In_reads_or_z_(end) const T* text,
1708 _In_ size_t start = 0,
1709 _In_ size_t end = (size_t)-1,
1710 _In_ int flags = match_default)
1711 {
1712 _Assume_(text || start >= end);
1713 if (numerator->match(text, start, end, flags) &&
1714 fraction_line->match(text, numerator->interval.end, end, flags) &&
1715 denominator->match(text, fraction_line->interval.end, end, flags))
1716 {
1717 this->interval.start = start;
1718 this->interval.end = denominator->interval.end;
1719 return true;
1720 }
1721 numerator->invalidate();
1722 fraction_line->invalidate();
1723 denominator->invalidate();
1724 this->interval.start = (this->interval.end = start) + 1;
1725 return false;
1726 }
1727
1728 virtual void invalidate()
1729 {
1730 numerator->invalidate();
1731 fraction_line->invalidate();
1732 denominator->invalidate();
1734 }
1735
1736 public:
1737 std::shared_ptr<basic_parser<T>> numerator;
1738 std::shared_ptr<basic_parser<T>> fraction_line;
1739 std::shared_ptr<basic_parser<T>> denominator;
1740 };
1741
1744#ifdef _UNICODE
1745 using tfraction = wfraction;
1746#else
1747 using tfraction = fraction;
1748#endif
1750
1754 template <class T>
1755 class basic_score : public basic_parser<T>
1756 {
1757 public:
1759 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1760 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1761 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1762 _In_ const std::shared_ptr<basic_parser<T>>& space,
1763 _In_ const std::locale& locale = std::locale()) :
1764 basic_parser<T>(locale),
1765 home(_home),
1766 separator(_separator),
1767 guest(_guest),
1768 m_space(space)
1769 {}
1770
1771 virtual bool match(
1772 _In_reads_or_z_(end) const T* text,
1773 _In_ size_t start = 0,
1774 _In_ size_t end = (size_t)-1,
1775 _In_ int flags = match_default)
1776 {
1777 _Assume_(text || start >= end);
1778 this->interval.end = start;
1779
1780 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1781
1782 if (home->match(text, this->interval.end, end, flags))
1783 this->interval.end = home->interval.end;
1784 else
1785 goto end;
1786
1787 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1788
1789 if (separator->match(text, this->interval.end, end, flags))
1790 this->interval.end = separator->interval.end;
1791 else
1792 goto end;
1793
1794 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1795
1796 if (guest->match(text, this->interval.end, end, flags))
1797 this->interval.end = guest->interval.end;
1798 else
1799 goto end;
1800
1801 this->interval.start = start;
1802 return true;
1803
1804 end:
1805 home->invalidate();
1806 separator->invalidate();
1807 guest->invalidate();
1808 this->interval.start = (this->interval.end = start) + 1;
1809 return false;
1810 }
1811
1812 virtual void invalidate()
1813 {
1814 home->invalidate();
1815 separator->invalidate();
1816 guest->invalidate();
1818 }
1819
1820 public:
1821 std::shared_ptr<basic_parser<T>> home;
1822 std::shared_ptr<basic_parser<T>> separator;
1823 std::shared_ptr<basic_parser<T>> guest;
1824
1825 protected:
1826 std::shared_ptr<basic_parser<T>> m_space;
1827 };
1828
1829 using score = basic_score<char>;
1831#ifdef _UNICODE
1832 using tscore = wscore;
1833#else
1834 using tscore = score;
1835#endif
1837
1841 template <class T>
1843 {
1844 public:
1846 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1847 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1848 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1849 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1850 _In_ const std::locale& locale = std::locale()) :
1851 basic_parser<T>(locale),
1856 {}
1857
1858 virtual bool match(
1859 _In_reads_or_z_(end) const T* text,
1860 _In_ size_t start = 0,
1861 _In_ size_t end = (size_t)-1,
1862 _In_ int flags = match_default)
1863 {
1864 _Assume_(text || start >= end);
1865 this->interval.end = start;
1866 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1867 this->interval.end = positive_sign->interval.end;
1868 if (negative_sign) negative_sign->invalidate();
1869 if (special_sign) special_sign->invalidate();
1870 }
1871 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1872 this->interval.end = negative_sign->interval.end;
1873 if (positive_sign) positive_sign->invalidate();
1874 if (special_sign) special_sign->invalidate();
1875 }
1876 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1877 this->interval.end = special_sign->interval.end;
1878 if (positive_sign) positive_sign->invalidate();
1879 if (negative_sign) negative_sign->invalidate();
1880 }
1881 else {
1882 if (positive_sign) positive_sign->invalidate();
1883 if (negative_sign) negative_sign->invalidate();
1884 if (special_sign) special_sign->invalidate();
1885 }
1886 if (number->match(text, this->interval.end, end, flags)) {
1887 this->interval.start = start;
1888 this->interval.end = number->interval.end;
1889 return true;
1890 }
1891 if (positive_sign) positive_sign->invalidate();
1892 if (negative_sign) negative_sign->invalidate();
1893 if (special_sign) special_sign->invalidate();
1894 number->invalidate();
1895 this->interval.start = (this->interval.end = start) + 1;
1896 return false;
1897 }
1898
1899 virtual void invalidate()
1900 {
1901 if (positive_sign) positive_sign->invalidate();
1902 if (negative_sign) negative_sign->invalidate();
1903 if (special_sign) special_sign->invalidate();
1904 number->invalidate();
1906 }
1907
1908 public:
1909 std::shared_ptr<basic_parser<T>> positive_sign;
1910 std::shared_ptr<basic_parser<T>> negative_sign;
1911 std::shared_ptr<basic_parser<T>> special_sign;
1912 std::shared_ptr<basic_parser<T>> number;
1913 };
1914
1917#ifdef _UNICODE
1919#else
1921#endif
1923
1927 template <class T>
1929 {
1930 public:
1932 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1933 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1934 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1935 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1936 _In_ const std::shared_ptr<basic_parser<T>>& space,
1937 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1938 _In_ const std::locale& locale = std::locale()) :
1939 basic_parser<T>(locale),
1945 m_space(space)
1946 {}
1947
1948 virtual bool match(
1949 _In_reads_or_z_(end) const T* text,
1950 _In_ size_t start = 0,
1951 _In_ size_t end = (size_t)-1,
1952 _In_ int flags = match_default)
1953 {
1954 _Assume_(text || start >= end);
1955 this->interval.end = start;
1956
1957 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1958 this->interval.end = positive_sign->interval.end;
1959 if (negative_sign) negative_sign->invalidate();
1960 if (special_sign) special_sign->invalidate();
1961 }
1962 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1963 this->interval.end = negative_sign->interval.end;
1964 if (positive_sign) positive_sign->invalidate();
1965 if (special_sign) special_sign->invalidate();
1966 }
1967 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1968 this->interval.end = special_sign->interval.end;
1969 if (positive_sign) positive_sign->invalidate();
1970 if (negative_sign) negative_sign->invalidate();
1971 }
1972 else {
1973 if (positive_sign) positive_sign->invalidate();
1974 if (negative_sign) negative_sign->invalidate();
1975 if (special_sign) special_sign->invalidate();
1976 }
1977
1978 // Check for <integer> <fraction>
1979 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1980 if (integer->match(text, this->interval.end, end, flags) &&
1981 m_space->match(text, integer->interval.end, end, space_match_flags))
1982 {
1983 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1984 if (fraction->match(text, this->interval.end, end, flags)) {
1985 this->interval.start = start;
1986 this->interval.end = fraction->interval.end;
1987 return true;
1988 }
1989 fraction->invalidate();
1990 this->interval.start = start;
1991 this->interval.end = integer->interval.end;
1992 return true;
1993 }
1994
1995 // Check for <fraction>
1996 if (fraction->match(text, this->interval.end, end, flags)) {
1997 integer->invalidate();
1998 this->interval.start = start;
1999 this->interval.end = fraction->interval.end;
2000 return true;
2001 }
2002
2003 // Check for <integer>
2004 if (integer->match(text, this->interval.end, end, flags)) {
2005 fraction->invalidate();
2006 this->interval.start = start;
2007 this->interval.end = integer->interval.end;
2008 return true;
2009 }
2010
2011 if (positive_sign) positive_sign->invalidate();
2012 if (negative_sign) negative_sign->invalidate();
2013 if (special_sign) special_sign->invalidate();
2014 integer->invalidate();
2015 fraction->invalidate();
2016 this->interval.start = (this->interval.end = start) + 1;
2017 return false;
2018 }
2019
2020 virtual void invalidate()
2021 {
2022 if (positive_sign) positive_sign->invalidate();
2023 if (negative_sign) negative_sign->invalidate();
2024 if (special_sign) special_sign->invalidate();
2025 integer->invalidate();
2026 fraction->invalidate();
2028 }
2029
2030 public:
2031 std::shared_ptr<basic_parser<T>> positive_sign;
2032 std::shared_ptr<basic_parser<T>> negative_sign;
2033 std::shared_ptr<basic_parser<T>> special_sign;
2034 std::shared_ptr<basic_parser<T>> integer;
2035 std::shared_ptr<basic_parser<T>> fraction;
2036
2037 protected:
2038 std::shared_ptr<basic_parser<T>> m_space;
2039 };
2040
2043#ifdef _UNICODE
2045#else
2047#endif
2049
2053 template <class T>
2055 {
2056 public:
2058 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2059 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2060 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2061 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2062 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2063 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2064 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2065 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2066 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2067 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2068 _In_ const std::locale& locale = std::locale()) :
2069 basic_parser<T>(locale),
2080 value(std::numeric_limits<double>::quiet_NaN())
2081 {}
2082
2083 virtual bool match(
2084 _In_reads_or_z_(end) const T* text,
2085 _In_ size_t start = 0,
2086 _In_ size_t end = (size_t)-1,
2087 _In_ int flags = match_default)
2088 {
2089 _Assume_(text || start >= end);
2090 this->interval.end = start;
2091
2092 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2093 this->interval.end = positive_sign->interval.end;
2094 if (negative_sign) negative_sign->invalidate();
2095 if (special_sign) special_sign->invalidate();
2096 }
2097 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2098 this->interval.end = negative_sign->interval.end;
2099 if (positive_sign) positive_sign->invalidate();
2100 if (special_sign) special_sign->invalidate();
2101 }
2102 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2103 this->interval.end = special_sign->interval.end;
2104 if (positive_sign) positive_sign->invalidate();
2105 if (negative_sign) negative_sign->invalidate();
2106 }
2107 else {
2108 if (positive_sign) positive_sign->invalidate();
2109 if (negative_sign) negative_sign->invalidate();
2110 if (special_sign) special_sign->invalidate();
2111 }
2112
2113 if (integer->match(text, this->interval.end, end, flags))
2114 this->interval.end = integer->interval.end;
2115
2116 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2118 this->interval.end = decimal->interval.end;
2119 else {
2120 decimal_separator->invalidate();
2121 decimal->invalidate();
2122 }
2123
2124 if (integer->interval.empty() &&
2125 decimal->interval.empty())
2126 {
2127 // No integer part, no decimal part.
2128 if (positive_sign) positive_sign->invalidate();
2129 if (negative_sign) negative_sign->invalidate();
2130 if (special_sign) special_sign->invalidate();
2131 integer->invalidate();
2132 decimal_separator->invalidate();
2133 decimal->invalidate();
2134 if (exponent_symbol) exponent_symbol->invalidate();
2135 if (positive_exp_sign) positive_exp_sign->invalidate();
2136 if (negative_exp_sign) negative_exp_sign->invalidate();
2137 if (exponent) exponent->invalidate();
2138 this->interval.start = (this->interval.end = start) + 1;
2139 return false;
2140 }
2141
2142 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2145 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2146 {
2147 this->interval.end = exponent->interval.end;
2148 if (negative_exp_sign) negative_exp_sign->invalidate();
2149 }
2150 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2153 {
2154 this->interval.end = exponent->interval.end;
2155 if (positive_exp_sign) positive_exp_sign->invalidate();
2156 }
2157 else {
2158 if (exponent_symbol) exponent_symbol->invalidate();
2159 if (positive_exp_sign) positive_exp_sign->invalidate();
2160 if (negative_exp_sign) negative_exp_sign->invalidate();
2161 if (exponent) exponent->invalidate();
2162 }
2163
2164 value = (double)integer->value;
2165 if (decimal->interval)
2166 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2167 if (negative_sign && negative_sign->interval)
2168 value = -value;
2169 if (exponent && exponent->interval) {
2170 double e = (double)exponent->value;
2171 if (negative_exp_sign && negative_exp_sign->interval)
2172 e = -e;
2173 value *= pow(10.0, e);
2174 }
2175
2176 this->interval.start = start;
2177 return true;
2178 }
2179
2180 virtual void invalidate()
2181 {
2182 if (positive_sign) positive_sign->invalidate();
2183 if (negative_sign) negative_sign->invalidate();
2184 if (special_sign) special_sign->invalidate();
2185 integer->invalidate();
2186 decimal_separator->invalidate();
2187 decimal->invalidate();
2188 if (exponent_symbol) exponent_symbol->invalidate();
2189 if (positive_exp_sign) positive_exp_sign->invalidate();
2190 if (negative_exp_sign) negative_exp_sign->invalidate();
2191 if (exponent) exponent->invalidate();
2192 value = std::numeric_limits<double>::quiet_NaN();
2194 }
2195
2196 public:
2197 std::shared_ptr<basic_parser<T>> positive_sign;
2198 std::shared_ptr<basic_parser<T>> negative_sign;
2199 std::shared_ptr<basic_parser<T>> special_sign;
2200 std::shared_ptr<basic_integer<T>> integer;
2201 std::shared_ptr<basic_parser<T>> decimal_separator;
2202 std::shared_ptr<basic_integer<T>> decimal;
2203 std::shared_ptr<basic_parser<T>> exponent_symbol;
2204 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2205 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2206 std::shared_ptr<basic_integer<T>> exponent;
2207 double value;
2208 };
2209
2212#ifdef _UNICODE
2214#else
2216#endif
2218
2222 template <class T>
2224 {
2225 public:
2227 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2228 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2229 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2230 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2231 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2232 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2233 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2234 _In_ const std::locale& locale = std::locale()) :
2235 basic_parser<T>(locale),
2243 {}
2244
2245 virtual bool match(
2246 _In_reads_or_z_(end) const T* text,
2247 _In_ size_t start = 0,
2248 _In_ size_t end = (size_t)-1,
2249 _In_ int flags = match_default)
2250 {
2251 _Assume_(text || start >= end);
2252 this->interval.end = start;
2253
2254 if (positive_sign->match(text, this->interval.end, end, flags)) {
2255 this->interval.end = positive_sign->interval.end;
2256 if (negative_sign) negative_sign->invalidate();
2257 if (special_sign) special_sign->invalidate();
2258 }
2259 else if (negative_sign->match(text, this->interval.end, end, flags)) {
2260 this->interval.end = negative_sign->interval.end;
2261 if (positive_sign) positive_sign->invalidate();
2262 if (special_sign) special_sign->invalidate();
2263 }
2264 else if (special_sign->match(text, this->interval.end, end, flags)) {
2265 this->interval.end = special_sign->interval.end;
2266 if (positive_sign) positive_sign->invalidate();
2267 if (negative_sign) negative_sign->invalidate();
2268 }
2269 else {
2270 if (positive_sign) positive_sign->invalidate();
2271 if (negative_sign) negative_sign->invalidate();
2272 if (special_sign) special_sign->invalidate();
2273 }
2274
2275 if (currency->match(text, this->interval.end, end, flags))
2276 this->interval.end = currency->interval.end;
2277 else {
2278 if (positive_sign) positive_sign->invalidate();
2279 if (negative_sign) negative_sign->invalidate();
2280 if (special_sign) special_sign->invalidate();
2281 integer->invalidate();
2282 decimal_separator->invalidate();
2283 decimal->invalidate();
2284 this->interval.start = (this->interval.end = start) + 1;
2285 return false;
2286 }
2287
2288 if (integer->match(text, this->interval.end, end, flags))
2289 this->interval.end = integer->interval.end;
2290 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2292 this->interval.end = decimal->interval.end;
2293 else {
2294 decimal_separator->invalidate();
2295 decimal->invalidate();
2296 }
2297
2298 if (integer->interval.empty() &&
2299 decimal->interval.empty())
2300 {
2301 // No integer part, no decimal part.
2302 if (positive_sign) positive_sign->invalidate();
2303 if (negative_sign) negative_sign->invalidate();
2304 if (special_sign) special_sign->invalidate();
2305 currency->invalidate();
2306 integer->invalidate();
2307 decimal_separator->invalidate();
2308 decimal->invalidate();
2309 this->interval.start = (this->interval.end = start) + 1;
2310 return false;
2311 }
2312
2313 this->interval.start = start;
2314 return true;
2315 }
2316
/// Invalidates the results held by all sub-parsers.
/// The three sign parsers may be null and are skipped when absent.
2317 virtual void invalidate()
2318 {
2319 if (positive_sign) positive_sign->invalidate();
2320 if (negative_sign) negative_sign->invalidate();
2321 if (special_sign) special_sign->invalidate();
2322 currency->invalidate();
2323 integer->invalidate();
2324 decimal_separator->invalidate();
2325 decimal->invalidate();
// NOTE(review): the embedded listing numbering skips 2326 - one statement appears to have been
// dropped here (presumably the base-class invalidate() call); confirm against the upstream header.
2327 }
2328
2329 public:
2330 std::shared_ptr<basic_parser<T>> positive_sign;
2331 std::shared_ptr<basic_parser<T>> negative_sign;
2332 std::shared_ptr<basic_parser<T>> special_sign;
2333 std::shared_ptr<basic_parser<T>> currency;
2334 std::shared_ptr<basic_parser<T>> integer;
2335 std::shared_ptr<basic_parser<T>> decimal_separator;
2336 std::shared_ptr<basic_parser<T>> decimal;
2337 };
2338
2341#ifdef _UNICODE
2343#else
2345#endif
2347
2351 template <class T>
2353 {
2354 public:
2356 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2357 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2358 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2359 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2360 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2361 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2362 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2363 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2364 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2365 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2366 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2367 _In_ const std::locale& locale = std::locale()) :
2368 basic_parser<T>(locale),
2369 m_digit_0(digit_0),
2370 m_digit_1(digit_1),
2371 m_digit_2(digit_2),
2372 m_digit_3(digit_3),
2373 m_digit_4(digit_4),
2374 m_digit_5(digit_5),
2375 m_digit_6(digit_6),
2376 m_digit_7(digit_7),
2377 m_digit_8(digit_8),
2378 m_digit_9(digit_9),
2379 m_separator(separator)
2380 {
2381 value.s_addr = 0;
2382 }
2383
2384 virtual bool match(
2385 _In_reads_or_z_(end) const T* text,
2386 _In_ size_t start = 0,
2387 _In_ size_t end = (size_t)-1,
2388 _In_ int flags = match_default)
2389 {
2390 _Assume_(text || start >= end);
2391 this->interval.end = start;
2392 value.s_addr = 0;
2393
2394 size_t i;
2395 for (i = 0; i < 4; i++) {
2396 if (i) {
2397 if (m_separator->match(text, this->interval.end, end, flags))
2398 this->interval.end = m_separator->interval.end;
2399 else
2400 goto error;
2401 }
2402
2403 components[i].start = this->interval.end;
2404 bool is_empty = true;
2405 size_t x;
2406 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2407 size_t dig, digit_end;
2408 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2409 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2410 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2411 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2412 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2413 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2414 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2415 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2416 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2417 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2418 else break;
2419 size_t x_n = x * 10 + dig;
2420 if (x_n <= 255) {
2421 x = x_n;
2422 this->interval.end = digit_end;
2423 is_empty = false;
2424 }
2425 else
2426 break;
2427 }
2428 if (is_empty)
2429 goto error;
2430 components[i].end = this->interval.end;
2431 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2432 }
2433 if (i < 4)
2434 goto error;
2435
2436 this->interval.start = start;
2437 return true;
2438
2439 error:
2440 components[0].start = 1;
2441 components[0].end = 0;
2442 components[1].start = 1;
2443 components[1].end = 0;
2444 components[2].start = 1;
2445 components[2].end = 0;
2446 components[3].start = 1;
2447 components[3].end = 0;
2448 value.s_addr = 0;
2449 this->interval.start = (this->interval.end = start) + 1;
2450 return false;
2451 }
2452
/// Resets all four component intervals to start=1, end=0 and clears the parsed address value.
2453 virtual void invalidate()
2454 {
2455 components[0].start = 1;
2456 components[0].end = 0;
2457 components[1].start = 1;
2458 components[1].end = 0;
2459 components[2].start = 1;
2460 components[2].end = 0;
2461 components[3].start = 1;
2462 components[3].end = 0;
2463 value.s_addr = 0;
// NOTE(review): the embedded listing numbering skips 2464 - one statement appears to have been
// dropped here (presumably the base-class invalidate() call); confirm against the upstream header.
2465 }
2466
2467 public:
2470
2471 protected:
2472 std::shared_ptr<basic_parser<T>>
2473 m_digit_0,
2474 m_digit_1,
2475 m_digit_2,
2476 m_digit_3,
2477 m_digit_4,
2478 m_digit_5,
2479 m_digit_6,
2480 m_digit_7,
2481 m_digit_8,
2482 m_digit_9;
2483 std::shared_ptr<basic_parser<T>> m_separator;
2484 };
2485
2488#ifdef _UNICODE
2490#else
2492#endif
2494
2498 template <class T>
2500 {
2501 public:
2502 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2503
2504 virtual bool match(
2505 _In_reads_or_z_(end) const T* text,
2506 _In_ size_t start = 0,
2507 _In_ size_t end = (size_t)-1,
2508 _In_ int flags = match_default)
2509 {
2510 _Assume_(text || start >= end);
2511 if (start < end && text[start]) {
2512 if (text[start] == '-' ||
2513 text[start] == '_' ||
2514 text[start] == ':' ||
2515 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2516 {
2517 this->interval.end = (this->interval.start = start) + 1;
2518 return true;
2519 }
2520 }
2521 this->interval.start = (this->interval.end = start) + 1;
2522 return false;
2523 }
2524 };
2525
2528#ifdef _UNICODE
2530#else
2532#endif
2533
2538 {
2539 public:
2540 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2541
2542 virtual bool match(
2543 _In_reads_or_z_(end) const char* text,
2544 _In_ size_t start = 0,
2545 _In_ size_t end = (size_t)-1,
2546 _In_ int flags = match_default)
2547 {
2548 _Assume_(text || start >= end);
2549 if (start < end && text[start]) {
2550 wchar_t buf[3];
2551 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2552 const wchar_t* chr_end = chr + stdex::strlen(chr);
2553 if (((chr[0] == L'-' ||
2554 chr[0] == L'_' ||
2555 chr[0] == L':') && chr[1] == 0) ||
2556 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2557 {
2558 this->interval.start = start;
2559 return true;
2560 }
2561 }
2562 this->interval.start = (this->interval.end = start) + 1;
2563 return false;
2564 }
2565 };
2566
2570 template <class T>
2572 {
2573 public:
2575 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2576 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2577 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2578 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2579 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2580 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2581 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2582 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2583 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2584 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2585 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2586 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2587 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2588 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2589 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2590 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2591 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2592 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2593 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2594 _In_ const std::locale& locale = std::locale()) :
2595 basic_parser<T>(locale),
2596 m_digit_0(digit_0),
2597 m_digit_1(digit_1),
2598 m_digit_2(digit_2),
2599 m_digit_3(digit_3),
2600 m_digit_4(digit_4),
2601 m_digit_5(digit_5),
2602 m_digit_6(digit_6),
2603 m_digit_7(digit_7),
2604 m_digit_8(digit_8),
2605 m_digit_9(digit_9),
2606 m_digit_10(digit_10),
2607 m_digit_11(digit_11),
2608 m_digit_12(digit_12),
2609 m_digit_13(digit_13),
2610 m_digit_14(digit_14),
2611 m_digit_15(digit_15),
2612 m_separator(separator),
2613 m_scope_id_separator(scope_id_separator),
2615 {
2616 memset(&value, 0, sizeof(value));
2617 }
2618
/// Matches an IPv6 address: up to eight hexadecimal groups (each at most 0xffff) joined by
/// the separator parser, with at most one doubled separator ("::") standing for a run of
/// zero groups, optionally followed by a scope ID.
///
/// \param[in] text   Text to parse (zero-terminated or limited by \p end)
/// \param[in] start  Index in \p text where matching starts
/// \param[in] end    Index in \p text where matching must stop
/// \param[in] flags  Matching flags, forwarded to all sub-parsers
///
/// \returns true when matched; false otherwise, with components, value and scope_id reset
2619 virtual bool match(
2620 _In_reads_or_z_(end) const T* text,
2621 _In_ size_t start = 0,
2622 _In_ size_t end = (size_t)-1,
2623 _In_ int flags = match_default)
2624 {
2625 _Assume_(text || start >= end);
2626 this->interval.end = start;
2627 memset(&value, 0, sizeof(value));
2628
// compaction_i is the index of the group at which "::" was found, or (size_t)-1 when none was seen yet.
2629 size_t i, compaction_i = (size_t)-1, compaction_start = start;
2630 for (i = 0; i < 8; i++) {
2631 bool is_empty = true;
2632
2633 if (m_separator->match(text, this->interval.end, end, flags)) {
// The second call re-runs the same parser right after the first separator; from here on
// m_separator->interval describes the second separator.
2634 if (m_separator->match(text, m_separator->interval.end, end, flags)) {
2635 // :: found
2636 if (compaction_i == (size_t)-1) {
2637 // Zero compaction start
2638 compaction_i = i;
2639 compaction_start = m_separator->interval.start;
2640 this->interval.end = m_separator->interval.end;
2641 }
2642 else {
2643 // More than one zero compaction
2644 break;
2645 }
2646 }
2647 else if (i) {
2648 // Inner : found
2649 this->interval.end = m_separator->interval.end;
2650 }
2651 else {
2652 // Leading : found
2653 goto error;
2654 }
2655 }
2656 else if (i) {
2657 // : missing
2658 break;
2659 }
2660
2661 components[i].start = this->interval.end;
// Accumulate hexadecimal digits for group i while the value stays within 16 bits.
2662 size_t x;
2663 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2664 size_t dig, digit_end;
2665 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2666 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2667 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2668 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2669 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2670 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2671 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2672 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2673 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2674 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2675 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2676 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2677 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2678 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2679 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2680 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2681 else break;
2682 size_t x_n = x * 16 + dig;
2683 if (x_n <= 0xffff) {
2684 x = x_n;
2685 this->interval.end = digit_end;
2686 is_empty = false;
2687 }
2688 else
2689 break;
2690 }
2691 if (is_empty) {
2692 if (compaction_i != (size_t)-1) {
2693 // Zero compaction active: no sweat.
2694 break;
2695 }
2696 goto error;
2697 }
2698 components[i].end = this->interval.end;
2699 this->value.s6_words[i] = (uint16_t)x;
2700 }
2701
2702 if (compaction_i != (size_t)-1) {
2703 // Align components right due to zero compaction.
2704 size_t j, k;
// Move the groups parsed after "::" to the tail of the address...
2705 for (j = 8, k = i; k > compaction_i;) {
2706 this->value.s6_words[--j] = this->value.s6_words[--k];
// NOTE(review): the embedded listing numbering skips 2707 - a statement appears to have been
// dropped here (presumably moving components[k] to components[j]); confirm against upstream.
2708 }
// ...and zero-fill the gap the compaction stands for.
2709 for (; j > compaction_i;) {
2710 this->value.s6_words[--j] = 0;
2711 components[j].start =
// NOTE(review): the embedded listing numbering skips 2712 - the right-hand side of this chained
// assignment is missing (presumably assigning compaction_start, which is otherwise never
// read in this function); confirm against upstream.
2713 }
2714 }
2715 else if (i < 8)
2716 goto error;
2717
// The scope ID is consumed only when both the scope ID separator and scope_id parsers are set and match in sequence.
2718 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2719 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2720 this->interval.end = scope_id->interval.end;
2721 else if (scope_id)
2722 scope_id->invalidate();
2723
2724 this->interval.start = start;
2725 return true;
2726
2727 error:
2728 components[0].start = 1;
2729 components[0].end = 0;
2730 components[1].start = 1;
2731 components[1].end = 0;
2732 components[2].start = 1;
2733 components[2].end = 0;
2734 components[3].start = 1;
2735 components[3].end = 0;
2736 components[4].start = 1;
2737 components[4].end = 0;
2738 components[5].start = 1;
2739 components[5].end = 0;
2740 components[6].start = 1;
2741 components[6].end = 0;
2742 components[7].start = 1;
2743 components[7].end = 0;
2744 memset(&value, 0, sizeof(value));
2745 if (scope_id) scope_id->invalidate();
2746 this->interval.start = (this->interval.end = start) + 1;
2747 return false;
2748 }
2749
/// Resets all eight component intervals to start=1, end=0, zeroes the parsed address value
/// and invalidates the scope ID parser when one is set.
2750 virtual void invalidate()
2751 {
2752 components[0].start = 1;
2753 components[0].end = 0;
2754 components[1].start = 1;
2755 components[1].end = 0;
2756 components[2].start = 1;
2757 components[2].end = 0;
2758 components[3].start = 1;
2759 components[3].end = 0;
2760 components[4].start = 1;
2761 components[4].end = 0;
2762 components[5].start = 1;
2763 components[5].end = 0;
2764 components[6].start = 1;
2765 components[6].end = 0;
2766 components[7].start = 1;
2767 components[7].end = 0;
2768 memset(&value, 0, sizeof(value));
2769 if (scope_id) scope_id->invalidate();
// NOTE(review): the embedded listing numbering skips 2770 - one statement appears to have been
// dropped here (presumably the base-class invalidate() call); confirm against the upstream header.
2771 }
2772
2773 public:
2776 std::shared_ptr<basic_parser<T>> scope_id;
2777
2778 protected:
2779 std::shared_ptr<basic_parser<T>>
2780 m_digit_0,
2781 m_digit_1,
2782 m_digit_2,
2783 m_digit_3,
2784 m_digit_4,
2785 m_digit_5,
2786 m_digit_6,
2787 m_digit_7,
2788 m_digit_8,
2789 m_digit_9,
2790 m_digit_10,
2791 m_digit_11,
2792 m_digit_12,
2793 m_digit_13,
2794 m_digit_14,
2795 m_digit_15;
2796 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2797 };
2798
2801#ifdef _UNICODE
2803#else
2805#endif
2807
2811 template <class T>
2813 {
2814 public:
2816 _In_ bool allow_idn,
2817 _In_ const std::locale& locale = std::locale()) :
2818 basic_parser<T>(locale),
2819 m_allow_idn(allow_idn),
2820 allow_on_edge(true)
2821 {}
2822
2823 virtual bool match(
2824 _In_reads_or_z_(end) const T* text,
2825 _In_ size_t start = 0,
2826 _In_ size_t end = (size_t)-1,
2827 _In_ int flags = match_default)
2828 {
2829 _Assume_(text || start >= end);
2830 if (start < end && text[start]) {
2831 if (('A' <= text[start] && text[start] <= 'Z') ||
2832 ('a' <= text[start] && text[start] <= 'z') ||
2833 ('0' <= text[start] && text[start] <= '9'))
2834 allow_on_edge = true;
2835 else if (text[start] == '-')
2836 allow_on_edge = false;
2837 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2838 allow_on_edge = true;
2839 else {
2840 this->interval.start = (this->interval.end = start) + 1;
2841 return false;
2842 }
2843 this->interval.end = (this->interval.start = start) + 1;
2844 return true;
2845 }
2846 this->interval.start = (this->interval.end = start) + 1;
2847 return false;
2848 }
2849
2850 public:
2852
2853 protected:
2854 bool m_allow_idn;
2855 };
2856
2859#ifdef _UNICODE
2861#else
2863#endif
2864
2869 {
2870 public:
2872 _In_ bool allow_idn,
2873 _In_ const std::locale& locale = std::locale()) :
2875 {}
2876
2877 virtual bool match(
2878 _In_reads_or_z_(end) const char* text,
2879 _In_ size_t start = 0,
2880 _In_ size_t end = (size_t)-1,
2881 _In_ int flags = match_default)
2882 {
2883 _Assume_(text || start >= end);
2884 if (start < end && text[start]) {
2885 wchar_t buf[3];
2886 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2887 const wchar_t* chr_end = chr + stdex::strlen(chr);
2888 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2889 ('a' <= chr[0] && chr[0] <= 'z') ||
2890 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2891 allow_on_edge = true;
2892 else if (chr[0] == '-' && chr[1] == 0)
2893 allow_on_edge = false;
2894 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2895 allow_on_edge = true;
2896 else {
2897 this->interval.start = (this->interval.end = start) + 1;
2898 return false;
2899 }
2900 this->interval.start = start;
2901 return true;
2902 }
2903 this->interval.start = (this->interval.end = start) + 1;
2904 return false;
2905 }
2906 };
2907
2911 template <class T>
2913 {
2914 public:
2916 _In_ bool allow_absolute,
2917 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2918 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2919 _In_ const std::locale& locale = std::locale()) :
2920 basic_parser<T>(locale),
2922 m_domain_char(domain_char),
2923 m_separator(separator)
2924 {}
2925
/// Matches a DNS name: a sequence of labels made of domain characters and joined by the
/// separator parser.
///
/// \param[in] text   Text to parse (zero-terminated or limited by \p end)
/// \param[in] start  Index in \p text where matching starts
/// \param[in] end    Index in \p text where matching must stop
/// \param[in] flags  Matching flags, forwarded to the domain character and separator parsers
///
/// \returns true when a name was matched; false otherwise
2926 virtual bool match(
2927 _In_reads_or_z_(end) const T* text,
2928 _In_ size_t start = 0,
2929 _In_ size_t end = (size_t)-1,
2930 _In_ int flags = match_default)
2931 {
2932 _Assume_(text || start >= end);
// i is the scanning position; this->interval.end trails it and only advances over
// characters that are allowed on the edge of a label.
// The outer loop runs once per label and stops after 127 iterations.
2933 size_t i = start, count;
2934 for (count = 0; i < end && text[i] && count < 127; count++) {
// A label must begin with a domain character that is allowed on edge.
2935 if (m_domain_char->match(text, i, end, flags) &&
2936 m_domain_char->allow_on_edge)
2937 {
2938 // Domain start
2939 this->interval.end = i = m_domain_char->interval.end;
2940 while (i < end && text[i]) {
// A separator ends the label only if the last matched character was allowed on edge.
2941 if (m_domain_char->allow_on_edge &&
2942 m_separator->match(text, i, end, flags))
2943 {
2944 // Domain end
// With m_allow_absolute the separator becomes part of the match; otherwise it is
// skipped for scanning but left out of the reported interval.
2945 if (m_allow_absolute)
2946 this->interval.end = i = m_separator->interval.end;
2947 else {
2948 this->interval.end = i;
2949 i = m_separator->interval.end;
2950 }
2951 break;
2952 }
2953 if (m_domain_char->match(text, i, end, flags)) {
2954 if (m_domain_char->allow_on_edge)
2955 this->interval.end = i = m_domain_char->interval.end;
2956 else
2957 i = m_domain_char->interval.end;
2958 }
2959 else {
// Neither a separator nor a domain character: the name ends at the last edge-allowed position.
2960 this->interval.start = start;
2961 return true;
2962 }
2963 }
2964 }
2965 else
2966 break;
2967 }
2968 if (count) {
2969 this->interval.start = start;
2970 return true;
2971 }
2972 this->interval.start = (this->interval.end = start) + 1;
2973 return false;
2974 }
2975
2976 protected:
2978 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2979 std::shared_ptr<basic_parser<T>> m_separator;
2980 };
2981
// tdns_name resolves to wdns_name when _UNICODE is defined and to dns_name otherwise.
2984#ifdef _UNICODE
2985 using tdns_name = wdns_name;
2986#else
2987 using tdns_name = dns_name;
2988#endif
2990
2994 template <class T>
2996 {
2997 public:
2998 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2999
3000 virtual bool match(
3001 _In_reads_or_z_(end) const T* text,
3002 _In_ size_t start = 0,
3003 _In_ size_t end = (size_t)-1,
3004 _In_ int flags = match_default)
3005 {
3006 _Assume_(text || start >= end);
3007 if (start < end && text[start]) {
3008 if (text[start] == '-' ||
3009 text[start] == '.' ||
3010 text[start] == '_' ||
3011 text[start] == '~' ||
3012 text[start] == '%' ||
3013 text[start] == '!' ||
3014 text[start] == '$' ||
3015 text[start] == '&' ||
3016 text[start] == '\'' ||
3017 //text[start] == '(' ||
3018 //text[start] == ')' ||
3019 text[start] == '*' ||
3020 text[start] == '+' ||
3021 text[start] == ',' ||
3022 text[start] == ';' ||
3023 text[start] == '=' ||
3024 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3025 {
3026 this->interval.end = (this->interval.start = start) + 1;
3027 return true;
3028 }
3029 }
3030 this->interval.start = (this->interval.end = start) + 1;
3031 return false;
3032 }
3033 };
3034
3037#ifdef _UNICODE
3039#else
3041#endif
3042
3047 {
3048 public:
3049 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3050
3051 virtual bool match(
3052 _In_reads_or_z_(end) const char* text,
3053 _In_ size_t start = 0,
3054 _In_ size_t end = (size_t)-1,
3055 _In_ int flags = match_default)
3056 {
3057 _Assume_(text || start >= end);
3058 if (start < end && text[start]) {
3059 wchar_t buf[3];
3060 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3061 const wchar_t* chr_end = chr + stdex::strlen(chr);
3062 if (((chr[0] == L'-' ||
3063 chr[0] == L'.' ||
3064 chr[0] == L'_' ||
3065 chr[0] == L'~' ||
3066 chr[0] == L'%' ||
3067 chr[0] == L'!' ||
3068 chr[0] == L'$' ||
3069 chr[0] == L'&' ||
3070 chr[0] == L'\'' ||
3071 //chr[0] == L'(' ||
3072 //chr[0] == L')' ||
3073 chr[0] == L'*' ||
3074 chr[0] == L'+' ||
3075 chr[0] == L',' ||
3076 chr[0] == L';' ||
3077 chr[0] == L'=') && chr[1] == 0) ||
3078 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3079 {
3080 this->interval.start = start;
3081 return true;
3082 }
3083 }
3084
3085 this->interval.start = (this->interval.end = start) + 1;
3086 return false;
3087 }
3088 };
3089
3093 template <class T>
3095 {
3096 public:
3097 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3098
3099 virtual bool match(
3100 _In_reads_or_z_(end) const T* text,
3101 _In_ size_t start = 0,
3102 _In_ size_t end = (size_t)-1,
3103 _In_ int flags = match_default)
3104 {
3105 _Assume_(text || start >= end);
3106 if (start < end && text[start]) {
3107 if (text[start] == '-' ||
3108 text[start] == '.' ||
3109 text[start] == '_' ||
3110 text[start] == '~' ||
3111 text[start] == '%' ||
3112 text[start] == '!' ||
3113 text[start] == '$' ||
3114 text[start] == '&' ||
3115 text[start] == '\'' ||
3116 text[start] == '(' ||
3117 text[start] == ')' ||
3118 text[start] == '*' ||
3119 text[start] == '+' ||
3120 text[start] == ',' ||
3121 text[start] == ';' ||
3122 text[start] == '=' ||
3123 text[start] == ':' ||
3124 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3125 {
3126 this->interval.end = (this->interval.start = start) + 1;
3127 return true;
3128 }
3129 }
3130 this->interval.start = (this->interval.end = start) + 1;
3131 return false;
3132 }
3133 };
3134
3137#ifdef _UNICODE
3139#else
3141#endif
3142
3147 {
3148 public:
3149 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3150
3151 virtual bool match(
3152 _In_reads_or_z_(end) const char* text,
3153 _In_ size_t start = 0,
3154 _In_ size_t end = (size_t)-1,
3155 _In_ int flags = match_default)
3156 {
3157 _Assume_(text || start >= end);
3158 if (start < end && text[start]) {
3159 wchar_t buf[3];
3160 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3161 const wchar_t* chr_end = chr + stdex::strlen(chr);
3162 if (((chr[0] == L'-' ||
3163 chr[0] == L'.' ||
3164 chr[0] == L'_' ||
3165 chr[0] == L'~' ||
3166 chr[0] == L'%' ||
3167 chr[0] == L'!' ||
3168 chr[0] == L'$' ||
3169 chr[0] == L'&' ||
3170 chr[0] == L'\'' ||
3171 chr[0] == L'(' ||
3172 chr[0] == L')' ||
3173 chr[0] == L'*' ||
3174 chr[0] == L'+' ||
3175 chr[0] == L',' ||
3176 chr[0] == L';' ||
3177 chr[0] == L'=' ||
3178 chr[0] == L':') && chr[1] == 0) ||
3179 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3180 {
3181 this->interval.start = start;
3182 return true;
3183 }
3184 }
3185 this->interval.start = (this->interval.end = start) + 1;
3186 return false;
3187 }
3188 };
3189
3193 template <class T>
3195 {
3196 public:
3197 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3198
3199 virtual bool match(
3200 _In_reads_or_z_(end) const T* text,
3201 _In_ size_t start = 0,
3202 _In_ size_t end = (size_t)-1,
3203 _In_ int flags = match_default)
3204 {
3205 _Assume_(text || start >= end);
3206 if (start < end && text[start]) {
3207 if (text[start] == '/' ||
3208 text[start] == '-' ||
3209 text[start] == '.' ||
3210 text[start] == '_' ||
3211 text[start] == '~' ||
3212 text[start] == '%' ||
3213 text[start] == '!' ||
3214 text[start] == '$' ||
3215 text[start] == '&' ||
3216 text[start] == '\'' ||
3217 text[start] == '(' ||
3218 text[start] == ')' ||
3219 text[start] == '*' ||
3220 text[start] == '+' ||
3221 text[start] == ',' ||
3222 text[start] == ';' ||
3223 text[start] == '=' ||
3224 text[start] == ':' ||
3225 text[start] == '@' ||
3226 text[start] == '?' ||
3227 text[start] == '#' ||
3228 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3229 {
3230 this->interval.end = (this->interval.start = start) + 1;
3231 return true;
3232 }
3233 }
3234 this->interval.start = (this->interval.end = start) + 1;
3235 return false;
3236 }
3237 };
3238
3241#ifdef _UNICODE
3243#else
3245#endif
3246
3251 {
3252 public:
3253 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3254
3255 virtual bool match(
3256 _In_reads_or_z_(end) const char* text,
3257 _In_ size_t start = 0,
3258 _In_ size_t end = (size_t)-1,
3259 _In_ int flags = match_default)
3260 {
3261 _Assume_(text || start >= end);
3262 if (start < end && text[start]) {
3263 wchar_t buf[3];
3264 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3265 const wchar_t* chr_end = chr + stdex::strlen(chr);
3266 if (((chr[0] == L'/' ||
3267 chr[0] == L'-' ||
3268 chr[0] == L'.' ||
3269 chr[0] == L'_' ||
3270 chr[0] == L'~' ||
3271 chr[0] == L'%' ||
3272 chr[0] == L'!' ||
3273 chr[0] == L'$' ||
3274 chr[0] == L'&' ||
3275 chr[0] == L'\'' ||
3276 chr[0] == L'(' ||
3277 chr[0] == L')' ||
3278 chr[0] == L'*' ||
3279 chr[0] == L'+' ||
3280 chr[0] == L',' ||
3281 chr[0] == L';' ||
3282 chr[0] == L'=' ||
3283 chr[0] == L':' ||
3284 chr[0] == L'@' ||
3285 chr[0] == L'?' ||
3286 chr[0] == L'#') && chr[1] == 0) ||
3287 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3288 {
3289 this->interval.start = start;
3290 return true;
3291 }
3292 }
3293 this->interval.start = (this->interval.end = start) + 1;
3294 return false;
3295 }
3296 };
3297
3301 template <class T>
3303 {
3304 public:
3306 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3307 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3308 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3309 _In_ const std::locale& locale = std::locale()) :
3310 basic_parser<T>(locale),
3311 m_path_char(path_char),
3312 m_query_start(query_start),
3313 m_bookmark_start(bookmark_start)
3314 {}
3315
/// Matches a URL path and splits it into the `path`, `query` and `bookmark` intervals.
///
/// Characters are consumed with m_path_char until m_query_start or m_bookmark_start matches;
/// a query may itself be followed by a bookmark.
///
/// \param[in] text   Text to parse (zero-terminated or limited by \p end)
/// \param[in] start  Index in \p text where matching starts
/// \param[in] end    Index in \p text where matching must stop
/// \param[in] flags  Matching flags, forwarded to all sub-parsers
///
/// \returns true when matched; false otherwise
3316 virtual bool match(
3317 _In_reads_or_z_(end) const T* text,
3318 _In_ size_t start = 0,
3319 _In_ size_t end = (size_t)-1,
3320 _In_ int flags = match_default)
3321 {
3322 _Assume_(text || start >= end);
3323
// Query and bookmark start out as start=1, end=0 and are filled in only when their start marker is found.
3324 this->interval.end = start;
3325 path.start = start;
3326 query.start = 1;
3327 query.end = 0;
3328 bookmark.start = 1;
3329 bookmark.end = 0;
3330
3331 for (;;) {
3332 if (this->interval.end >= end || !text[this->interval.end])
3333 break;
// Query start marker: the path ends here and the query runs until a bookmark marker,
// a non-path character or the end of the text.
3334 if (m_query_start->match(text, this->interval.end, end, flags)) {
3335 path.end = this->interval.end;
3336 query.start = this->interval.end = m_query_start->interval.end;
3337 for (;;) {
3338 if (this->interval.end >= end || !text[this->interval.end]) {
3339 query.end = this->interval.end;
3340 break;
3341 }
3342 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3343 query.end = this->interval.end;
3344 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3345 for (;;) {
3346 if (this->interval.end >= end || !text[this->interval.end]) {
3347 bookmark.end = this->interval.end;
3348 break;
3349 }
3350 if (m_path_char->match(text, this->interval.end, end, flags))
3351 this->interval.end = m_path_char->interval.end;
3352 else {
3353 bookmark.end = this->interval.end;
3354 break;
3355 }
3356 }
3357 this->interval.start = start;
3358 return true;
3359 }
3360 if (m_path_char->match(text, this->interval.end, end, flags))
3361 this->interval.end = m_path_char->interval.end;
3362 else {
3363 query.end = this->interval.end;
3364 break;
3365 }
3366 }
3367 this->interval.start = start;
3368 return true;
3369 }
// Bookmark start marker without a preceding query: the path ends here.
3370 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3371 path.end = this->interval.end;
3372 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3373 for (;;) {
3374 if (this->interval.end >= end || !text[this->interval.end]) {
3375 bookmark.end = this->interval.end;
3376 break;
3377 }
3378 if (m_path_char->match(text, this->interval.end, end, flags))
3379 this->interval.end = m_path_char->interval.end;
3380 else {
3381 bookmark.end = this->interval.end;
3382 break;
3383 }
3384 }
3385 this->interval.start = start;
3386 return true;
3387 }
3388 if (m_path_char->match(text, this->interval.end, end, flags))
3389 this->interval.end = m_path_char->interval.end;
3390 else
3391 break;
3392 }
3393
// NOTE(review): the embedded listing numbering skips 3394 and the closing brace on 3398 has no
// visible opener - a line opening a conditional block appears to have been dropped here
// (presumably a check that at least one character was consumed); confirm against upstream.
3395 path.end = this->interval.end;
3396 this->interval.start = start;
3397 return true;
3398 }
3399
3400 path.start = 1;
3401 path.end = 0;
3402 bookmark.start = 1;
3403 bookmark.end = 0;
3404 this->interval.start = (this->interval.end = start) + 1;
3405 return false;
3406 }
3407
/// Resets the path, query and bookmark intervals to start=1, end=0.
3408 virtual void invalidate()
3409 {
3410 path.start = 1;
3411 path.end = 0;
3412 query.start = 1;
3413 query.end = 0;
3414 bookmark.start = 1;
3415 bookmark.end = 0;
// NOTE(review): the embedded listing numbering skips 3416 - one statement appears to have been
// dropped here (presumably the base-class invalidate() call); confirm against the upstream header.
3417 }
3418
3419 public:
3422 stdex::interval<size_t> bookmark;
3423
3424 protected:
3425 std::shared_ptr<basic_parser<T>> m_path_char;
3426 std::shared_ptr<basic_parser<T>> m_query_start;
3427 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3428 };
3429
// turl_path resolves to wurl_path when _UNICODE is defined and to url_path otherwise.
3432#ifdef _UNICODE
3433 using turl_path = wurl_path;
3434#else
3435 using turl_path = url_path;
3436#endif
3438
/// Parser for URLs with an optional "http://", "ftp://", "mailto:" or "file://" prefix.
///
/// match() tries the four scheme prefixes in that order. When none of them
/// matches, the text is parsed as an http URL written without the scheme
/// prefix. The component parsers are public members; the ones that did not
/// take part in a match are invalidated.
3442 template <class T>
3443 class basic_url : public basic_parser<T>
3444 {
3445 public:
/// Stores the component parsers.
///
/// \param[in] _http_scheme    Parser for the http scheme name
/// \param[in] _ftp_scheme     Parser for the ftp scheme name
/// \param[in] _mailto_scheme  Parser for the mailto scheme name
/// \param[in] _file_scheme    Parser for the file scheme name
/// \param[in] colon           Parser for the separator after the scheme, between username and password, and before the port
/// \param[in] slash           Parser matched twice in a row after "scheme:" for http, ftp and file
/// \param[in] _username       Parser for the username
/// \param[in] _password       Parser for the password
/// \param[in] at              Parser for the separator that ends the credentials
/// \param[in] ip_lbracket     Parser for the opening bracket of a bracketed IP address
/// \param[in] ip_rbracket     Parser for the closing bracket of a bracketed IP address
/// \param[in] _ipv4_host      Parser for an IPv4 host
/// \param[in] _ipv6_host      Parser for an IPv6 host
/// \param[in] _dns_host       Parser for a host name
/// \param[in] _port           Parser for the port number
/// \param[in] _path           Parser for the path
/// \param[in] locale          Locale passed to the basic_parser<T> base
3446 basic_url(
3447 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3448 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3449 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3450 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3451 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3452 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3453 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3454 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3455 _In_ const std::shared_ptr<basic_parser<T>>& at,
3456 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3457 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3458 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3459 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3460 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3461 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3462 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3463 _In_ const std::locale& locale = std::locale()) :
3464 basic_parser<T>(locale),
3465 http_scheme(_http_scheme),
3466 ftp_scheme(_ftp_scheme),
3467 mailto_scheme(_mailto_scheme),
3468 file_scheme(_file_scheme),
3469 m_colon(colon),
3470 m_slash(slash),
3471 username(_username),
3472 password(_password),
3473 m_at(at),
3474 m_ip_lbracket(ip_lbracket),
3475 m_ip_rbracket(ip_rbracket),
3476 ipv4_host(_ipv4_host),
3477 ipv6_host(_ipv6_host),
3478 dns_host(_dns_host),
3479 port(_port),
3480 path(_path)
3481 {}
3482
/// Tries to match a URL starting at position `start`.
///
/// \param[in] text   Text to parse; may be null only when start >= end
/// \param[in] start  Index in `text` where the URL must begin
/// \param[in] end    Index in `text` where parsing must stop
/// \param[in] flags  Matching flags, forwarded unchanged to every component parser
///
/// \returns true when a URL was matched; this->interval then spans from
///          `start` to the end of the last matched component. false when no
///          host could be matched (ftp, mailto, http) or when a mailto URL
///          lacks the username followed by m_at; invalidate() is called first.
3483 virtual bool match(
3484 _In_reads_or_z_(end) const T* text,
3485 _In_ size_t start = 0,
3486 _In_ size_t end = (size_t)-1,
3487 _In_ int flags = match_default)
3488 {
3489 _Assume_(text || start >= end);
3490
// this->interval.end is the running "parsed so far" cursor for the whole method.
3491 this->interval.end = start;
3492
// Scheme detection: exactly one scheme parser is left valid, or none when defaulting to http.
3493 if (http_scheme->match(text, this->interval.end, end, flags) &&
3494 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3495 m_slash->match(text, m_colon->interval.end, end, flags) &&
3496 m_slash->match(text, m_slash->interval.end, end, flags))
3497 {
3498 // http://
3499 this->interval.end = m_slash->interval.end;
3500 ftp_scheme->invalidate();
3501 mailto_scheme->invalidate();
3502 file_scheme->invalidate();
3503 }
3504 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3505 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3506 m_slash->match(text, m_colon->interval.end, end, flags) &&
3507 m_slash->match(text, m_slash->interval.end, end, flags))
3508 {
3509 // ftp://
3510 this->interval.end = m_slash->interval.end;
3511 http_scheme->invalidate();
3512 mailto_scheme->invalidate();
3513 file_scheme->invalidate();
3514 }
3515 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3516 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3517 {
3518 // mailto:
3519 this->interval.end = m_colon->interval.end;
3520 http_scheme->invalidate();
3521 ftp_scheme->invalidate();
3522 file_scheme->invalidate();
3523 }
3524 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3525 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3526 m_slash->match(text, m_colon->interval.end, end, flags) &&
3527 m_slash->match(text, m_slash->interval.end, end, flags))
3528 {
3529 // file://
3530 this->interval.end = m_slash->interval.end;
3531 http_scheme->invalidate();
3532 ftp_scheme->invalidate();
3533 mailto_scheme->invalidate();
3534 }
3535 else {
3536 // Default to http:
// A partially matched prefix (e.g. scheme name without "//") is discarded here;
// this->interval.end still equals start.
3537 http_scheme->invalidate();
3538 ftp_scheme->invalidate();
3539 mailto_scheme->invalidate();
3540 file_scheme->invalidate();
3541 }
3542
// ftp: [username[:password]@]host[:port][path]
3543 if (ftp_scheme->interval) {
3544 if (username->match(text, this->interval.end, end, flags)) {
3545 if (m_colon->match(text, username->interval.end, end, flags) &&
3546 password->match(text, m_colon->interval.end, end, flags) &&
3547 m_at->match(text, password->interval.end, end, flags))
3548 {
3549 // Username and password
3550 this->interval.end = m_at->interval.end;
3551 }
// m_at has to be matched right after the username, exactly as the http branch does.
// Matching it at this->interval.end would test the first character of the username instead.
3552 else if (m_at->match(text, username->interval.end, end, flags)) {
3553 // Username only
3554 this->interval.end = m_at->interval.end;
3555 password->invalidate();
3556 }
3557 else {
// What looked like a username is not followed by m_at: treat it as part of the host.
3558 username->invalidate();
3559 password->invalidate();
3560 }
3561 }
3562 else {
3563 username->invalidate();
3564 password->invalidate();
3565 }
3566
3567 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3568 // Host is IPv4
3569 this->interval.end = ipv4_host->interval.end;
3570 ipv6_host->invalidate();
3571 dns_host->invalidate();
3572 }
3573 else if (
3574 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3575 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3576 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3577 {
3578 // Host is IPv6
3579 this->interval.end = m_ip_rbracket->interval.end;
3580 ipv4_host->invalidate();
3581 dns_host->invalidate();
3582 }
3583 else if (dns_host->match(text, this->interval.end, end, flags)) {
3584 // Host is hostname
3585 this->interval.end = dns_host->interval.end;
3586 ipv4_host->invalidate();
3587 ipv6_host->invalidate();
3588 }
3589 else {
// The host is mandatory.
3590 invalidate();
3591 return false;
3592 }
3593
3594 if (m_colon->match(text, this->interval.end, end, flags) &&
3595 port->match(text, m_colon->interval.end, end, flags))
3596 {
3597 // Port
3598 this->interval.end = port->interval.end;
3599 }
3600 else
3601 port->invalidate();
3602
// The path is optional; the URL still matches without it.
3603 if (path->match(text, this->interval.end, end, flags)) {
3604 // Path
3605 this->interval.end = path->interval.end;
3606 }
3607
3608 this->interval.start = start;
3609 return true;
3610 }
3611
// mailto: username followed by m_at, then a bracketed IPv4, bracketed IPv6 or host name.
3612 if (mailto_scheme->interval) {
3613 if (username->match(text, this->interval.end, end, flags) &&
3614 m_at->match(text, username->interval.end, end, flags))
3615 {
3616 // Username
3617 this->interval.end = m_at->interval.end;
3618 }
3619 else {
3620 invalidate();
3621 return false;
3622 }
3623
3624 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3625 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3626 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3627 {
3628 // Host is IPv4
3629 this->interval.end = m_ip_rbracket->interval.end;
3630 ipv6_host->invalidate();
3631 dns_host->invalidate();
3632 }
3633 else if (
3634 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3635 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3636 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3637 {
3638 // Host is IPv6
3639 this->interval.end = m_ip_rbracket->interval.end;
3640 ipv4_host->invalidate();
3641 dns_host->invalidate();
3642 }
3643 else if (dns_host->match(text, this->interval.end, end, flags)) {
3644 // Host is hostname
3645 this->interval.end = dns_host->interval.end;
3646 ipv4_host->invalidate();
3647 ipv6_host->invalidate();
3648 }
3649 else {
3650 invalidate();
3651 return false;
3652 }
3653
// A mailto URL carries no password, port or path.
3654 password->invalidate();
3655 port->invalidate();
3656 path->invalidate();
3657 this->interval.start = start;
3658 return true;
3659 }
3660
// file: only an optional path follows "file://"; the match succeeds even without one.
3661 if (file_scheme->interval) {
3662 if (path->match(text, this->interval.end, end, flags)) {
3663 // Path
3664 this->interval.end = path->interval.end;
3665 }
3666
3667 username->invalidate();
3668 password->invalidate();
3669 ipv4_host->invalidate();
3670 ipv6_host->invalidate();
3671 dns_host->invalidate();
3672 port->invalidate();
3673 this->interval.start = start;
3674 return true;
3675 }
3676
3677 // "http://" found or defaulted to
3678
3679 // If "http://" explicit, test for username&password.
3680 if (http_scheme->interval &&
3681 username->match(text, this->interval.end, end, flags))
3682 {
3683 if (m_colon->match(text, username->interval.end, end, flags) &&
3684 password->match(text, m_colon->interval.end, end, flags) &&
3685 m_at->match(text, password->interval.end, end, flags))
3686 {
3687 // Username and password
3688 this->interval.end = m_at->interval.end;
3689 }
3690 else if (m_at->match(text, username->interval.end, end, flags)) {
3691 // Username only
3692 this->interval.end = m_at->interval.end;
3693 password->invalidate();
3694 }
3695 else {
3696 username->invalidate();
3697 password->invalidate();
3698 }
3699 }
3700 else {
3701 username->invalidate();
3702 password->invalidate();
3703 }
3704
3705 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3706 // Host is IPv4
3707 this->interval.end = ipv4_host->interval.end;
3708 ipv6_host->invalidate();
3709 dns_host->invalidate();
3710 }
3711 else if (
3712 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3713 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3714 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3715 {
3716 // Host is IPv6
3717 this->interval.end = m_ip_rbracket->interval.end;
3718 ipv4_host->invalidate();
3719 dns_host->invalidate();
3720 }
3721 else if (dns_host->match(text, this->interval.end, end, flags)) {
3722 // Host is hostname
3723 this->interval.end = dns_host->interval.end;
3724 ipv4_host->invalidate();
3725 ipv6_host->invalidate();
3726 }
3727 else {
3728 invalidate();
3729 return false;
3730 }
3731
3732 if (m_colon->match(text, this->interval.end, end, flags) &&
3733 port->match(text, m_colon->interval.end, end, flags))
3734 {
3735 // Port
3736 this->interval.end = port->interval.end;
3737 }
3738 else
3739 port->invalidate();
3740
3741 if (path->match(text, this->interval.end, end, flags)) {
3742 // Path
3743 this->interval.end = path->interval.end;
3744 }
3745
3746 this->interval.start = start;
3747 return true;
3748 }
3749
/// Invalidates every public component parser.
// NOTE(review): listing line 3763 is not present in this excerpt; match()
// relies on invalidate() before returning false, so that line presumably
// resets this parser's own interval - confirm against the original file.
3750 virtual void invalidate()
3751 {
3752 http_scheme->invalidate();
3753 ftp_scheme->invalidate();
3754 mailto_scheme->invalidate();
3755 file_scheme->invalidate();
3756 username->invalidate();
3757 password->invalidate();
3758 ipv4_host->invalidate();
3759 ipv6_host->invalidate();
3760 dns_host->invalidate();
3761 port->invalidate();
3762 path->invalidate();
3764 }
3765
// Component parsers. After match() returns true, the parsers that took part
// hold their own match and the others have been invalidated.
3766 public:
3767 std::shared_ptr<basic_parser<T>> http_scheme;
3768 std::shared_ptr<basic_parser<T>> ftp_scheme;
3769 std::shared_ptr<basic_parser<T>> mailto_scheme;
3770 std::shared_ptr<basic_parser<T>> file_scheme;
3771 std::shared_ptr<basic_parser<T>> username;
3772 std::shared_ptr<basic_parser<T>> password;
3773 std::shared_ptr<basic_parser<T>> ipv4_host;
3774 std::shared_ptr<basic_parser<T>> ipv6_host;
3775 std::shared_ptr<basic_parser<T>> dns_host;
3776 std::shared_ptr<basic_parser<T>> port;
3777 std::shared_ptr<basic_parser<T>> path;
3778
// Delimiter parsers; used only to advance the cursor and never invalidated by this class.
3779 protected:
3780 std::shared_ptr<basic_parser<T>> m_colon;
3781 std::shared_ptr<basic_parser<T>> m_slash;
3782 std::shared_ptr<basic_parser<T>> m_at;
3783 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3784 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3785 };
3786
// Character-type instantiations of basic_url. turl follows _UNICODE;
// sgml_url is the same instantiation as url (basic_url<char>).
3787 using url = basic_url<char>;
3788 using wurl = basic_url<wchar_t>;
3789#ifdef _UNICODE
3790 using turl = wurl;
3791#else
3792 using turl = url;
3793#endif
3794 using sgml_url = basic_url<char>;
3795
/// Parser for text of the form: username, m_at, then a host given as a
/// bracketed IPv4 address, a bracketed IPv6 address or a host name.
// NOTE(review): listing lines 3800 (class head) and 3803 (constructor name)
// are not present in this excerpt.
3799 template <class T>
3801 {
3802 public:
/// Stores the component parsers.
///
/// \param[in] _username    Parser for the username
/// \param[in] at           Parser for the separator between username and host
/// \param[in] ip_lbracket  Parser for the opening bracket of an IP address
/// \param[in] ip_rbracket  Parser for the closing bracket of an IP address
/// \param[in] _ipv4_host   Parser for an IPv4 host
/// \param[in] _ipv6_host   Parser for an IPv6 host
/// \param[in] _dns_host    Parser for a host name
/// \param[in] locale       Locale passed to the basic_parser<T> base
3804 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3805 _In_ const std::shared_ptr<basic_parser<T>>& at,
3806 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3807 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3808 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3809 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3810 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3811 _In_ const std::locale& locale = std::locale()) :
3812 basic_parser<T>(locale),
3813 username(_username),
3814 m_at(at),
3815 m_ip_lbracket(ip_lbracket),
3816 m_ip_rbracket(ip_rbracket),
3817 ipv4_host(_ipv4_host),
3818 ipv6_host(_ipv6_host),
3819 dns_host(_dns_host)
3820 {}
3821
/// Tries to match username, m_at and a host starting at position `start`.
///
/// \param[in] text   Text to parse; may be null only when start >= end
/// \param[in] start  Index in `text` where the match must begin
/// \param[in] end    Index in `text` where parsing must stop
/// \param[in] flags  Matching flags, forwarded to every component parser
///
/// \returns true on a match, with this->interval spanning from `start` to the
///          end of the host. false otherwise; all public component parsers
///          are then invalidated and this->interval is set to (start + 1, start).
3822 virtual bool match(
3823 _In_reads_or_z_(end) const T* text,
3824 _In_ size_t start = 0,
3825 _In_ size_t end = (size_t)-1,
3826 _In_ int flags = match_default)
3827 {
3828 _Assume_(text || start >= end);
3829
3830 if (username->match(text, start, end, flags) &&
3831 m_at->match(text, username->interval.end, end, flags))
3832 {
3833 // Username@
// Both IP forms must be enclosed between m_ip_lbracket and m_ip_rbracket.
3834 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3835 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3836 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3837 {
3838 // Host is IPv4
3839 this->interval.end = m_ip_rbracket->interval.end;
3840 ipv6_host->invalidate();
3841 dns_host->invalidate();
3842 }
3843 else if (
3844 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3845 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3846 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3847 {
3848 // Host is IPv6
3849 this->interval.end = m_ip_rbracket->interval.end;
3850 ipv4_host->invalidate();
3851 dns_host->invalidate();
3852 }
3853 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3854 // Host is hostname
3855 this->interval.end = dns_host->interval.end;
3856 ipv4_host->invalidate();
3857 ipv6_host->invalidate();
3858 }
3859 else
3860 goto error;
3861 this->interval.start = start;
3862 return true;
3863 }
3864
// Shared failure exit: reached by fall-through when username/m_at fail and by goto when no host matches.
3865 error:
3866 username->invalidate();
3867 ipv4_host->invalidate();
3868 ipv6_host->invalidate();
3869 dns_host->invalidate();
// start > end marks this parser's interval as "no match".
3870 this->interval.start = (this->interval.end = start) + 1;
3871 return false;
3872 }
3873
/// Invalidates the username and the three host parsers.
// NOTE(review): listing line 3880 is not present in this excerpt.
3874 virtual void invalidate()
3875 {
3876 username->invalidate();
3877 ipv4_host->invalidate();
3878 ipv6_host->invalidate();
3879 dns_host->invalidate();
3881 }
3882
// Component parsers; after a successful match() exactly one of the three host parsers is left valid.
3883 public:
3884 std::shared_ptr<basic_parser<T>> username;
3885 std::shared_ptr<basic_parser<T>> ipv4_host;
3886 std::shared_ptr<basic_parser<T>> ipv6_host;
3887 std::shared_ptr<basic_parser<T>> dns_host;
3888
// Delimiter parsers; never invalidated by this class.
3889 protected:
3890 std::shared_ptr<basic_parser<T>> m_at;
3891 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3892 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3893 };
3894
3897#ifdef _UNICODE
3899#else
3901#endif
3903
/// Parser for emoticons: either a whole-emoticon parser matches, or the
/// sequence [apex] eyes [nose] mouth, where the mouth may repeat.
// NOTE(review): listing lines 3908 (class head), 3911 (constructor name) and
// 3919 (one member initializer, presumably for `emoticon`) are not present in
// this excerpt.
3907 template <class T>
3909 {
3910 public:
/// Stores the component parsers.
///
/// \param[in] _emoticon  Parser for a complete emoticon; match() accepts a null pointer
/// \param[in] _apex      Parser for the part before the eyes; match() accepts a null pointer
/// \param[in] _eyes      Parser for the eyes; dereferenced unconditionally
/// \param[in] _nose      Parser for the nose; match() accepts a null pointer
/// \param[in] _mouth     Set parser for the mouth; dereferenced unconditionally
/// \param[in] locale     Locale passed to the basic_parser<T> base
3912 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3913 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3914 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3915 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3916 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3917 _In_ const std::locale& locale = std::locale()) :
3918 basic_parser<T>(locale),
3920 apex(_apex),
3921 eyes(_eyes),
3922 nose(_nose),
3923 mouth(_mouth)
3924 {}
3925
/// Tries to match an emoticon starting at position `start`.
///
/// \param[in] text   Text to parse; may be null only when start >= end
/// \param[in] start  Index in `text` where the emoticon must begin
/// \param[in] end    Index in `text` where parsing must stop
/// \param[in] flags  Matching flags, forwarded to every component parser
///
/// \returns true on a match with this->interval covering the emoticon.
///          false otherwise; all component parsers are then invalidated and
///          this->interval is set to (start + 1, start).
3926 virtual bool match(
3927 _In_reads_or_z_(end) const T* text,
3928 _In_ size_t start = 0,
3929 _In_ size_t end = (size_t)-1,
3930 _In_ int flags = match_default)
3931 {
3932 _Assume_(text || start >= end);
3933
// A whole-emoticon match takes precedence over the component-wise match.
3934 if (emoticon && emoticon->match(text, start, end, flags)) {
3935 if (apex) apex->invalidate();
3936 eyes->invalidate();
3937 if (nose) nose->invalidate();
3938 mouth->invalidate();
3939 this->interval.start = start;
3940 this->interval.end = emoticon->interval.end;
3941 return true;
3942 }
3943
3944 this->interval.end = start;
3945
// The apex is optional; a failed apex match leaves the cursor at start.
3946 if (apex && apex->match(text, this->interval.end, end, flags))
3947 this->interval.end = apex->interval.end;
3948
3949 if (eyes->match(text, this->interval.end, end, flags)) {
// Eyes, nose, mouth.
3950 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3951 mouth->match(text, nose->interval.end, end, flags))
3952 {
// NOTE(review): listing line 3954 is not present in this excerpt; it
// presumably declares start_mouth, which is used below.
3953 size_t
3955 hit_offset = mouth->hit_offset;
3956 // Mouth may repeat :-)))))))
// Only repetitions with the same hit_offset as the first mouth match are consumed.
3957 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
// The loop overwrote mouth->interval; restore it to cover the whole repeated run.
3958 mouth->interval.start = start_mouth;
3959 mouth->interval.end = this->interval.end;
3960 this->interval.start = start;
3961 return true;
3962 }
// Eyes, mouth (no nose).
3963 if (mouth->match(text, eyes->interval.end, end, flags)) {
// NOTE(review): listing line 3965 is not present in this excerpt; see the note above.
3964 size_t
3966 hit_offset = mouth->hit_offset;
3967 // Mouth may repeat :-)))))))
3968 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3969 if (nose) nose->invalidate();
3970 mouth->interval.start = start_mouth;
3971 mouth->interval.end = this->interval.end;
3972 this->interval.start = start;
3973 return true;
3974 }
3975 }
3976
3977 if (emoticon) emoticon->invalidate();
3978 if (apex) apex->invalidate();
3979 eyes->invalidate();
3980 if (nose) nose->invalidate();
3981 mouth->invalidate();
// start > end marks this parser's interval as "no match".
3982 this->interval.start = (this->interval.end = start) + 1;
3983 return false;
3984 }
3985
/// Invalidates all component parsers that are set.
// NOTE(review): listing line 3993 is not present in this excerpt.
3986 virtual void invalidate()
3987 {
3988 if (emoticon) emoticon->invalidate();
3989 if (apex) apex->invalidate();
3990 eyes->invalidate();
3991 if (nose) nose->invalidate();
3992 mouth->invalidate();
3994 }
3995
// Component parsers; emoticon, apex and nose may be null, eyes and mouth may not.
3996 public:
3997 std::shared_ptr<basic_parser<T>> emoticon;
3998 std::shared_ptr<basic_parser<T>> apex;
3999 std::shared_ptr<basic_parser<T>> eyes;
4000 std::shared_ptr<basic_parser<T>> nose;
4001 std::shared_ptr<basic_set<T>> mouth;
4002 };
4003
// temoticon is the wide-character alias when _UNICODE is defined and the narrow one otherwise.
4006#ifdef _UNICODE
4007 using temoticon = wemoticon;
4008#else
4009 using temoticon = emoticon;
4010#endif
4012
/// Date layouts recognized by basic_date.
///
/// Each value is a distinct bit, so several can be OR-ed into the format mask
/// given to basic_date. The letters name the order in which the components
/// are matched (d = day, m = month, y = year).
4016 enum date_format_t {
4017 date_format_none = 0, ///< No format; value of basic_date::format when nothing matched
4018 date_format_dmy = 0x1, ///< day, month, year
4019 date_format_mdy = 0x2, ///< month, day, year
4020 date_format_ymd = 0x4, ///< year, month, day
4021 date_format_ym = 0x8, ///< year, month
4022 date_format_my = 0x10, ///< month, year
4023 date_format_dm = 0x20, ///< day, month
4024 date_format_md = 0x40, ///< month, day
4025 };
4026
/// Parser for dates made of day, month and year numbers joined by separators.
///
/// The layouts enabled in the format mask are tried in the fixed order
/// dmy, mdy, ymd, ym, my, dm, md; the first one that matches wins and is
/// reported in `format`.
4030 template <class T>
4031 class basic_date : public basic_parser<T>
4032 {
4033 public:
/// Stores the format mask and the component parsers.
///
/// \param[in] format_mask  Bitwise combination of date_format_t values selecting the layouts to try
/// \param[in] _day         Parser for the day number
/// \param[in] _month       Parser for the month number
/// \param[in] _year        Parser for the year number
/// \param[in] separator    Set parser for the separator between components
/// \param[in] space        Parser for the optional space allowed around separators
/// \param[in] locale       Locale passed to the basic_parser<T> base
4034 basic_date(
4035 _In_ int format_mask,
4036 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4037 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4038 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4039 _In_ const std::shared_ptr<basic_set<T>>& separator,
4040 _In_ const std::shared_ptr<basic_parser<T>>& space,
4041 _In_ const std::locale& locale = std::locale()) :
4042 basic_parser<T>(locale),
4043 format(date_format_none),
4044 m_format_mask(format_mask),
4045 day(_day),
4046 month(_month),
4047 year(_year),
4048 m_separator(separator),
4049 m_space(space)
4050 {}
4051
/// Tries to match a date starting at position `start`.
///
/// \param[in] text   Text to parse; may be null only when start >= end
/// \param[in] start  Index in `text` where the date must begin
/// \param[in] end    Index in `text` where parsing must stop
/// \param[in] flags  Matching flags; match_multiline is cleared for the space parser
///
/// \returns true on a match; `format` names the layout and this->interval
///          covers the date. false otherwise; day, month and year are
///          invalidated, `format` is date_format_none and this->interval is
///          set to (start + 1, start).
4052 virtual bool match(
4053 _In_reads_or_z_(end) const T* text,
4054 _In_ size_t start = 0,
4055 _In_ size_t end = (size_t)-1,
4056 _In_ int flags = match_default)
4057 {
4058 _Assume_(text || start >= end);
4059
// In every branch below, this->interval.end serves as a scratch cursor while
// runs of m_space are skipped; it is given its final value only on success.
4060 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
// day <sep> month <sep> year
4061 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4062 if (day->match(text, start, end, flags)) {
4063 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4064 if (m_separator->match(text, this->interval.end, end, flags)) {
// Remember which element of the separator set matched, to compare with the second separator.
4065 size_t hit_offset = m_separator->hit_offset;
4066 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4067 if (month->match(text, this->interval.end, end, flags)) {
4068 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4069 if (m_separator->match(text, this->interval.end, end, flags) &&
4070 m_separator->hit_offset == hit_offset) // Both separators must match.
4071 {
4072 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4073 if (year->match(text, this->interval.end, end, flags) &&
4074 is_valid(day->value, month->value))
4075 {
4076 this->interval.start = start;
4077 this->interval.end = year->interval.end;
4078 format = date_format_dmy;
4079 return true;
4080 }
4081 }
4082 }
4083 }
4084 }
4085 }
4086
// month <sep> day <sep> year
4087 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4088 if (month->match(text, start, end, flags)) {
4089 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4090 if (m_separator->match(text, this->interval.end, end, flags)) {
4091 size_t hit_offset = m_separator->hit_offset;
4092 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4093 if (day->match(text, this->interval.end, end, flags)) {
4094 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4095 if (m_separator->match(text, this->interval.end, end, flags) &&
4096 m_separator->hit_offset == hit_offset) // Both separators must match.
4097 {
4098 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4099 if (year->match(text, this->interval.end, end, flags) &&
4100 is_valid(day->value, month->value))
4101 {
4102 this->interval.start = start;
4103 this->interval.end = year->interval.end;
4104 format = date_format_mdy;
4105 return true;
4106 }
4107 }
4108 }
4109 }
4110 }
4111 }
4112
// year <sep> month <sep> day
4113 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4114 if (year->match(text, start, end, flags)) {
4115 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4116 if (m_separator->match(text, this->interval.end, end, flags)) {
4117 size_t hit_offset = m_separator->hit_offset;
4118 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4119 if (month->match(text, this->interval.end, end, flags)) {
4120 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4121 if (m_separator->match(text, this->interval.end, end, flags) &&
4122 m_separator->hit_offset == hit_offset) // Both separators must match.
4123 {
4124 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4125 if (day->match(text, this->interval.end, end, flags) &&
4126 is_valid(day->value, month->value))
4127 {
4128 this->interval.start = start;
4129 this->interval.end = day->interval.end;
4130 format = date_format_ymd;
4131 return true;
4132 }
4133 }
4134 }
4135 }
4136 }
4137 }
4138
// year <sep> month; only the month is validated.
4139 if ((m_format_mask & date_format_ym) == date_format_ym) {
4140 if (year->match(text, start, end, flags)) {
4141 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4142 if (m_separator->match(text, this->interval.end, end, flags)) {
4143 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4144 if (month->match(text, this->interval.end, end, flags) &&
4145 is_valid((size_t)-1, month->value))
4146 {
4147 if (day) day->invalidate();
4148 this->interval.start = start;
4149 this->interval.end = month->interval.end;
4150 format = date_format_ym;
4151 return true;
4152 }
4153 }
4154 }
4155 }
4156
// month <sep> year; only the month is validated.
4157 if ((m_format_mask & date_format_my) == date_format_my) {
4158 if (month->match(text, start, end, flags)) {
4159 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4160 if (m_separator->match(text, this->interval.end, end, flags)) {
4161 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4162 if (year->match(text, this->interval.end, end, flags) &&
4163 is_valid((size_t)-1, month->value))
4164 {
4165 if (day) day->invalidate();
4166 this->interval.start = start;
4167 this->interval.end = year->interval.end;
4168 format = date_format_my;
4169 return true;
4170 }
4171 }
4172 }
4173 }
4174
// day <sep> month, optionally followed by the same separator again.
4175 if ((m_format_mask & date_format_dm) == date_format_dm) {
4176 if (day->match(text, start, end, flags)) {
4177 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4178 if (m_separator->match(text, this->interval.end, end, flags)) {
4179 size_t hit_offset = m_separator->hit_offset;
4180 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4181 if (month->match(text, this->interval.end, end, flags) &&
4182 is_valid(day->value, month->value))
4183 {
4184 if (year) year->invalidate();
4185 this->interval.start = start;
4186 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
// A trailing separator is included only when it equals the first one;
// otherwise the match ends right after the month (skipped spaces are dropped).
4187 if (m_separator->match(text, this->interval.end, end, flags) &&
4188 m_separator->hit_offset == hit_offset) // Both separators must match.
4189 this->interval.end = m_separator->interval.end;
4190 else
4191 this->interval.end = month->interval.end;
4192 format = date_format_dm;
4193 return true;
4194 }
4195 }
4196 }
4197 }
4198
// month <sep> day, optionally followed by the same separator again.
4199 if ((m_format_mask & date_format_md) == date_format_md) {
4200 if (month->match(text, start, end, flags)) {
4201 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4202 if (m_separator->match(text, this->interval.end, end, flags)) {
4203 size_t hit_offset = m_separator->hit_offset;
4204 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4205 if (day->match(text, this->interval.end, end, flags) &&
4206 is_valid(day->value, month->value))
4207 {
4208 if (year) year->invalidate();
4209 this->interval.start = start;
4210 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4211 if (m_separator->match(text, this->interval.end, end, flags) &&
4212 m_separator->hit_offset == hit_offset) // Both separators must match.
4213 this->interval.end = m_separator->interval.end;
4214 else
4215 this->interval.end = day->interval.end;
4216 format = date_format_md;
4217 return true;
4218 }
4219 }
4220 }
4221 }
4222
// No enabled layout matched.
4223 if (day) day->invalidate();
4224 if (month) month->invalidate();
4225 if (year) year->invalidate();
4226 format = date_format_none;
4227 this->interval.start = (this->interval.end = start) + 1;
4228 return false;
4229 }
4230
/// Invalidates the day, month and year parsers that are set and resets `format`.
// NOTE(review): listing line 4237 is not present in this excerpt.
4231 virtual void invalidate()
4232 {
4233 if (day) day->invalidate();
4234 if (month) month->invalidate();
4235 if (year) year->invalidate();
4236 format = date_format_none;
4238 }
4239
4240 protected:
/// Checks that `day` exists in `month`.
///
/// \param[in] day    Day of month, or (size_t)-1 to validate the month only
/// \param[in] month  Month number 1..12, or (size_t)-1 to validate the day only
///
/// \returns true when the day is within the month's length. February accepts
///          up to 29 days because the year is not taken into account. Any
///          month outside 1..12 yields false.
4241 static inline bool is_valid(size_t day, size_t month)
4242 {
4243 if (month == (size_t)-1) {
4244 // Default to January. This allows validating day only, as January has all 31 days.
4245 month = 1;
4246 }
4247 if (day == (size_t)-1) {
4248 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4249 day = 1;
4250 }
4251
4252 switch (month) {
4253 case 1:
4254 case 3:
4255 case 5:
4256 case 7:
4257 case 8:
4258 case 10:
4259 case 12:
4260 return 1 <= day && day <= 31;
4261 case 2:
4262 return 1 <= day && day <= 29;
4263 case 4:
4264 case 6:
4265 case 9:
4266 case 11:
4267 return 1 <= day && day <= 30;
4268 default:
4269 return false;
4270 }
4271 }
4272
4273 public:
/// Layout of the last successful match; date_format_none otherwise.
4274 date_format_t format;
4275 std::shared_ptr<basic_integer<T>> day;
4276 std::shared_ptr<basic_integer<T>> month;
4277 std::shared_ptr<basic_integer<T>> year;
4278
4279 protected:
/// Bitwise combination of date_format_t values enabling the layouts match() tries.
4280 int m_format_mask;
4281 std::shared_ptr<basic_set<T>> m_separator;
4282 std::shared_ptr<basic_parser<T>> m_space;
4283 };
4284
// Character-type instantiations of basic_date; tdate follows _UNICODE.
4285 using date = basic_date<char>;
4286 using wdate = basic_date<wchar_t>;
4287#ifdef _UNICODE
4288 using tdate = wdate;
4289#else
4290 using tdate = date;
4291#endif
4293
/// Parser for times of day: hour, separator, minute, optionally followed by
/// the same separator and seconds, optionally followed by a millisecond
/// separator and milliseconds.
4297 template <class T>
4298 class basic_time : public basic_parser<T>
4299 {
4300 public:
/// Stores the component parsers.
///
/// \param[in] _hour                  Parser for the hour; dereferenced unconditionally
/// \param[in] _minute                Parser for the minute; dereferenced unconditionally
/// \param[in] _second                Parser for the second; match() accepts a null pointer
/// \param[in] _millisecond           Parser for the millisecond; match() accepts a null pointer
/// \param[in] separator              Set parser for the separator between hour, minute and second
/// \param[in] millisecond_separator  Parser for the separator before milliseconds; match() accepts a null pointer
/// \param[in] locale                 Locale passed to the basic_parser<T> base
4301 basic_time(
4302 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4303 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4304 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4305 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4306 _In_ const std::shared_ptr<basic_set<T>>& separator,
4307 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4308 _In_ const std::locale& locale = std::locale()) :
4309 basic_parser<T>(locale),
4310 hour(_hour),
4311 minute(_minute),
4312 second(_second),
4313 millisecond(_millisecond),
4314 m_separator(separator),
4315 m_millisecond_separator(millisecond_separator)
4316 {}
4317
/// Tries to match a time starting at position `start`.
///
/// \param[in] text   Text to parse; may be null only when start >= end
/// \param[in] start  Index in `text` where the time must begin
/// \param[in] end    Index in `text` where parsing must stop
/// \param[in] flags  Matching flags, forwarded to every component parser
///
/// \returns true when at least hour, separator and a minute below 60 were
///          matched; this->interval then ends after the last accepted
///          component. false otherwise, with all components invalidated and
///          this->interval set to (start + 1, start).
4318 virtual bool match(
4319 _In_reads_or_z_(end) const T* text,
4320 _In_ size_t start = 0,
4321 _In_ size_t end = (size_t)-1,
4322 _In_ int flags = match_default)
4323 {
4324 _Assume_(text || start >= end);
4325
// Note: the hour value is not range-checked here; only minute, second and millisecond are.
4326 if (hour->match(text, start, end, flags) &&
4327 m_separator->match(text, hour->interval.end, end, flags) &&
4328 minute->match(text, m_separator->interval.end, end, flags) &&
4329 minute->value < 60)
4330 {
4331 // hh::mm
// Remember which element of the separator set matched, to compare with the second separator.
4332 size_t hit_offset = m_separator->hit_offset;
4333 if (m_separator->match(text, minute->interval.end, end, flags) &&
4334 m_separator->hit_offset == hit_offset && // Both separators must match.
4335 second && second->match(text, m_separator->interval.end, end, flags) &&
4336 second->value < 60)
4337 {
4338 // hh::mm:ss
4339 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4340 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4341 millisecond->value < 1000)
4342 {
4343 // hh::mm:ss.mmmm
4344 this->interval.end = millisecond->interval.end;
4345 }
4346 else {
4347 if (millisecond) millisecond->invalidate();
4348 this->interval.end = second->interval.end;
4349 }
4350 }
4351 else {
// No valid seconds: the time ends after the minute.
4352 if (second) second->invalidate();
4353 if (millisecond) millisecond->invalidate();
4354 this->interval.end = minute->interval.end;
4355 }
4356 this->interval.start = start;
4357 return true;
4358 }
4359
4360 hour->invalidate();
4361 minute->invalidate();
4362 if (second) second->invalidate();
4363 if (millisecond) millisecond->invalidate();
// start > end marks this parser's interval as "no match".
4364 this->interval.start = (this->interval.end = start) + 1;
4365 return false;
4366 }
4367
/// Invalidates the hour and minute parsers and, when set, second and millisecond.
// NOTE(review): listing line 4374 is not present in this excerpt.
4368 virtual void invalidate()
4369 {
4370 hour->invalidate();
4371 minute->invalidate();
4372 if (second) second->invalidate();
4373 if (millisecond) millisecond->invalidate();
4375 }
4376
// Component parsers; second and millisecond may be null.
4377 public:
4378 std::shared_ptr<basic_integer10<T>> hour;
4379 std::shared_ptr<basic_integer10<T>> minute;
4380 std::shared_ptr<basic_integer10<T>> second;
4381 std::shared_ptr<basic_integer10<T>> millisecond;
4382
4383 protected:
4384 std::shared_ptr<basic_set<T>> m_separator;
4385 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4386 };
4387
// Character-type instantiations of basic_time; ttime follows _UNICODE.
4388 using time = basic_time<char>;
4389 using wtime = basic_time<wchar_t>;
4390#ifdef _UNICODE
4391 using ttime = wtime;
4392#else
4393 using ttime = time;
4394#endif
4396
/// Parser for angles written as degrees, minutes and seconds, each part
/// optional, followed by an optional decimal part. At least one of the three
/// parts must match.
4400 template <class T>
4401 class basic_angle : public basic_parser<T>
4402 {
4403 public:
// NOTE(review): listing line 4404 (constructor name) is not present in this excerpt.
/// Stores the component parsers.
///
/// \param[in] _degree            Parser for the degree value; dereferenced unconditionally
/// \param[in] _degree_separator  Parser for the mark after degrees; dereferenced unconditionally
/// \param[in] _minute            Parser for the minute value; dereferenced unconditionally
/// \param[in] _minute_separator  Parser for the mark after minutes; dereferenced unconditionally
/// \param[in] _second            Parser for the second value; match() accepts a null pointer
/// \param[in] _second_separator  Parser for the mark after seconds; match() accepts a null pointer
/// \param[in] _decimal           Parser for the trailing decimal part; match() accepts a null pointer
/// \param[in] locale             Locale passed to the basic_parser<T> base
4405 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4406 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4407 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4408 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4409 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4410 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4411 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4412 _In_ const std::locale& locale = std::locale()) :
4413 basic_parser<T>(locale),
4414 degree(_degree),
4415 degree_separator(_degree_separator),
4416 minute(_minute),
4417 minute_separator(_minute_separator),
4418 second(_second),
4419 second_separator(_second_separator),
4420 decimal(_decimal)
4421 {}
4422
/// Tries to match an angle starting at position `start`.
///
/// \param[in] text   Text to parse; may be null only when start >= end
/// \param[in] start  Index in `text` where the angle must begin
/// \param[in] end    Index in `text` where parsing must stop
/// \param[in] flags  Matching flags, forwarded to every component parser
///
/// \returns true when at least one of degree, minute or second matched;
///          this->interval then spans from `start` to the end of the last
///          accepted component. false otherwise, with this->interval set to
///          (start + 1, start).
4423 virtual bool match(
4424 _In_reads_or_z_(end) const T* text,
4425 _In_ size_t start = 0,
4426 _In_ size_t end = (size_t)-1,
4427 _In_ int flags = match_default)
4428 {
4429 _Assume_(text || start >= end);
4430
// this->interval.end is the running cursor; it advances only past accepted parts.
4431 this->interval.end = start;
4432
// Degrees count only when followed by their separator.
4433 if (degree->match(text, this->interval.end, end, flags) &&
4434 degree_separator->match(text, degree->interval.end, end, flags))
4435 {
4436 // Degrees
4437 this->interval.end = degree_separator->interval.end;
4438 }
4439 else {
4440 degree->invalidate();
4441 degree_separator->invalidate();
4442 }
4443
// Minutes must be below 60 and followed by their separator.
4444 if (minute->match(text, this->interval.end, end, flags) &&
4445 minute->value < 60 &&
4446 minute_separator->match(text, minute->interval.end, end, flags))
4447 {
4448 // Minutes
4449 this->interval.end = minute_separator->interval.end;
4450 }
4451 else {
4452 minute->invalidate();
4453 minute_separator->invalidate();
4454 }
4455
// Seconds must be below 60; unlike degrees and minutes, their separator is optional.
4456 if (second && second->match(text, this->interval.end, end, flags) &&
4457 second->value < 60)
4458 {
4459 // Seconds
4460 this->interval.end = second->interval.end;
4461 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4462 this->interval.end = second_separator->interval.end;
4463 else
4464 if (second_separator) second_separator->invalidate();
4465 }
4466 else {
4467 if (second) second->invalidate();
4468 if (second_separator) second_separator->invalidate();
4469 }
4470
// A part counts as matched when its interval is non-empty (start < end).
4471 if (degree->interval.start < degree->interval.end ||
4472 minute->interval.start < minute->interval.end ||
4473 (second && second->interval.start < second->interval.end))
4474 {
4475 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4476 // Decimals
4477 this->interval.end = decimal->interval.end;
4478 }
4479 else if (decimal)
4480 decimal->invalidate();
4481 this->interval.start = start;
4482 return true;
4483 }
4484 if (decimal) decimal->invalidate();
// start > end marks this parser's interval as "no match".
4485 this->interval.start = (this->interval.end = start) + 1;
4486 return false;
4487 }
4488
/// Invalidates every component parser that is set.
// NOTE(review): listing line 4498 is not present in this excerpt.
4489 virtual void invalidate()
4490 {
4491 degree->invalidate();
4492 degree_separator->invalidate();
4493 minute->invalidate();
4494 minute_separator->invalidate();
4495 if (second) second->invalidate();
4496 if (second_separator) second_separator->invalidate();
4497 if (decimal) decimal->invalidate();
4499 }
4500
// Component parsers; second, second_separator and decimal may be null.
4501 public:
4502 std::shared_ptr<basic_integer10<T>> degree;
4503 std::shared_ptr<basic_parser<T>> degree_separator;
4504 std::shared_ptr<basic_integer10<T>> minute;
4505 std::shared_ptr<basic_parser<T>> minute_separator;
4506 std::shared_ptr<basic_integer10<T>> second;
4507 std::shared_ptr<basic_parser<T>> second_separator;
4508 std::shared_ptr<basic_parser<T>> decimal;
4509 };
4510
/// Narrow-character (char) instantiation of basic_angle.
4511 using angle = basic_angle<char>;
/// Wide-character (wchar_t) instantiation of basic_angle; the _UNICODE branch that follows refers to it.
4512 using wangle = basic_angle<wchar_t>;
// NOTE(review): the sibling alias sets in this file use a `t` prefix for this alias (ttime, tiban);
// confirm that the name RRegElKot is intentional.
4513#ifdef _UNICODE
4514 using RRegElKot = wangle;
4515#else
4516 using RRegElKot = angle;
4517#endif
4519
// NOTE(review): the embedded numbering jumps 4523 -> 4525 and 4526 -> 4528, so the class head and the
// constructor name are not present in this text; what follows is the remainder of both.
4523 template <class T>
4525 {
4526 public:
/// Constructor: stores the component parsers and passes `locale` on to basic_parser<T>.
///
/// \param[in] digit         Parser for one digit; stored in m_digit and dereferenced unconditionally by match()
/// \param[in] plus_sign     Parser for a leading plus sign; stored in m_plus_sign (null-checked by match())
/// \param[in] lparenthesis  Set of left parentheses; stored in m_lparenthesis
/// \param[in] rparenthesis  Set of right parentheses; stored in m_rparenthesis
/// \param[in] separator     Parser for a separator between digits; stored in m_separator (null-checked by match())
/// \param[in] space         Parser for a space; stored in m_space (null-checked by match())
/// \param[in] locale        Locale forwarded to the base class
4528 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4529 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4530 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4531 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4532 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4533 _In_ const std::shared_ptr<basic_parser<T>>& space,
4534 _In_ const std::locale& locale = std::locale()) :
4535 basic_parser<T>(locale),
4536 m_digit(digit),
4537 m_plus_sign(plus_sign),
4538 m_lparenthesis(lparenthesis),
4539 m_rparenthesis(rparenthesis),
4540 m_separator(separator),
4541 m_space(space)
4542 {}
4543
/// Tries to match a phone number at `text[start]`.
///
/// An optional plus sign is followed by any mix of digits, one pair of parentheses, separators and spaces.
/// The plus sign, digits and parentheses are appended to `value`; separators and spaces are skipped.
/// The match ends at the last "safe" position: after the last digit outside parentheses or after the
/// closing parenthesis. Anything scanned beyond that point (trailing separators or spaces, an unclosed
/// parenthesis group) is dropped from both the interval and `value`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` where matching starts
/// \param[in] end    Index in `text` where matching must stop
/// \param[in] flags  Matching flags; match_multiline is masked out when matching spaces
///
/// \returns true when at least one digit outside parentheses was found; false otherwise.
4544 virtual bool match(
4545 _In_reads_or_z_(end) const T* text,
4546 _In_ size_t start = 0,
4547 _In_ size_t end = (size_t)-1,
4548 _In_ int flags = match_default)
4549 {
4550 _Assume_(text || start >= end);
4551
// safe_digit_end / safe_value_size remember the last position and `value` length that may be reported.
4552 size_t safe_digit_end = start, safe_value_size = 0;
4553 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4554 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4555
4556 this->interval.end = start;
4557 value.clear();
// The parenthesis parsers are treated as optional everywhere else in this method, so guard the reset too.
4558 if (m_lparenthesis) m_lparenthesis->invalidate();
4559 if (m_rparenthesis) m_rparenthesis->invalidate();
4560
4561 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4562 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4563 safe_value_size = value.size();
4564 this->interval.end = m_plus_sign->interval.end;
4565 }
4566
4567 for (;;) {
4568 _Assume_(text || this->interval.end >= end);
4569 if (this->interval.end >= end || !text[this->interval.end])
4570 break;
4571 if (m_digit->match(text, this->interval.end, end, flags)) {
4572 // Digit
4573 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4574 this->interval.end = m_digit->interval.end;
// Digits inside parentheses become "safe" only once the closing parenthesis is seen.
4575 if (!in_parentheses) {
4576 safe_digit_end = this->interval.end;
4577 safe_value_size = value.size();
4578 has_digits = true;
4579 }
4580 after_digit = true;
4581 after_parentheses = false;
4582 }
4583 else if (
4584 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4585 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4586 m_lparenthesis->match(text, this->interval.end, end, flags))
4587 {
4588 // Left parenthesis
4589 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4590 this->interval.end = m_lparenthesis->interval.end;
4591 in_parentheses = true;
4592 after_digit = false;
4593 after_parentheses = false;
4594 }
4595 else if (
4596 in_parentheses && // After left parenthesis
4597 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4598 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4599 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4600 {
4601 // Right parenthesis
4602 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4603 this->interval.end = m_rparenthesis->interval.end;
4604 safe_digit_end = this->interval.end;
4605 safe_value_size = value.size();
4606 in_parentheses = false;
4607 after_digit = false;
4608 after_parentheses = true;
4609 }
4610 else if (
4611 after_digit &&
4612 !in_parentheses && // No separators inside parentheses
4613 !after_parentheses && // No separators following right parenthesis
4614 m_separator && m_separator->match(text, this->interval.end, end, flags))
4615 {
4616 // Separator
4617 this->interval.end = m_separator->interval.end;
4618 after_digit = false;
4619 after_parentheses = false;
4620 }
4621 else if (
// NOTE(review): the embedded numbering skips 4622 here; a further condition for spaces may be missing
// from this text. Confirm against the upstream file.
4623 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4624 {
4625 // Space
4626 this->interval.end = m_space->interval.end;
4627 after_digit = false;
4628 after_parentheses = false;
4629 }
4630 else
4631 break;
4632 }
4633 if (has_digits) {
// Roll back to the last safe point.
4634 value.erase(safe_value_size);
4635 this->interval.start = start;
4636 this->interval.end = safe_digit_end;
4637 return true;
4638 }
4639 value.clear();
// Failure: leave the interval with start > end, the state this file sets on every failed match.
4640 this->interval.start = (this->interval.end = start) + 1;
4641 return false;
4642 }
4643
/// Clears the collected number and delegates to basic_parser<T>::invalidate() so that this parser's
/// own match state is reset as well.
4644 virtual void invalidate()
4645 {
4646 value.clear();
4647 basic_parser<T>::invalidate();
4648 }
4649
4650 public:
// Text collected by match(): plus sign, digits and parentheses, without separators and spaces.
4651 std::basic_string<T> value;
4652
4653 protected:
// Component parsers supplied to the constructor.
4654 std::shared_ptr<basic_parser<T>> m_digit; // dereferenced unconditionally by match()
4655 std::shared_ptr<basic_parser<T>> m_plus_sign; // null-checked by match()
4656 std::shared_ptr<basic_set<T>> m_lparenthesis;
4657 std::shared_ptr<basic_set<T>> m_rparenthesis;
4658 std::shared_ptr<basic_parser<T>> m_separator; // null-checked by match()
4659 std::shared_ptr<basic_parser<T>> m_space; // null-checked by match()
4660 };
4661
// NOTE(review): the embedded numbering skips 4662-4663, 4665 and 4667; the alias declarations that
// belong before and inside this conditional are presumably missing from this text. Confirm against the upstream file.
4664#ifdef _UNICODE
4666#else
4668#endif
4670
/// Parser for an International Bank Account Number (IBAN).
///
/// A match consists of a two-letter country code found in the built-in country table, two check digits
/// and as many BBAN characters (digits or letters A-Z) as the table prescribes for that country.
/// Text matched by the optional `space` parser may appear between BBAN characters and is skipped.
/// After a successful match `country`, `check_digits` and `bban` hold the parts as zero-terminated
/// strings, and `is_valid` tells whether the modulo-97 checksum came out as 1.
4676 template <class T>
4677 class basic_iban : public basic_parser<T>
4678 {
4679 public:
/// Stores the space parser, passes `locale` on to basic_parser<T> and clears the result fields.
///
/// \param[in] space   Parser for text allowed between BBAN characters; may be null
/// \param[in] locale  Locale forwarded to the base class; match() uses it for case folding
4680 basic_iban(
4681 _In_ const std::shared_ptr<basic_parser<T>>& space,
4682 _In_ const std::locale& locale = std::locale()) :
4683 basic_parser<T>(locale),
4684 m_space(space)
4685 {
4686 this->country[0] = 0;
4687 this->check_digits[0] = 0;
4688 this->bban[0] = 0;
4689 this->is_valid = false;
4690 }
4691
/// Tries to match an IBAN at `text[start]`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` where matching starts
/// \param[in] end    Index in `text` where matching must stop
/// \param[in] flags  Matching flags; with match_case_insensitive, letters are upper-cased before testing
///
/// \returns true when the text has the IBAN structure of a known country (check `is_valid` for the
/// checksum result); false otherwise, with all result fields cleared.
4692 virtual bool match(
4693 _In_reads_or_z_(end) const T* text,
4694 _In_ size_t start = 0,
4695 _In_ size_t end = (size_t)-1,
4696 _In_ int flags = match_default)
4697 {
4698 _Assume_(text || start >= end);
4699 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4700 const bool case_insensitive = flags & match_case_insensitive ? true : false;
// One table row: country code, fixed check digits (all-zero when not fixed) and total IBAN length.
4701 struct country_t {
4702 T country[2];
4703 T check_digits[2];
4704 size_t length;
4705 };
// Must stay sorted by country code: the lookup below is a binary search.
4706 static const country_t s_countries[] = {
4707 { { 'A', 'D' }, {}, 24 }, // Andorra
4708 { { 'A', 'E' }, {}, 23 }, // United Arab Emirates
4709 { { 'A', 'L' }, {}, 28 }, // Albania
4710 { { 'A', 'O' }, {}, 25 }, // Angola
4711 { { 'A', 'T' }, {}, 20 }, // Austria
4712 { { 'A', 'Z' }, {}, 28 }, // Azerbaijan
4713 { { 'B', 'A' }, { '3', '9' }, 20}, // Bosnia and Herzegovina
4714 { { 'B', 'E' }, {}, 16 }, // Belgium
4715 { { 'B', 'F' }, {}, 28 }, // Burkina Faso
4716 { { 'B', 'G' }, {}, 22 }, // Bulgaria
4717 { { 'B', 'H' }, {}, 22 }, // Bahrain
4718 { { 'B', 'I' }, {}, 27 }, // Burundi
4719 { { 'B', 'J' }, {}, 28 }, // Benin
4720 { { 'B', 'R' }, {}, 29 }, // Brazil
4721 { { 'B', 'Y' }, {}, 28 }, // Belarus
4722 { { 'C', 'F' }, {}, 27 }, // Central African Republic
4723 { { 'C', 'G' }, {}, 27 }, // Congo, Republic of the
4724 { { 'C', 'H' }, {}, 21 }, // Switzerland
4725 { { 'C', 'I' }, {}, 28 }, // Côte d'Ivoire
4726 { { 'C', 'M' }, {}, 27 }, // Cameroon
4727 { { 'C', 'R' }, {}, 22 }, // Costa Rica
4728 { { 'C', 'V' }, {}, 25 }, // Cabo Verde
4729 { { 'C', 'Y' }, {}, 28 }, // Cyprus
4730 { { 'C', 'Z' }, {}, 24 }, // Czech Republic
4731 { { 'D', 'E' }, {}, 22 }, // Germany
4732 { { 'D', 'J' }, {}, 27 }, // Djibouti
4733 { { 'D', 'K' }, {}, 18 }, // Denmark
4734 { { 'D', 'O' }, {}, 28 }, // Dominican Republic
4735 { { 'D', 'Z' }, {}, 26 }, // Algeria
4736 { { 'E', 'E' }, {}, 20 }, // Estonia
4737 { { 'E', 'G' }, {}, 29 }, // Egypt
4738 { { 'E', 'S' }, {}, 24 }, // Spain
4739 { { 'F', 'I' }, {}, 18 }, // Finland
4740 { { 'F', 'O' }, {}, 18 }, // Faroe Islands
4741 { { 'F', 'R' }, {}, 27 }, // France
4742 { { 'G', 'A' }, {}, 27 }, // Gabon
4743 { { 'G', 'B' }, {}, 22 }, // United Kingdom
4744 { { 'G', 'E' }, {}, 22 }, // Georgia
4745 { { 'G', 'I' }, {}, 23 }, // Gibraltar
4746 { { 'G', 'L' }, {}, 18 }, // Greenland
4747 { { 'G', 'Q' }, {}, 27 }, // Equatorial Guinea
4748 { { 'G', 'R' }, {}, 27 }, // Greece
4749 { { 'G', 'T' }, {}, 28 }, // Guatemala
4750 { { 'G', 'W' }, {}, 25 }, // Guinea-Bissau
4751 { { 'H', 'N' }, {}, 28 }, // Honduras
4752 { { 'H', 'R' }, {}, 21 }, // Croatia
4753 { { 'H', 'U' }, {}, 28 }, // Hungary
4754 { { 'I', 'E' }, {}, 22 }, // Ireland
4755 { { 'I', 'L' }, {}, 23 }, // Israel
4756 { { 'I', 'Q' }, {}, 23 }, // Iraq
4757 { { 'I', 'R' }, {}, 26 }, // Iran
4758 { { 'I', 'S' }, {}, 26 }, // Iceland
4759 { { 'I', 'T' }, {}, 27 }, // Italy
4760 { { 'J', 'O' }, {}, 30 }, // Jordan
4761 { { 'K', 'M' }, {}, 27 }, // Comoros
4762 { { 'K', 'W' }, {}, 30 }, // Kuwait
4763 { { 'K', 'Z' }, {}, 20 }, // Kazakhstan
4764 { { 'L', 'B' }, {}, 28 }, // Lebanon
4765 { { 'L', 'C' }, {}, 32 }, // Saint Lucia
4766 { { 'L', 'I' }, {}, 21 }, // Liechtenstein
4767 { { 'L', 'T' }, {}, 20 }, // Lithuania
4768 { { 'L', 'U' }, {}, 20 }, // Luxembourg
4769 { { 'L', 'V' }, {}, 21 }, // Latvia
4770 { { 'L', 'Y' }, {}, 25 }, // Libya
4771 { { 'M', 'A' }, {}, 28 }, // Morocco
4772 { { 'M', 'C' }, {}, 27 }, // Monaco
4773 { { 'M', 'D' }, {}, 24 }, // Moldova
4774 { { 'M', 'E' }, { '2', '5' }, 22 }, // Montenegro
4775 { { 'M', 'G' }, {}, 27 }, // Madagascar
4776 { { 'M', 'K' }, { '0', '7' }, 19 }, // North Macedonia
4777 { { 'M', 'L' }, {}, 28 }, // Mali
4778 { { 'M', 'R' }, { '1', '3' }, 27}, // Mauritania
4779 { { 'M', 'T' }, {}, 31 }, // Malta
4780 { { 'M', 'U' }, {}, 30 }, // Mauritius
4781 { { 'M', 'Z' }, {}, 25 }, // Mozambique
4782 { { 'N', 'E' }, {}, 28 }, // Niger
4783 { { 'N', 'I' }, {}, 32 }, // Nicaragua
4784 { { 'N', 'L' }, {}, 18 }, // Netherlands
4785 { { 'N', 'O' }, {}, 15 }, // Norway
4786 { { 'P', 'K' }, {}, 24 }, // Pakistan
4787 { { 'P', 'L' }, {}, 28 }, // Poland
4788 { { 'P', 'S' }, {}, 29 }, // Palestinian territories
4789 { { 'P', 'T' }, { '5', '0' }, 25 }, // Portugal
4790 { { 'Q', 'A' }, {}, 29 }, // Qatar
4791 { { 'R', 'O' }, {}, 24 }, // Romania
4792 { { 'R', 'S' }, { '3', '5' }, 22 }, // Serbia
4793 { { 'R', 'U' }, {}, 33 }, // Russia
4794 { { 'S', 'A' }, {}, 24 }, // Saudi Arabia
4795 { { 'S', 'C' }, {}, 31 }, // Seychelles
4796 { { 'S', 'D' }, {}, 18 }, // Sudan
4797 { { 'S', 'E' }, {}, 24 }, // Sweden
4798 { { 'S', 'I' }, { '5', '6' }, 19 }, // Slovenia
4799 { { 'S', 'K' }, {}, 24 }, // Slovakia
4800 { { 'S', 'M' }, {}, 27 }, // San Marino
4801 { { 'S', 'N' }, {}, 28 }, // Senegal
4802 { { 'S', 'T' }, {}, 25 }, // São Tomé and Príncipe
4803 { { 'S', 'V' }, {}, 28 }, // El Salvador
4804 { { 'T', 'D' }, {}, 27 }, // Chad
4805 { { 'T', 'G' }, {}, 28 }, // Togo
4806 { { 'T', 'L' }, { '3', '8' }, 23}, // East Timor
4807 { { 'T', 'N' }, { '5', '9' }, 24 }, // Tunisia
4808 { { 'T', 'R' }, {}, 26 }, // Turkey
4809 { { 'U', 'A' }, {}, 29 }, // Ukraine
4810 { { 'V', 'A' }, {}, 22 }, // Vatican City
4811 { { 'V', 'G' }, {}, 24 }, // Virgin Islands, British
4812 { { 'X', 'K' }, {}, 20 }, // Kosovo
4813 };
// Locals are declared up front (without initializers) because the `goto error` jumps below cross this scope.
4814 const country_t* country_desc = nullptr;
4815 size_t n, available, next, bban_length;
4816 uint32_t nominator; // running remainder/accumulator of the modulo-97 computation
4817
// Country code: two letters A-Z.
4818 this->interval.end = start;
4819 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4820 if (this->interval.end >= end || !text[this->interval.end])
4821 goto error; // incomplete country code
4822 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4823 if (chr < 'A' || 'Z' < chr)
4824 goto error; // invalid country code
4825 this->country[i] = chr;
4826 }
// Binary search of the country table over the half-open range [l, r).
4827 for (size_t l = 0, r = _countof(s_countries);;) {
4828 if (l >= r)
4829 goto error; // unknown country
4830 size_t m = (l + r) / 2;
4831 const country_t& c = s_countries[m];
4832 if (c.country[0] < this->country[0] || (c.country[0] == this->country[0] && c.country[1] < this->country[1]))
4833 l = m + 1;
4834 else if (this->country[0] < c.country[0] || (this->country[0] == c.country[0] && this->country[1] < c.country[1]))
4835 r = m;
4836 else {
4837 country_desc = &c;
4838 break;
4839 }
4840 }
4841 this->country[2] = 0;
4842
// Check digits: two decimal digits (a terminating zero fails the '0'..'9' test as well).
4843 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4844 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4845 goto error; // incomplete or invalid check digits
4846 this->check_digits[i] = text[this->interval.end];
4847 }
4848 this->check_digits[2] = 0;
4849
// Table rows with non-zero check digits require exactly those digits.
4850 if ((country_desc->check_digits[0] && this->check_digits[0] != country_desc->check_digits[0]) ||
4851 (country_desc->check_digits[1] && this->check_digits[1] != country_desc->check_digits[1]))
4852 goto error; // unexpected check digits
4853
// BBAN: the table length minus the 4 characters of country code and check digits; spaces do not count.
4854 bban_length = country_desc->length - 4;
4855 for (n = 0; n < bban_length;) {
4856 if (this->interval.end >= end || !text[this->interval.end])
4857 goto error; // bban too short
4858 if (m_space && m_space->match(text, this->interval.end, end, flags)) {
4859 this->interval.end = m_space->interval.end;
4860 continue;
4861 }
4862 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4863 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4864 this->bban[n++] = chr;
4865 this->interval.end++;
4866 }
4867 else
4868 goto error; // invalid bban
4869 }
4870 this->bban[n] = 0;
4871
4872 // Normalize IBAN.
// Builds the decimal string BBAN + country + check digits, writing every letter as two digits
// (A..J -> 10..19, K..T -> 20..29, U..Z -> 30..35).
4873 T normalized[69];
4874 available = 0;
4875 for (size_t i = 0; ; ++i) {
4876 if (!this->bban[i]) {
// End of BBAN: append the country letters and the check digits, then terminate.
4877 for (i = 0; i < 2; ++i) {
4878 if ('A' <= this->country[i] && this->country[i] <= 'J') {
4879 normalized[available++] = '1';
4880 normalized[available++] = '0' + this->country[i] - 'A';
4881 }
4882 else if ('K' <= this->country[i] && this->country[i] <= 'T') {
4883 normalized[available++] = '2';
4884 normalized[available++] = '0' + this->country[i] - 'K';
4885 }
4886 else if ('U' <= this->country[i] && this->country[i] <= 'Z') {
4887 normalized[available++] = '3';
4888 normalized[available++] = '0' + this->country[i] - 'U';
4889 }
4890 }
4891 normalized[available++] = this->check_digits[0];
4892 normalized[available++] = this->check_digits[1];
4893 normalized[available] = 0;
4894 break;
4895 }
4896 if ('0' <= this->bban[i] && this->bban[i] <= '9')
4897 normalized[available++] = this->bban[i];
4898 else if ('A' <= this->bban[i] && this->bban[i] <= 'J') {
4899 normalized[available++] = '1';
4900 normalized[available++] = '0' + this->bban[i] - 'A';
4901 }
4902 else if ('K' <= this->bban[i] && this->bban[i] <= 'T') {
4903 normalized[available++] = '2';
4904 normalized[available++] = '0' + this->bban[i] - 'K';
4905 }
4906 else if ('U' <= this->bban[i] && this->bban[i] <= 'Z') {
4907 normalized[available++] = '3';
4908 normalized[available++] = '0' + this->bban[i] - 'U';
4909 }
4910 }
4911
4912 // Calculate modulo 97.
// Piecewise reduction: take at most 9 digits at a time so the accumulator never exceeds 9 decimal digits.
// Presumably strtou32 stores in `next` the index of the first unparsed character - confirm against string.hpp.
4913 nominator = stdex::strtou32(normalized, 9, &next, 10);
4914 for (;;) {
4915 nominator %= 97;
4916 if (!normalized[next]) {
4917 this->is_valid = nominator == 1;
4918 break;
4919 }
// The remainder already occupies 0, 1 or 2 decimal digits; top it up to 9 with further input digits.
4920 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
4921 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
4922 nominator = nominator * 10 + (normalized[next] - '0');
4923 }
4924
4925 this->interval.start = start;
4926 return true;
4927
4928 error:
// Failure: clear every result field and leave the interval with start > end.
4929 this->country[0] = 0;
4930 this->check_digits[0] = 0;
4931 this->bban[0] = 0;
4932 this->is_valid = false;
4933 this->interval.start = (this->interval.end = start) + 1;
4934 return false;
4935 }
4936
/// Clears every result field and delegates to basic_parser<T>::invalidate() so that this parser's
/// own match state is reset as well.
4937 virtual void invalidate()
4938 {
4939 this->country[0] = 0;
4940 this->check_digits[0] = 0;
4941 this->bban[0] = 0;
4942 this->is_valid = false;
4943 basic_parser<T>::invalidate();
4944 }
4945
4946 public:
4947 T country[3]; // two-letter country code, zero terminated
4948 T check_digits[3]; // two check digits, zero terminated
4949 T bban[31]; // BBAN without spaces, zero terminated
4950 bool is_valid; // true when the modulo-97 remainder of the normalized IBAN is 1
4951
4952 protected:
4953 std::shared_ptr<basic_parser<T>> m_space; // optional parser for text skipped inside the BBAN
4954 };
4955
/// Narrow-character (char) instantiation of basic_iban.
4956 using iban = basic_iban<char>;
/// Wide-character (wchar_t) instantiation of basic_iban.
4957 using wiban = basic_iban<wchar_t>;
/// tiban is wiban when _UNICODE is defined and iban otherwise.
4958#ifdef _UNICODE
4959 using tiban = wiban;
4960#else
4961 using tiban = iban;
4962#endif
4964
// NOTE(review): the embedded numbering jumps 4970 -> 4972 and 4973 -> 4975, so the class head and the
// constructor name are not present in this text; what follows is the remainder of both.
4970 template <class T>
4972 {
4973 public:
/// Constructor: stores the space parser, passes `locale` on to basic_parser<T> and clears the result fields.
///
/// \param[in] space   Parser for text allowed between reference groups; stored in m_space (null-checked by match())
/// \param[in] locale  Locale forwarded to the base class
4975 _In_ const std::shared_ptr<basic_parser<T>>& space,
4976 _In_ const std::locale& locale = std::locale()) :
4977 basic_parser<T>(locale),
4978 m_space(space)
4979 {
4980 this->check_digits[0] = 0;
4981 this->reference[0] = 0;
4982 this->is_valid = false;
4983 }
4984
/// Tries to match an "RF" creditor reference at `text[start]`.
///
/// Expected form: the letters `RF`, two check digits, then one or more reference characters (digits or
/// letters A-Z). Text matched by the optional space parser is skipped before each group of up to four
/// reference characters. On success `reference` holds the characters right-aligned and left-padded with
/// '0' to its full capacity, and `is_valid` tells whether the modulo-97 checksum came out as 1.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` where matching starts
/// \param[in] end    Index in `text` where matching must stop
/// \param[in] flags  Matching flags; with match_case_insensitive, letters are upper-cased before testing
///
/// \returns true when the text has the structure described above (check `is_valid` for the checksum
/// result); false otherwise, with the result fields cleared.
4985 virtual bool match(
4986 _In_reads_or_z_(end) const T* text,
4987 _In_ size_t start = 0,
4988 _In_ size_t end = (size_t)-1,
4989 _In_ int flags = match_default)
4990 {
4991 _Assume_(text || start >= end);
4992 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4993 const bool case_insensitive = flags & match_case_insensitive ? true : false;
// Locals are declared up front (without initializers) because the `goto` jumps below cross this scope.
4994 size_t n, available, next;
4995 uint32_t nominator; // running remainder/accumulator of the modulo-97 computation
4996
// Prefix: the two letters "RF".
4997 this->interval.end = start;
4998 if (this->interval.end + 1 >= end ||
4999 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'R' ||
5000 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'F')
5001 goto error; // incomplete or wrong reference ID
5002 this->interval.end += 2;
5003
// Check digits: two decimal digits (a terminating zero fails the '0'..'9' test as well).
5004 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5005 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5006 goto error; // incomplete or invalid check digits
5007 this->check_digits[i] = text[this->interval.end];
5008 }
5009 this->check_digits[2] = 0;
5010
// Reference characters, read in groups of up to four with optional space before each group.
5011 for (n = 0;;) {
5012 if (m_space && m_space->match(text, this->interval.end, end, flags))
5013 this->interval.end = m_space->interval.end;
5014 for (size_t j = 0; j < 4; ++j) {
5015 if (this->interval.end >= end || !text[this->interval.end])
5016 goto out;
5017 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
5018 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
5019 if (n >= _countof(reference) - 1)
5020 goto error; // reference overflow
5021 this->reference[n++] = chr;
5022 this->interval.end++;
5023 }
5024 else
5025 goto out;
5026 }
5027 }
5028 out:
5029 if (!n)
5030 goto error; // reference too short
// Right-align the n characters (copying backwards so the overlapping move is safe) and pad the front with '0'.
5031 this->reference[_countof(this->reference) - 1] = 0;
5032 for (size_t i = n, j = _countof(this->reference) - 1; i;)
5033 this->reference[--j] = this->reference[--i];
5034 for (size_t j = _countof(this->reference) - 1 - n; j;)
5035 this->reference[--j] = '0';
5036
5037 // Normalize creditor reference.
// Builds the decimal string reference + "RF" + check digits, writing every letter as two digits
// (A..J -> 10..19, K..T -> 20..29, U..Z -> 30..35).
// Worst case: two digits per reference character, four for "RF", two check digits and the terminator.
5038 T normalized[2 * (_countof(reference) - 1) + 7];
5039 available = 0;
5040 for (size_t i = 0; ; ++i) {
5041 if (!this->reference[i]) {
5042 normalized[available++] = '2'; // R
5043 normalized[available++] = '7';
5044 normalized[available++] = '1'; // F
5045 normalized[available++] = '5';
5046 normalized[available++] = this->check_digits[0];
5047 normalized[available++] = this->check_digits[1];
5048 normalized[available] = 0;
5049 break;
5050 }
5051 if ('0' <= this->reference[i] && this->reference[i] <= '9')
5052 normalized[available++] = this->reference[i];
5053 else if ('A' <= this->reference[i] && this->reference[i] <= 'J') {
5054 normalized[available++] = '1';
5055 normalized[available++] = '0' + this->reference[i] - 'A';
5056 }
5057 else if ('K' <= this->reference[i] && this->reference[i] <= 'T') {
5058 normalized[available++] = '2';
5059 normalized[available++] = '0' + this->reference[i] - 'K';
5060 }
5061 else if ('U' <= this->reference[i] && this->reference[i] <= 'Z') {
5062 normalized[available++] = '3';
5063 normalized[available++] = '0' + this->reference[i] - 'U';
5064 }
5065 }
5066
5067 // Calculate modulo 97.
// Piecewise reduction: take at most 9 digits at a time so the accumulator never exceeds 9 decimal digits.
// Presumably strtou32 stores in `next` the index of the first unparsed character - confirm against string.hpp.
5068 nominator = stdex::strtou32(normalized, 9, &next, 10);
5069 for (;;) {
5070 nominator %= 97;
5071 if (!normalized[next]) {
5072 this->is_valid = nominator == 1;
5073 break;
5074 }
// The remainder already occupies 0, 1 or 2 decimal digits; top it up to 9 with further input digits.
5075 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
5076 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
5077 nominator = nominator * 10 + (normalized[next] - '0');
5078 }
5079
5080 this->interval.start = start;
5081 return true;
5082
5083 error:
// Failure: clear every result field and leave the interval with start > end.
5084 this->check_digits[0] = 0;
5085 this->reference[0] = 0;
5086 this->is_valid = false;
5087 this->interval.start = (this->interval.end = start) + 1;
5088 return false;
5089 }
5090
/// Clears every result field and delegates to basic_parser<T>::invalidate() so that this parser's
/// own match state is reset as well.
5091 virtual void invalidate()
5092 {
5093 this->check_digits[0] = 0;
5094 this->reference[0] = 0;
5095 this->is_valid = false;
5096 basic_parser<T>::invalidate();
5097 }
5098
5099 public:
5100 T check_digits[3]; // two check digits, zero terminated
// NOTE(review): capacity chosen as 21 reference characters (the ISO 11649 maximum) plus terminator - confirm against the upstream file.
5101 T reference[22]; // reference characters, right-aligned and '0'-padded by match(), zero terminated
5102 bool is_valid; // true when the modulo-97 remainder of the normalized reference is 1
5103
5104 protected:
5105 std::shared_ptr<basic_parser<T>> m_space; // optional parser for text skipped between reference groups
5106 };
5107
// NOTE(review): the embedded numbering skips 5108-5109, 5111 and 5113; the alias declarations that
// belong before and inside this conditional are presumably missing from this text. Confirm against the upstream file.
5110#ifdef _UNICODE
5112#else
5114#endif
5116
/// Parser for one part of an SI reference: a run of one or more ASCII digits '0'..'9'.
5122 template <class T>
5123 class basic_si_reference_part : public basic_parser<T>
5124 {
5125 public:
/// Passes `locale` on to basic_parser<T>.
5126 basic_si_reference_part(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5127
/// Tries to match a run of digits at `text[start]`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` where matching starts
/// \param[in] end    Index in `text` where matching must stop
/// \param[in] flags  Matching flags (not used by this parser)
///
/// \returns true when at least one digit was consumed; false otherwise.
5128 virtual bool match(
5129 _In_reads_or_z_(end) const T* text,
5130 _In_ size_t start = 0,
5131 _In_ size_t end = (size_t)-1,
5132 _In_ int flags = match_default)
5133 {
5134 _Assume_(text || start >= end);
// Advance interval.end over consecutive digits, stopping at `end`, a terminating zero or a non-digit.
5135 this->interval.end = start;
5136 for (;;) {
5137 if (this->interval.end >= end || !text[this->interval.end])
5138 break;
5139 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9')
5140 this->interval.end++;
5141 else
5142 break;
5143 }
// An empty run is not a match.
5144 if (start < this->interval.end) {
5145 this->interval.start = start;
5146 return true;
5147 }
// Failure: leave the interval with start > end.
5148 this->interval.start = (this->interval.end = start) + 1;
5149 return false;
5150 }
5151 };
5152
/// Narrow-character (char) instantiation of basic_si_reference_part.
5153 using si_reference_part = basic_si_reference_part<char>;
/// Wide-character (wchar_t) instantiation of basic_si_reference_part.
5154 using wsi_reference_part = basic_si_reference_part<wchar_t>;
/// tsi_reference_part is the wide variant when _UNICODE is defined and the narrow one otherwise.
5155#ifdef _UNICODE
5156 using tsi_reference_part = wsi_reference_part;
5157#else
5158 using tsi_reference_part = si_reference_part;
5159#endif
5161
/// Parser for the delimiter between SI reference parts: exactly one '-' character.
5167 template <class T>
5168 class basic_si_reference_delimiter : public basic_parser<T>
5169 {
5170 public:
/// Passes `locale` on to basic_parser<T>.
5171 basic_si_reference_delimiter(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5172
/// Tries to match a single '-' at `text[start]`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` where matching starts
/// \param[in] end    Index in `text` where matching must stop
/// \param[in] flags  Matching flags (not used by this parser)
///
/// \returns true when `text[start]` is '-' and `start` is before `end`; false otherwise.
5173 virtual bool match(
5174 _In_reads_or_z_(end) const T* text,
5175 _In_ size_t start = 0,
5176 _In_ size_t end = (size_t)-1,
5177 _In_ int flags = match_default)
5178 {
5179 _Assume_(text || start >= end);
5180 if (start < end && text[start] == '-') {
// Match: the interval covers exactly the one delimiter character.
5181 this->interval.end = (this->interval.start = start) + 1;
5182 return true;
5183 }
// Failure: leave the interval with start > end.
5184 this->interval.start = (this->interval.end = start) + 1;
5185 return false;
5186 }
5187 };
5188
/// Narrow-character (char) instantiation of basic_si_reference_delimiter.
5189 using si_reference_delimiter = basic_si_reference_delimiter<char>;
/// Wide-character (wchar_t) instantiation of basic_si_reference_delimiter.
5190 using wsi_reference_delimiter = basic_si_reference_delimiter<wchar_t>;
/// tsi_reference_delimiter is the wide variant when _UNICODE is defined and the narrow one otherwise.
5191#ifdef _UNICODE
5192 using tsi_reference_delimiter = wsi_reference_delimiter;
5193#else
5194 using tsi_reference_delimiter = si_reference_delimiter;
5195#endif
5197
5205 template <class T>
5207 {
5208 public:
5210 _In_ const std::shared_ptr<basic_parser<T>>& space,
5211 _In_ const std::locale& locale = std::locale()) :
5212 basic_parser<T>(locale),
5213 part1(locale),
5214 part2(locale),
5215 part3(locale),
5216 is_valid(false),
5217 m_space(space),
5218 m_delimiter(locale)
5219 {
5220 this->model[0] = 0;
5221 }
5222
5223 virtual bool match(
5224 _In_reads_or_z_(end) const T* text,
5225 _In_ size_t start = 0,
5226 _In_ size_t end = (size_t)-1,
5227 _In_ int flags = match_default)
5228 {
5229 _Assume_(text || start >= end);
5230 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
5231 const bool case_insensitive = flags & match_case_insensitive ? true : false;
5232
5233 this->interval.end = start;
5234 if (this->interval.end + 1 >= end ||
5235 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'S' ||
5236 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'I')
5237 goto error; // incomplete or wrong reference ID
5238 this->interval.end += 2;
5239
5240 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5241 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5242 goto error; // incomplete or invalid model
5243 this->model[i] = text[this->interval.end];
5244 }
5245 this->model[2] = 0;
5246
5247 this->part1.invalidate();
5248 this->part2.invalidate();
5249 this->part3.invalidate();
5250 if (this->model[0] == '9' && this->model[1] == '9') {
5251 is_valid = true;
5252 this->interval.start = start;
5253 return true;
5254 }
5255
5256 if (m_space && m_space->match(text, this->interval.end, end, flags))
5257 this->interval.end = m_space->interval.end;
5258
5259 this->part1.match(text, this->interval.end, end, flags) &&
5260 this->m_delimiter.match(text, this->part1.interval.end, end, flags) &&
5261 this->part2.match(text, this->m_delimiter.interval.end, end, flags) &&
5262 this->m_delimiter.match(text, this->part2.interval.end, end, flags) &&
5263 this->part3.match(text, this->m_delimiter.interval.end, end, flags);
5264
5265 this->interval.start = start;
5266 if (this->part3.interval)
5267 this->interval.end = this->part3.interval.end;
5268 else if (this->part2.interval)
5269 this->interval.end = this->part2.interval.end;
5270 else if (this->part1.interval)
5271 this->interval.end = this->part1.interval.end;
5272 else
5273 this->interval.end = start + 4;
5274
5275 if (this->model[0] == '0' && this->model[1] == '0')
5276 is_valid =
5277 this->part3.interval ?
5278 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5279 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 :
5280 this->part2.interval ?
5281 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5282 this->part1.interval.size() + this->part2.interval.size() <= 20 :
5283 this->part1.interval ?
5284 this->part1.interval.size() <= 12 :
5285 false;
5286 else if (this->model[0] == '0' && this->model[1] == '1')
5287 is_valid =
5288 this->part3.interval ?
5289 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5290 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5291 check11(
5292 text + this->part1.interval.start, this->part1.interval.size(),
5293 text + this->part2.interval.start, this->part2.interval.size(),
5294 text + this->part3.interval.start, this->part3.interval.size()) :
5295 this->part2.interval ?
5296 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5297 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5298 check11(
5299 text + this->part1.interval.start, this->part1.interval.size(),
5300 text + this->part2.interval.start, this->part2.interval.size()) :
5301 this->part1.interval ?
5302 this->part1.interval.size() <= 12 &&
5303 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5304 false;
5305 else if (this->model[0] == '0' && this->model[1] == '2')
5306 is_valid =
5307 this->part3.interval ?
5308 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5309 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5310 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5311 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5312 false;
5313 else if (this->model[0] == '0' && this->model[1] == '3')
5314 is_valid =
5315 this->part3.interval ?
5316 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5317 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5318 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5319 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5320 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5321 false;
5322 else if (this->model[0] == '0' && this->model[1] == '4')
5323 is_valid =
5324 this->part3.interval ?
5325 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5326 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5327 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5328 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5329 false;
5330 else if ((this->model[0] == '0' || this->model[0] == '5') && this->model[1] == '5')
5331 is_valid =
5332 this->part3.interval ?
5333 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5334 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5335 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5336 this->part2.interval ?
5337 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5338 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5339 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5340 this->part1.interval ?
5341 this->part1.interval.size() <= 12 &&
5342 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5343 false;
5344 else if (this->model[0] == '0' && this->model[1] == '6')
5345 is_valid =
5346 this->part3.interval ?
5347 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5348 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5349 check11(
5350 text + this->part2.interval.start, this->part2.interval.size(),
5351 text + this->part3.interval.start, this->part3.interval.size()) :
5352 this->part2.interval ?
5353 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5354 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5355 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5356 false;
5357 else if (this->model[0] == '0' && this->model[1] == '7')
5358 is_valid =
5359 this->part3.interval ?
5360 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5361 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5362 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5363 this->part2.interval ?
5364 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5365 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5366 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5367 false;
5368 else if (this->model[0] == '0' && this->model[1] == '8')
5369 is_valid =
5370 this->part3.interval ?
5371 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5372 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5373 check11(
5374 text + this->part1.interval.start, this->part1.interval.size(),
5375 text + this->part2.interval.start, this->part2.interval.size()) &&
5376 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5377 false;
5378 else if (this->model[0] == '0' && this->model[1] == '9')
5379 is_valid =
5380 this->part3.interval ?
5381 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5382 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5383 check11(
5384 text + this->part1.interval.start, this->part1.interval.size(),
5385 text + this->part2.interval.start, this->part2.interval.size()) :
5386 this->part2.interval ?
5387 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5388 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5389 check11(
5390 text + this->part1.interval.start, this->part1.interval.size(),
5391 text + this->part2.interval.start, this->part2.interval.size()) :
5392 this->part1.interval ?
5393 this->part1.interval.size() <= 12 &&
5394 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5395 false;
5396 else if (this->model[0] == '1' && this->model[1] == '0')
5397 is_valid =
5398 this->part3.interval ?
5399 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5400 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5401 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5402 check11(
5403 text + this->part2.interval.start, this->part2.interval.size(),
5404 text + this->part3.interval.start, this->part3.interval.size()) :
5405 this->part2.interval ?
5406 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5407 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5408 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5409 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5410 false;
5411 else if (
5412 (this->model[0] == '1' && (this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5413 ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '8') ||
5414 (this->model[0] == '4' && (this->model[1] == '0' || this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5415 (this->model[0] == '5' && (this->model[1] == '1' || this->model[1] == '8')))
5416 is_valid =
5417 this->part3.interval ?
5418 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5419 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5420 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5421 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5422 this->part2.interval ?
5423 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5424 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5425 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5426 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5427 false;
5428 else if (this->model[0] == '1' && this->model[1] == '2')
5429 is_valid =
5430 this->part3.interval ? false :
5431 this->part2.interval ? false :
5432 this->part1.interval ?
5433 this->part1.interval.size() <= 13 &&
5434 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5435 false;
5436 else if ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '1')
5437 is_valid =
5438 this->part3.interval ? false :
5439 this->part2.interval ?
5440 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5441 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5442 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5443 false;
5444 else
5445 is_valid = true; // Assume models we don't handle as valid
5446 return true;
5447
5448 error:
5449 this->model[0] = 0;
5450 this->part1.interval.start = (this->part1.interval.end = start) + 1;
5451 this->part2.interval.start = (this->part2.interval.end = start) + 1;
5452 this->part3.interval.start = (this->part3.interval.end = start) + 1;
5453 this->is_valid = false;
5454 this->interval.start = (this->interval.end = start) + 1;
5455 return false;
5456 }
5457
5458 virtual void invalidate()
5459 {
5460 this->model[0] = 0;
5461 this->part1.invalidate();
5462 this->part2.invalidate();
5463 this->part3.invalidate();
5464 this->is_valid = false;
5466 }
5467
5468 protected:
5469 static bool check11(
5470 _In_count_(num_part1) const T* part1, _In_ size_t num_part1)
5471 {
5472 _Assume_(part1 && num_part1 >= 1);
5473 uint32_t nominator = 0, ponder = 2;
5474 for (size_t i = num_part1 - 1; i--; ++ponder)
5475 nominator += (part1[i] - '0') * ponder;
5476 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5477 if (control >= 10)
5478 control = 0;
5479 return control == part1[num_part1 - 1] - '0';
5480 }
5481
5482 static bool check11(
5483 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5484 _In_count_(num_part2) const T* part2, _In_ size_t num_part2)
5485 {
5486 _Assume_(part1 || !num_part1);
5487 _Assume_(part2 && num_part2 >= 1);
5488 uint32_t nominator = 0, ponder = 2;
5489 for (size_t i = num_part2 - 1; i--; ++ponder)
5490 nominator += (part2[i] - '0') * ponder;
5491 for (size_t i = num_part1; i--; ++ponder)
5492 nominator += (part1[i] - '0') * ponder;
5493 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5494 if (control == 10)
5495 control = 0;
5496 return control == part2[num_part2 - 1] - '0';
5497 }
5498
5499 static bool check11(
5500 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5501 _In_count_(num_part2) const T* part2, _In_ size_t num_part2,
5502 _In_count_(num_part3) const T* part3, _In_ size_t num_part3)
5503 {
5504 _Assume_(part1 || !num_part1);
5505 _Assume_(part2 || !num_part2);
5506 _Assume_(part3 && num_part3 >= 1);
5507 uint32_t nominator = 0, ponder = 2;
5508 for (size_t i = num_part3 - 1; i--; ++ponder)
5509 nominator += (part3[i] - '0') * ponder;
5510 for (size_t i = num_part2; i--; ++ponder)
5511 nominator += (part2[i] - '0') * ponder;
5512 for (size_t i = num_part1; i--; ++ponder)
5513 nominator += (part1[i] - '0') * ponder;
5514 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5515 if (control == 10)
5516 control = 0;
5517 return control == part2[num_part3 - 1] - '0';
5518 }
5519
5520 public:
5521 T model[3];
5526
5527 protected:
5528 std::shared_ptr<basic_parser<T>> m_space;
5530 };
5531
5534#ifdef _UNICODE
5536#else
5538#endif
5540
5544 template <class T>
5546 {
5547 public:
5549 _In_ const std::shared_ptr<basic_parser<T>>& element,
5550 _In_ const std::shared_ptr<basic_parser<T>>& digit,
5551 _In_ const std::shared_ptr<basic_parser<T>>& sign,
5552 _In_ const std::locale& locale = std::locale()) :
5553 basic_parser<T>(locale),
5554 m_element(element),
5555 m_digit(digit),
5556 m_sign(sign),
5557 has_digits(false),
5558 has_charge(false)
5559 {}
5560
5561 virtual bool match(
5562 _In_reads_or_z_(end) const T* text,
5563 _In_ size_t start = 0,
5564 _In_ size_t end = (size_t)-1,
5565 _In_ int flags = match_default)
5566 {
5567 _Assume_(text || start >= end);
5568
5569 has_digits = false;
5570 has_charge = false;
5571 this->interval.end = start;
5572
5573 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
5574 for (;;) {
5575 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
5576 this->interval.end = m_element->interval.end;
5577 while (m_digit->match(text, this->interval.end, end, flags)) {
5578 this->interval.end = m_digit->interval.end;
5579 has_digits = true;
5580 }
5581 }
5582 else if (start < this->interval.end) {
5583 if (m_sign->match(text, this->interval.end, end, flags)) {
5584 this->interval.end = m_sign->interval.end;
5585 has_charge = true;
5586 }
5587 this->interval.start = start;
5588 return true;
5589 }
5590 else {
5591 this->interval.start = (this->interval.end = start) + 1;
5592 return false;
5593 }
5594 }
5595 }
5596
5597 virtual void invalidate()
5598 {
5599 has_digits = false;
5600 has_charge = false;
5602 }
5603
5604 public:
5605 bool has_digits;
5606 bool has_charge;
5607
5608 protected:
5609 std::shared_ptr<basic_parser<T>> m_element;
5610 std::shared_ptr<basic_parser<T>> m_digit;
5611 std::shared_ptr<basic_parser<T>> m_sign;
5612 };
5613
5616#ifdef _UNICODE
5618#else
5620#endif
5622
5627 {
5628 public:
5629 virtual bool match(
5630 _In_reads_or_z_(end) const char* text,
5631 _In_ size_t start = 0,
5632 _In_ size_t end = (size_t)-1,
5633 _In_ int flags = match_default)
5634 {
5635 _Assume_(text || start >= end);
5636 this->interval.end = start;
5637
5638 _Assume_(text || this->interval.end >= end);
5639 if (this->interval.end < end && text[this->interval.end]) {
5640 if (text[this->interval.end] == '\r') {
5641 this->interval.end++;
5642 if (this->interval.end < end && text[this->interval.end] == '\n') {
5643 this->interval.start = start;
5644 this->interval.end++;
5645 return true;
5646 }
5647 }
5648 else if (text[this->interval.end] == '\n') {
5649 this->interval.start = start;
5650 this->interval.end++;
5651 return true;
5652 }
5653 }
5654 this->interval.start = (this->interval.end = start) + 1;
5655 return false;
5656 }
5657 };
5658
5662 class http_space : public parser
5663 {
5664 public:
5665 virtual bool match(
5666 _In_reads_or_z_(end) const char* text,
5667 _In_ size_t start = 0,
5668 _In_ size_t end = (size_t)-1,
5669 _In_ int flags = match_default)
5670 {
5671 _Assume_(text || start >= end);
5672 this->interval.end = start;
5673 if (m_line_break.match(text, this->interval.end, end, flags)) {
5674 this->interval.end = m_line_break.interval.end;
5675 if (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) {
5676 this->interval.start = start;
5677 this->interval.end++;
5678 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
5679 return true;
5680 }
5681 }
5682 else if (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) {
5683 this->interval.start = start;
5684 this->interval.end++;
5685 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
5686 return true;
5687 }
5688 this->interval.start = (this->interval.end = start) + 1;
5689 return false;
5690 }
5691
5692 protected:
5693 http_line_break m_line_break;
5694 };
5695
5699 class http_text_char : public parser
5700 {
5701 public:
5702 virtual bool match(
5703 _In_reads_or_z_(end) const char* text,
5704 _In_ size_t start = 0,
5705 _In_ size_t end = (size_t)-1,
5706 _In_ int flags = match_default)
5707 {
5708 _Assume_(text || start >= end);
5709 this->interval.end = start;
5710
5711 _Assume_(text || this->interval.end >= end);
5712 if (m_space.match(text, this->interval.end, end, flags)) {
5713 this->interval.start = start;
5714 this->interval.end = m_space.interval.end;
5715 return true;
5716 }
5717 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
5718 this->interval.start = start;
5719 this->interval.end++;
5720 return true;
5721 }
5722 this->interval.start = (this->interval.end = start) + 1;
5723 return false;
5724 }
5725
5726 protected:
5727 http_space m_space;
5728 };
5729
5733 class http_token : public parser
5734 {
5735 public:
5736 virtual bool match(
5737 _In_reads_or_z_(end) const char* text,
5738 _In_ size_t start = 0,
5739 _In_ size_t end = (size_t)-1,
5740 _In_ int flags = match_default)
5741 {
5742 _Assume_(text || start >= end);
5743 this->interval.end = start;
5744 for (;;) {
5745 if (this->interval.end < end && text[this->interval.end]) {
5746 if ((unsigned int)text[this->interval.end] < 0x20 ||
5747 (unsigned int)text[this->interval.end] == 0x7f ||
5748 text[this->interval.end] == '(' ||
5749 text[this->interval.end] == ')' ||
5750 text[this->interval.end] == '<' ||
5751 text[this->interval.end] == '>' ||
5752 text[this->interval.end] == '@' ||
5753 text[this->interval.end] == ',' ||
5754 text[this->interval.end] == ';' ||
5755 text[this->interval.end] == ':' ||
5756 text[this->interval.end] == '\\' ||
5757 text[this->interval.end] == '\"' ||
5758 text[this->interval.end] == '/' ||
5759 text[this->interval.end] == '[' ||
5760 text[this->interval.end] == ']' ||
5761 text[this->interval.end] == '?' ||
5762 text[this->interval.end] == '=' ||
5763 text[this->interval.end] == '{' ||
5764 text[this->interval.end] == '}' ||
5765 isspace(text[this->interval.end]))
5766 break;
5767 else
5768 this->interval.end++;
5769 }
5770 else
5771 break;
5772 }
5774 this->interval.start = start;
5775 return true;
5776 }
5777 else {
5778 this->interval.start = (this->interval.end = start) + 1;
5779 return false;
5780 }
5781 }
5782 };
5783
5788 {
5789 public:
5790 virtual bool match(
5791 _In_reads_or_z_(end) const char* text,
5792 _In_ size_t start = 0,
5793 _In_ size_t end = (size_t)-1,
5794 _In_ int flags = match_default)
5795 {
5796 _Assume_(text || start >= end);
5797 this->interval.end = start;
5798 if (this->interval.end < end && text[this->interval.end] != '"')
5799 goto error;
5800 this->interval.end++;
5801 content.start = this->interval.end;
5802 for (;;) {
5803 _Assume_(text || this->interval.end >= end);
5804 if (this->interval.end < end && text[this->interval.end]) {
5805 if (text[this->interval.end] == '"') {
5806 content.end = this->interval.end;
5807 this->interval.end++;
5808 break;
5809 }
5810 else if (text[this->interval.end] == '\\') {
5811 this->interval.end++;
5812 if (this->interval.end < end && text[this->interval.end]) {
5813 this->interval.end++;
5814 }
5815 else
5816 goto error;
5817 }
5818 else if (m_chr.match(text, this->interval.end, end, flags))
5819 this->interval.end++;
5820 else
5821 goto error;
5822 }
5823 else
5824 goto error;
5825 }
5826 this->interval.start = start;
5827 return true;
5828
5829 error:
5830 content.start = 1;
5831 content.end = 0;
5832 this->interval.start = (this->interval.end = start) + 1;
5833 return false;
5834 }
5835
5836 virtual void invalidate()
5837 {
5838 content.start = 1;
5839 content.end = 0;
5840 parser::invalidate();
5841 }
5842
5843 public:
5845
5846 protected:
5847 http_text_char m_chr;
5848 };
5849
5853 class http_value : public parser
5854 {
5855 public:
5856 virtual bool match(
5857 _In_reads_or_z_(end) const char* text,
5858 _In_ size_t start = 0,
5859 _In_ size_t end = (size_t)-1,
5860 _In_ int flags = match_default)
5861 {
5862 _Assume_(text || start >= end);
5863 this->interval.end = start;
5864 if (string.match(text, this->interval.end, end, flags)) {
5865 token.invalidate();
5866 this->interval.end = string.interval.end;
5867 this->interval.start = start;
5868 return true;
5869 }
5870 else if (token.match(text, this->interval.end, end, flags)) {
5871 string.invalidate();
5872 this->interval.end = token.interval.end;
5873 this->interval.start = start;
5874 return true;
5875 }
5876 else {
5877 this->interval.start = (this->interval.end = start) + 1;
5878 return false;
5879 }
5880 }
5881
5882 virtual void invalidate()
5883 {
5884 string.invalidate();
5885 token.invalidate();
5886 parser::invalidate();
5887 }
5888
5889 public:
5892 };
5893
5897 class http_parameter : public parser
5898 {
5899 public:
5900 virtual bool match(
5901 _In_reads_or_z_(end) const char* text,
5902 _In_ size_t start = 0,
5903 _In_ size_t end = (size_t)-1,
5904 _In_ int flags = match_default)
5905 {
5906 _Assume_(text || start >= end);
5907 this->interval.end = start;
5908 if (name.match(text, this->interval.end, end, flags))
5909 this->interval.end = name.interval.end;
5910 else
5911 goto error;
5912 while (m_space.match(text, this->interval.end, end, flags))
5913 this->interval.end = m_space.interval.end;
5914 _Assume_(text || this->interval.end >= end);
5915 if (this->interval.end < end && text[this->interval.end] == '=')
5916 this->interval.end++;
5917 else
5918 while (m_space.match(text, this->interval.end, end, flags))
5919 this->interval.end = m_space.interval.end;
5920 if (value.match(text, this->interval.end, end, flags))
5921 this->interval.end = value.interval.end;
5922 else
5923 goto error;
5924 this->interval.start = start;
5925 return true;
5926
5927 error:
5928 name.invalidate();
5929 value.invalidate();
5930 this->interval.start = (this->interval.end = start) + 1;
5931 return false;
5932 }
5933
5934 virtual void invalidate()
5935 {
5936 name.invalidate();
5937 value.invalidate();
5938 parser::invalidate();
5939 }
5940
5941 public:
5944
5945 protected:
5946 http_space m_space;
5947 };
5948
5952 class http_any_type : public parser
5953 {
5954 public:
5955 virtual bool match(
5956 _In_reads_or_z_(end) const char* text,
5957 _In_ size_t start = 0,
5958 _In_ size_t end = (size_t)-1,
5959 _In_ int flags = match_default)
5960 {
5961 _Assume_(text || start >= end);
5962 if (start + 2 < end &&
5963 text[start] == '*' &&
5964 text[start + 1] == '/' &&
5965 text[start + 2] == '*')
5966 {
5967 this->interval.end = (this->interval.start = start) + 3;
5968 return true;
5969 }
5970 else if (start < end && text[start] == '*') {
5971 this->interval.end = (this->interval.start = start) + 1;
5972 return true;
5973 }
5974 else {
5975 this->interval.start = (this->interval.end = start) + 1;
5976 return false;
5977 }
5978 }
5979 };
5980
5985 {
5986 public:
5987 virtual bool match(
5988 _In_reads_or_z_(end) const char* text,
5989 _In_ size_t start = 0,
5990 _In_ size_t end = (size_t)-1,
5991 _In_ int flags = match_default)
5992 {
5993 _Assume_(text || start >= end);
5994 this->interval.end = start;
5995 if (type.match(text, this->interval.end, end, flags))
5996 this->interval.end = type.interval.end;
5997 else
5998 goto error;
5999 while (m_space.match(text, this->interval.end, end, flags))
6000 this->interval.end = m_space.interval.end;
6001 if (this->interval.end < end && text[this->interval.end] == '/')
6002 this->interval.end++;
6003 else
6004 goto error;
6005 while (m_space.match(text, this->interval.end, end, flags))
6006 this->interval.end = m_space.interval.end;
6007 if (subtype.match(text, this->interval.end, end, flags))
6008 this->interval.end = subtype.interval.end;
6009 else
6010 goto error;
6011 this->interval.start = start;
6012 return true;
6013
6014 error:
6015 type.invalidate();
6016 subtype.invalidate();
6017 this->interval.start = (this->interval.end = start) + 1;
6018 return false;
6019 }
6020
6021 virtual void invalidate()
6022 {
6023 type.invalidate();
6024 subtype.invalidate();
6025 parser::invalidate();
6026 }
6027
6028 public:
6029 http_token type;
6030 http_token subtype;
6031
6032 protected:
6033 http_space m_space;
6034 };
6035
6040 {
6041 public:
6042 virtual bool match(
6043 _In_reads_or_z_(end) const char* text,
6044 _In_ size_t start = 0,
6045 _In_ size_t end = (size_t)-1,
6046 _In_ int flags = match_default)
6047 {
6048 _Assume_(text || start >= end);
6049 if (!http_media_range::match(text, start, end, flags))
6050 goto error;
6051 params.clear();
6052 for (;;) {
6053 if (this->interval.end < end && text[this->interval.end]) {
6054 if (m_space.match(text, this->interval.end, end, flags))
6055 this->interval.end = m_space.interval.end;
6056 else if (text[this->interval.end] == ';') {
6057 this->interval.end++;
6058 while (m_space.match(text, this->interval.end, end, flags))
6059 this->interval.end = m_space.interval.end;
6061 if (param.match(text, this->interval.end, end, flags)) {
6062 this->interval.end = param.interval.end;
6063 params.push_back(std::move(param));
6064 }
6065 else
6066 break;
6067 }
6068 else
6069 break;
6070 }
6071 else
6072 break;
6073 }
6074 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
6075 return true;
6076
6077 error:
6078 http_media_range::invalidate();
6079 params.clear();
6080 this->interval.start = (this->interval.end = start) + 1;
6081 return false;
6082 }
6083
6084 virtual void invalidate()
6085 {
6086 params.clear();
6087 http_media_range::invalidate();
6088 }
6089
6090 public:
6091 std::list<http_parameter> params;
6092 };
6093
6098 {
6099 public:
6100 virtual bool match(
6101 _In_reads_or_z_(end) const char* text,
6102 _In_ size_t start = 0,
6103 _In_ size_t end = (size_t)-1,
6104 _In_ int flags = match_default)
6105 {
6106 _Assume_(text || start >= end);
6107 this->interval.end = start;
6108 for (;;) {
6109 if (this->interval.end < end && text[this->interval.end]) {
6110 if ((unsigned int)text[this->interval.end] < 0x20 ||
6111 (unsigned int)text[this->interval.end] == 0x7f ||
6112 text[this->interval.end] == ':' ||
6113 text[this->interval.end] == '/' ||
6114 isspace(text[this->interval.end]))
6115 break;
6116 else
6117 this->interval.end++;
6118 }
6119 else
6120 break;
6121 }
6123 this->interval.start = start;
6124 return true;
6125 }
6126 this->interval.start = (this->interval.end = start) + 1;
6127 return false;
6128 }
6129 };
6130
6134 class http_url_port : public parser
6135 {
6136 public:
6137 http_url_port(_In_ const std::locale& locale = std::locale()) :
6138 parser(locale),
6139 value(0)
6140 {}
6141
6142 virtual bool match(
6143 _In_reads_or_z_(end) const char* text,
6144 _In_ size_t start = 0,
6145 _In_ size_t end = (size_t)-1,
6146 _In_ int flags = match_default)
6147 {
6148 _Assume_(text || start >= end);
6149 value = 0;
6150 this->interval.end = start;
6151 for (;;) {
6152 if (this->interval.end < end && text[this->interval.end]) {
6153 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6154 size_t _value = (size_t)value * 10 + text[this->interval.end] - '0';
6155 if (_value > (uint16_t)-1) {
6156 value = 0;
6157 this->interval.start = (this->interval.end = start) + 1;
6158 return false;
6159 }
6160 value = (uint16_t)_value;
6161 this->interval.end++;
6162 }
6163 else
6164 break;
6165 }
6166 else
6167 break;
6168 }
6170 this->interval.start = start;
6171 return true;
6172 }
6173 this->interval.start = (this->interval.end = start) + 1;
6174 return false;
6175 }
6176
6177 virtual void invalidate()
6178 {
6179 value = 0;
6180 parser::invalidate();
6181 }
6182
6183 public:
6184 uint16_t value;
6185 };
6186
6191 {
6192 public:
6193 virtual bool match(
6194 _In_reads_or_z_(end) const char* text,
6195 _In_ size_t start = 0,
6196 _In_ size_t end = (size_t)-1,
6197 _In_ int flags = match_default)
6198 {
6199 _Assume_(text || start >= end);
6200 this->interval.end = start;
6201 for (;;) {
6202 if (this->interval.end < end && text[this->interval.end]) {
6203 if ((unsigned int)text[this->interval.end] < 0x20 ||
6204 (unsigned int)text[this->interval.end] == 0x7f ||
6205 text[this->interval.end] == '?' ||
6206 text[this->interval.end] == '/' ||
6207 isspace(text[this->interval.end]))
6208 break;
6209 else
6210 this->interval.end++;
6211 }
6212 else
6213 break;
6214 }
6215 this->interval.start = start;
6216 return true;
6217 }
6218 };
6219
6223 class http_url_path : public parser
6224 {
6225 public:
6226 virtual bool match(
6227 _In_reads_or_z_(end) const char* text,
6228 _In_ size_t start = 0,
6229 _In_ size_t end = (size_t)-1,
6230 _In_ int flags = match_default)
6231 {
6232 _Assume_(text || start >= end);
6234 this->interval.end = start;
6235 segments.clear();
6236 _Assume_(text || this->interval.end >= end);
6237 if (this->interval.end < end && text[this->interval.end] != '/')
6238 goto error;
6239 this->interval.end++;
6240 s.match(text, this->interval.end, end, flags);
6241 segments.push_back(s);
6242 this->interval.end = s.interval.end;
6243 for (;;) {
6244 if (this->interval.end < end && text[this->interval.end]) {
6245 if (text[this->interval.end] == '/') {
6246 this->interval.end++;
6247 s.match(text, this->interval.end, end, flags);
6248 segments.push_back(s);
6249 this->interval.end = s.interval.end;
6250 }
6251 else
6252 break;
6253 }
6254 else
6255 break;
6256 }
6257 this->interval.start = start;
6258 return true;
6259
6260 error:
6261 segments.clear();
6262 this->interval.start = (this->interval.end = start) + 1;
6263 return false;
6264 }
6265
6266 virtual void invalidate()
6267 {
6268 segments.clear();
6269 parser::invalidate();
6270 }
6271
6272 public:
6273 std::vector<http_url_path_segment> segments;
6274 };
6275
6280 {
6281 public:
6282 virtual bool match(
6283 _In_reads_or_z_(end) const char* text,
6284 _In_ size_t start = 0,
6285 _In_ size_t end = (size_t)-1,
6286 _In_ int flags = match_default)
6287 {
6288 _Assume_(text || start >= end);
6289 this->interval.end = start;
6290 name.start = this->interval.end;
6291 for (;;) {
6292 if (this->interval.end < end && text[this->interval.end]) {
6293 if ((unsigned int)text[this->interval.end] < 0x20 ||
6294 (unsigned int)text[this->interval.end] == 0x7f ||
6295 text[this->interval.end] == '&' ||
6296 text[this->interval.end] == '=' ||
6297 isspace(text[this->interval.end]))
6298 break;
6299 else
6300 this->interval.end++;
6301 }
6302 else
6303 break;
6304 }
6306 name.end = this->interval.end;
6307 else
6308 goto error;
6309 if (text[this->interval.end] == '=') {
6310 this->interval.end++;
6311 value.start = this->interval.end;
6312 for (;;) {
6313 if (this->interval.end < end && text[this->interval.end]) {
6314 if ((unsigned int)text[this->interval.end] < 0x20 ||
6315 (unsigned int)text[this->interval.end] == 0x7f ||
6316 text[this->interval.end] == '&' ||
6317 isspace(text[this->interval.end]))
6318 break;
6319 else
6320 this->interval.end++;
6321 }
6322 else
6323 break;
6324 }
6325 value.end = this->interval.end;
6326 }
6327 else {
6328 value.start = 1;
6329 value.end = 0;
6330 }
6331 this->interval.start = start;
6332 return true;
6333
6334 error:
6335 name.start = 1;
6336 name.end = 0;
6337 value.start = 1;
6338 value.end = 0;
6339 this->interval.start = (this->interval.end = start) + 1;
6340 return false;
6341 }
6342
6343 virtual void invalidate()
6344 {
6345 name.start = 1;
6346 name.end = 0;
6347 value.start = 1;
6348 value.end = 0;
6349 parser::invalidate();
6350 }
6351
6352 public:
6355 };
6356
6360 class http_url : public parser
6361 {
6362 public:
6363 http_url(_In_ const std::locale& locale = std::locale()) :
6364 parser(locale),
6365 port(locale)
6366 {}
6367
6368 virtual bool match(
6369 _In_reads_or_z_(end) const char* text,
6370 _In_ size_t start = 0,
6371 _In_ size_t end = (size_t)-1,
6372 _In_ int flags = match_default)
6373 {
6374 _Assume_(text || start >= end);
6375 this->interval.end = start;
6376
6377 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", (size_t)-1, m_locale) == 0) {
6378 this->interval.end += 7;
6379 if (server.match(text, this->interval.end, end, flags))
6380 this->interval.end = server.interval.end;
6381 else
6382 goto error;
6383 if (this->interval.end < end && text[this->interval.end] == ':') {
6384 this->interval.end++;
6385 if (port.match(text, this->interval.end, end, flags))
6386 this->interval.end = port.interval.end;
6387 }
6388 else {
6389 port.invalidate();
6390 port.value = 80;
6391 }
6392 }
6393 else {
6394 server.invalidate();
6395 port.invalidate();
6396 port.value = 80;
6397 }
6398
6399 if (path.match(text, this->interval.end, end, flags))
6400 this->interval.end = path.interval.end;
6401 else
6402 goto error;
6403
6404 params.clear();
6405
6406 if (this->interval.end < end && text[this->interval.end] == '?') {
6407 this->interval.end++;
6408 for (;;) {
6409 if (this->interval.end < end && text[this->interval.end]) {
6410 if ((unsigned int)text[this->interval.end] < 0x20 ||
6411 (unsigned int)text[this->interval.end] == 0x7f ||
6412 isspace(text[this->interval.end]))
6413 break;
6414 else if (text[this->interval.end] == '&')
6415 this->interval.end++;
6416 else {
6418 if (param.match(text, this->interval.end, end, flags)) {
6419 this->interval.end = param.interval.end;
6420 params.push_back(std::move(param));
6421 }
6422 else
6423 break;
6424 }
6425 }
6426 else
6427 break;
6428 }
6429 }
6430
6431 this->interval.start = start;
6432 return true;
6433
6434 error:
6435 server.invalidate();
6436 port.invalidate();
6437 path.invalidate();
6438 params.clear();
6439 this->interval.start = (this->interval.end = start) + 1;
6440 return false;
6441 }
6442
6443 virtual void invalidate()
6444 {
6445 server.invalidate();
6446 port.invalidate();
6447 path.invalidate();
6448 params.clear();
6449 parser::invalidate();
6450 }
6451
6452 public:
6453 http_url_server server;
6454 http_url_port port;
6455 http_url_path path;
6456 std::list<http_url_parameter> params;
6457 };
6458
6462 class http_language : public parser
6463 {
6464 public:
6465 virtual bool match(
6466 _In_reads_or_z_(end) const char* text,
6467 _In_ size_t start = 0,
6468 _In_ size_t end = (size_t)-1,
6469 _In_ int flags = match_default)
6470 {
6471 _Assume_(text || start >= end);
6472 this->interval.end = start;
6473 components.clear();
6474 for (;;) {
6475 if (this->interval.end < end && text[this->interval.end]) {
6477 k.end = this->interval.end;
6478 for (;;) {
6479 if (k.end < end && text[k.end]) {
6480 if (isalpha(text[k.end]))
6481 k.end++;
6482 else
6483 break;
6484 }
6485 else
6486 break;
6487 }
6488 if (this->interval.end < k.end) {
6489 k.start = this->interval.end;
6490 this->interval.end = k.end;
6491 components.push_back(k);
6492 }
6493 else
6494 break;
6495 if (this->interval.end < end && text[this->interval.end] == '-')
6496 this->interval.end++;
6497 else
6498 break;
6499 }
6500 else
6501 break;
6502 }
6503 if (!components.empty()) {
6504 this->interval.start = start;
6505 this->interval.end = components.back().end;
6506 return true;
6507 }
6508 this->interval.start = (this->interval.end = start) + 1;
6509 return false;
6510 }
6511
6512 virtual void invalidate()
6513 {
6514 components.clear();
6515 parser::invalidate();
6516 }
6517
6518 public:
6519 std::vector<stdex::interval<size_t>> components;
6520 };
6521
6525 class http_weight : public parser
6526 {
6527 public:
6528 http_weight(_In_ const std::locale& locale = std::locale()) :
6529 parser(locale),
6530 value(1.0f)
6531 {}
6532
6533 virtual bool match(
6534 _In_reads_or_z_(end) const char* text,
6535 _In_ size_t start = 0,
6536 _In_ size_t end = (size_t)-1,
6537 _In_ int flags = match_default)
6538 {
6539 _Assume_(text || start >= end);
6540 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
6541 this->interval.end = start;
6542 for (;;) {
6543 if (this->interval.end < end && text[this->interval.end]) {
6544 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6545 celi_del = celi_del * 10 + text[this->interval.end] - '0';
6546 this->interval.end++;
6547 }
6548 else if (text[this->interval.end] == '.') {
6549 this->interval.end++;
6550 for (;;) {
6551 if (this->interval.end < end && text[this->interval.end]) {
6552 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6553 decimalni_del = decimalni_del * 10 + text[this->interval.end] - '0';
6554 decimalni_del_n *= 10;
6555 this->interval.end++;
6556 }
6557 else
6558 break;
6559 }
6560 else
6561 break;
6562 }
6563 break;
6564 }
6565 else
6566 break;
6567 }
6568 else
6569 break;
6570 }
6573 this->interval.start = start;
6574 return true;
6575 }
6576 value = 1.0f;
6577 this->interval.start = (this->interval.end = start) + 1;
6578 return false;
6579 }
6580
6581 virtual void invalidate()
6582 {
6583 value = 1.0f;
6584 parser::invalidate();
6585 }
6586
6587 public:
6588 float value;
6589 };
6590
6594 class http_asterisk : public parser
6595 {
6596 public:
6597 virtual bool match(
6598 _In_reads_or_z_(end) const char* text,
6599 _In_ size_t start = 0,
6600 _In_ size_t end = (size_t)-1,
6601 _In_ int flags = match_default)
6602 {
6603 _Assume_(text || end <= start);
6604 if (start < end && text[start] == '*') {
6605 this->interval.end = (this->interval.start = start) + 1;
6606 return true;
6607 }
6608 this->interval.start = (this->interval.end = start) + 1;
6609 return false;
6610 }
6611 };
6612
6616 template <class T, class T_asterisk = http_asterisk>
6618 {
6619 public:
6620 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
6621 parser(locale),
6622 factor(locale)
6623 {}
6624
// Matches either an asterisk or a value, optionally followed by a
// ";q=<factor>" suffix (white space is tolerated around ';', 'q' and '=').
//
// On success the interval spans [start, end of the last consumed code unit)
// and exactly one of `asterisk` / `value` holds a valid match; the other one
// is invalidated. On failure both are invalidated and the interval is left
// inverted (start + 1, start).
//
// NOTE(review): `factor` is not reset on entry. When the text has no ";q="
// suffix, factor.match() is never called, so the `!factor.interval` test
// below sees whatever a previous match() call on this object left behind.
// Confirm whether reuse of one instance across inputs is expected.
6625 virtual bool match(
6626 _In_reads_or_z_(end) const char* text,
6627 _In_ size_t start = 0,
6628 _In_ size_t end = (size_t)-1,
6629 _In_ int flags = match_default)
6630 {
6631 _Assume_(text || start >= end);
// "konec_vrednosti" = end of value. It is assigned below but never read in
// the visible code.
6632 size_t konec_vrednosti;
6633 this->interval.end = start;
// The asterisk is tried first; the value parser only runs when it fails.
6634 if (asterisk.match(text, this->interval.end, end, flags)) {
6635 this->interval.end = konec_vrednosti = asterisk.interval.end;
6636 value.invalidate();
6637 }
6638 else if (value.match(text, this->interval.end, end, flags)) {
6639 this->interval.end = konec_vrednosti = value.interval.end;
6640 asterisk.invalidate();
6641 }
6642 else {
6643 asterisk.invalidate();
6644 value.invalidate();
6645 this->interval.start = (this->interval.end = start) + 1;
6646 return false;
6647 }
6648
// Optional weight: [space] ';' [space] ('q' | 'Q') [space] '=' [space] factor.
// Everything consumed here stays inside the interval even if the sequence
// breaks off before the factor is reached.
6649 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
6650 if (this->interval.end < end && text[this->interval.end] == ';') {
6651 this->interval.end++;
6652 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
6653 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
6654 this->interval.end++;
6655 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
6656 if (this->interval.end < end && text[this->interval.end] == '=') {
6657 this->interval.end++;
6658 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
6659 if (factor.match(text, this->interval.end, end, flags))
6660 this->interval.end = factor.interval.end;
6661 }
6662 }
6663 }
// No valid factor interval: fall back to the factor parser's invalidated state.
// NOTE(review): the listing jumps from line 6665 to 6667, so one statement
// of this branch is not visible here.
6664 if (!factor.interval) {
6665 factor.invalidate();
6667 }
6668 this->interval.start = start;
6669 return true;
6670 }
6671
6672 virtual void invalidate()
6673 {
6674 asterisk.invalidate();
6675 value.invalidate();
6676 factor.invalidate();
6677 parser::invalidate();
6678 }
6679
6680 public:
6681 T_asterisk asterisk;
6682 T value;
6683 http_weight factor;
6684 };
6685
6690 {
6691 public:
6692 virtual bool match(
6693 _In_reads_or_z_(end) const char* text,
6694 _In_ size_t start = 0,
6695 _In_ size_t end = (size_t)-1,
6696 _In_ int flags = match_default)
6697 {
6698 _Assume_(text || start >= end);
6699 this->interval.end = start;
6700 if (this->interval.end < end && text[this->interval.end] == '$')
6701 this->interval.end++;
6702 else
6703 goto error;
6704 if (name.match(text, this->interval.end, end, flags))
6705 this->interval.end = name.interval.end;
6706 else
6707 goto error;
6708 while (m_space.match(text, this->interval.end, end, flags))
6709 this->interval.end = m_space.interval.end;
6710 if (this->interval.end < end && text[this->interval.end] == '=')
6711 this->interval.end++;
6712 else
6713 goto error;
6714 while (m_space.match(text, this->interval.end, end, flags))
6715 this->interval.end = m_space.interval.end;
6716 if (value.match(text, this->interval.end, end, flags))
6717 this->interval.end = value.interval.end;
6718 else
6719 goto error;
6720 this->interval.start = start;
6721 return true;
6722
6723 error:
6724 name.invalidate();
6725 value.invalidate();
6726 this->interval.start = (this->interval.end = start) + 1;
6727 return false;
6728 }
6729
6730 virtual void invalidate()
6731 {
6732 name.invalidate();
6733 value.invalidate();
6734 parser::invalidate();
6735 }
6736
6737 public:
6738 http_token name;
6739 http_value value;
6740
6741 protected:
6742 http_space m_space;
6743 };
6744
// Parser for a cookie: `name [space] = [space] value` followed by zero or
// more `; parameter` entries, which are collected in `params`.
//
// NOTE(review): this listing skips lines 6784 and 6819-6820; the declarations
// of `param`, `name` and `value` used below are therefore not visible here.
6748 class http_cookie : public parser
6749 {
6750 public:
// Matches a cookie starting at `start`.
//
// On success the interval is [start, end of value) when no parameters were
// collected, otherwise [start, end of the last parameter). On failure `name`,
// `value` and `params` are reset and the interval is left inverted
// (start + 1, start).
6751 virtual bool match(
6752 _In_reads_or_z_(end) const char* text,
6753 _In_ size_t start = 0,
6754 _In_ size_t end = (size_t)-1,
6755 _In_ int flags = match_default)
6756 {
6757 _Assume_(text || start >= end);
6758 this->interval.end = start;
// Mandatory part: name, '=', value, with optional space around '='.
6759 if (name.match(text, this->interval.end, end, flags))
6760 this->interval.end = name.interval.end;
6761 else
6762 goto error;
6763 while (m_space.match(text, this->interval.end, end, flags))
6764 this->interval.end = m_space.interval.end;
6765 if (this->interval.end < end && text[this->interval.end] == '=')
6766 this->interval.end++;
6767 else
6768 goto error;
6769 while (m_space.match(text, this->interval.end, end, flags))
6770 this->interval.end = m_space.interval.end;
6771 if (value.match(text, this->interval.end, end, flags))
6772 this->interval.end = value.interval.end;
6773 else
6774 goto error;
// Optional part: a sequence of space runs and ';'-introduced parameters.
// The loop stops at the first thing that is neither.
6775 params.clear();
6776 for (;;) {
6777 if (this->interval.end < end && text[this->interval.end]) {
6778 if (m_space.match(text, this->interval.end, end, flags))
6779 this->interval.end = m_space.interval.end;
6780 else if (text[this->interval.end] == ';') {
6781 this->interval.end++;
6782 while (m_space.match(text, this->interval.end, end, flags))
6783 this->interval.end = m_space.interval.end;
// NOTE(review): `param` is moved into the list on success; its declaration
// (listing line 6784) is not visible, so its scope cannot be checked here.
6785 if (param.match(text, this->interval.end, end, flags)) {
6786 this->interval.end = param.interval.end;
6787 params.push_back(std::move(param));
6788 }
6789 else
6790 break;
6791 }
6792 else
6793 break;
6794 }
6795 else
6796 break;
6797 }
6798 this->interval.start = start;
// Trim the match back to the last accepted element, discarding any trailing
// space or ';' the loop above consumed without a following parameter.
6799 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
6800 return true;
6801
6802 error:
6803 name.invalidate();
6804 value.invalidate();
6805 params.clear();
6806 this->interval.start = (this->interval.end = start) + 1;
6807 return false;
6808 }
6809
// Resets name, value and the parameter list, then the parser base class.
6810 virtual void invalidate()
6811 {
6812 name.invalidate();
6813 value.invalidate();
6814 params.clear();
6815 parser::invalidate();
6816 }
6817
6818 public:
// Parameters collected by the last successful match(), in order of appearance.
6821 std::list<http_cookie_parameter> params;
6822
6823 protected:
// Space parser used to skip space between the elements.
6824 http_space m_space;
6825 };
6826
// Parser for an HTTP agent of the form `type[/version]`.
//
// `type` runs up to the first '/', white space or end of text; when a '/'
// follows, `version` runs from after it up to the next white space or end of
// text.
//
// NOTE(review): this listing skips lines 6876 and 6899-6900. The condition
// guarding the success path and the declarations of `type` and `version`
// are therefore not visible here.
6830 class http_agent : public parser
6831 {
6832 public:
// Scans the agent starting at `start`. On failure `type`, `version` and the
// match interval are all set to the inverted pair start = 1, end = 0.
6833 virtual bool match(
6834 _In_reads_or_z_(end) const char* text,
6835 _In_ size_t start = 0,
6836 _In_ size_t end = (size_t)-1,
6837 _In_ int flags = match_default)
6838 {
6839 _Assume_(text || start >= end);
6840 this->interval.end = start;
6841 type.start = this->interval.end;
6842 for (;;) {
6843 if (this->interval.end < end && text[this->interval.end]) {
6844 if (text[this->interval.end] == '/') {
// '/' ends the type; it is consumed and the version starts right after it.
6845 type.end = this->interval.end;
6846 this->interval.end++;
6847 version.start = this->interval.end;
6848 for (;;) {
6849 if (this->interval.end < end && text[this->interval.end]) {
6850 if (isspace(text[this->interval.end])) {
6851 version.end = this->interval.end;
6852 break;
6853 }
6854 else
6855 this->interval.end++;
6856 }
6857 else {
6858 version.end = this->interval.end;
6859 break;
6860 }
6861 }
6862 break;
6863 }
6864 else if (isspace(text[this->interval.end])) {
// White space before any '/': type only; `version` is not assigned on this path.
6865 type.end = this->interval.end;
6866 break;
6867 }
6868 else
6869 this->interval.end++;
6870 }
6871 else {
// End of text before any '/': type only; `version` is not assigned on this path.
6872 type.end = this->interval.end;
6873 break;
6874 }
6875 }
// NOTE(review): listing line 6876 is missing; the closing brace at 6879
// suggests a conditional around the two success statements below.
6877 this->interval.start = start;
6878 return true;
6879 }
6880 type.start = 1;
6881 type.end = 0;
6882 version.start = 1;
6883 version.end = 0;
6884 this->interval.start = 1;
6885 this->interval.end = 0;
6886 return false;
6887 }
6888
// Resets `type` and `version` to the inverted pair (1, 0), then the base class.
6889 virtual void invalidate()
6890 {
6891 type.start = 1;
6892 type.end = 0;
6893 version.start = 1;
6894 version.end = 0;
6895 parser::invalidate();
6896 }
6897
6898 public:
6901 };
6902
// Parser for an HTTP protocol token of the form `type/major[.minor]`,
// terminated by white space.
//
// `version` packs the numeric version as major * 0x100 + minor. It is 0x009
// after construction, after invalidate() and after a failed match().
//
// NOTE(review): this listing skips lines 7006 and 7009; the declarations of
// `type` and `version` are therefore not visible here.
6906 class http_protocol : public parser
6907 {
6908 public:
6909 http_protocol(_In_ const std::locale& locale = std::locale()) :
6910 parser(locale),
6911 version(0x009)
6912 {}
6913
// Parses the protocol starting at `start`.
//
// Success requires a '/' after a type containing no white space, and a white
// space code unit after the version. That terminating white space is not
// consumed: interval.end is left pointing at it. Reaching the end of text at
// any point is a failure, which sets all intervals to (1, 0) and `version`
// to 0x009.
6914 virtual bool match(
6915 _In_reads_or_z_(end) const char* text,
6916 _In_ size_t start = 0,
6917 _In_ size_t end = (size_t)-1,
6918 _In_ int flags = match_default)
6919 {
6920 _Assume_(text || start >= end);
6921 this->interval.end = start;
6922 type.start = this->interval.end;
// Protocol type: everything up to the '/' separator.
6923 for (;;) {
6924 if (this->interval.end < end && text[this->interval.end]) {
6925 if (text[this->interval.end] == '/') {
6926 type.end = this->interval.end;
6927 this->interval.end++;
6928 break;
6929 }
6930 else if (isspace(text[this->interval.end]))
6931 goto error;
6932 else
6933 this->interval.end++;
6934 }
6935 else {
6936 type.end = this->interval.end;
6937 goto error;
6938 }
6939 }
// Major version: up to '.' (a minor version follows) or white space (major only).
6940 version_maj.start = this->interval.end;
6941 for (;;) {
6942 if (this->interval.end < end && text[this->interval.end]) {
6943 if (text[this->interval.end] == '.') {
6944 version_maj.end = this->interval.end;
6945 this->interval.end++;
6946 version_min.start = this->interval.end;
// Minor version: up to the terminating white space.
6947 for (;;) {
6948 if (this->interval.end < end && text[this->interval.end]) {
6949 if (isspace(text[this->interval.end])) {
6950 version_min.end = this->interval.end;
// Both parts are parsed as base-10; each is cast to uint16_t before the
// major part is multiplied by 0x100.
6951 version =
6952 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6953 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6954 break;
6955 }
6956 else
6957 this->interval.end++;
6958 }
6959 else
6960 goto error;
6961 }
6962 break;
6963 }
6964 else if (isspace(text[this->interval.end])) {
// Major-only version: the minor interval is marked invalid.
6965 version_maj.end = this->interval.end;
6966 version_min.start = 1;
6967 version_min.end = 0;
6968 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6969 break;
6970 }
6971 else
6972 this->interval.end++;
6973 }
6974 else
6975 goto error;
6976 }
6977 this->interval.start = start;
6978 return true;
6979
6980 error:
6981 type.start = 1;
6982 type.end = 0;
6983 version_maj.start = 1;
6984 version_maj.end = 0;
6985 version_min.start = 1;
6986 version_min.end = 0;
6987 version = 0x009;
6988 this->interval.start = 1;
6989 this->interval.end = 0;
6990 return false;
6991 }
6992
// Resets all sub-intervals to (1, 0) and `version` to 0x009, then the base class.
6993 virtual void invalidate()
6994 {
6995 type.start = 1;
6996 type.end = 0;
6997 version_maj.start = 1;
6998 version_maj.end = 0;
6999 version_min.start = 1;
7000 version_min.end = 0;
7001 version = 0x009;
7002 parser::invalidate();
7003 }
7004
7005 public:
// Region of the major version digits within the parsed text.
7007 stdex::interval<size_t> version_maj;
// Region of the minor version digits; set to (1, 0) when no minor version was given.
7008 stdex::interval<size_t> version_min;
7010 };
7011
// Parser for an HTTP request line: `verb <space> url [<space> protocol] <line break>`.
//
// NOTE(review): this listing skips line 7142; the declaration of `verb` is
// therefore not visible here.
7015 class http_request : public parser
7016 {
7017 public:
7018 http_request(_In_ const std::locale& locale = std::locale()) :
7019 parser(locale),
7020 url(locale),
7021 protocol(locale)
7022 {}
7023
// Parses a request line starting at `start`.
//
// The verb and the URL are mandatory: a line break or the end of text before
// the URL has been matched is a failure. The protocol and the terminating
// line break are optional. On success the interval covers everything
// consumed, including the line break when one was found. On failure `verb`,
// `url`, `protocol` and the match interval are all invalidated.
7024 virtual bool match(
7025 _In_reads_or_z_(end) const char* text,
7026 _In_ size_t start = 0,
7027 _In_ size_t end = (size_t)-1,
7028 _In_ int flags = match_default)
7029 {
7030 _Assume_(text || start >= end);
7031 this->interval.end = start;
7032
// Skip leading white space; the line break test comes first so that a line
// break is never skipped as ordinary white space.
7033 for (;;) {
7034 if (m_line_break.match(text, this->interval.end, end, flags))
7035 goto error;
7036 else if (this->interval.end < end && text[this->interval.end]) {
7037 if (isspace(text[this->interval.end]))
7038 this->interval.end++;
7039 else
7040 break;
7041 }
7042 else
7043 goto error;
7044 }
// Verb: runs up to the first white space code unit, which is consumed.
7045 verb.start = this->interval.end;
7046 for (;;) {
7047 if (m_line_break.match(text, this->interval.end, end, flags))
7048 goto error;
7049 else if (this->interval.end < end && text[this->interval.end]) {
7050 if (isspace(text[this->interval.end])) {
7051 verb.end = this->interval.end;
7052 this->interval.end++;
7053 break;
7054 }
7055 else
7056 this->interval.end++;
7057 }
7058 else
7059 goto error;
7060 }
7061
// Skip white space between the verb and the URL.
7062 for (;;) {
7063 if (m_line_break.match(text, this->interval.end, end, flags))
7064 goto error;
7065 else if (this->interval.end < end && text[this->interval.end]) {
7066 if (isspace(text[this->interval.end]))
7067 this->interval.end++;
7068 else
7069 break;
7070 }
7071 else
7072 goto error;
7073 }
7074 if (url.match(text, this->interval.end, end, flags))
7075 this->interval.end = url.interval.end;
7076 else
7077 goto error;
7078
// From here on nothing is mandatory: a line break or the end of text simply
// finishes the match, leaving `protocol` invalidated.
7079 protocol.invalidate();
7080 for (;;) {
7081 if (m_line_break.match(text, this->interval.end, end, flags)) {
7082 this->interval.end = m_line_break.interval.end;
7083 goto end;
7084 }
7085 else if (this->interval.end < end && text[this->interval.end]) {
7086 if (isspace(text[this->interval.end]))
7087 this->interval.end++;
7088 else
7089 break;
7090 }
7091 else
7092 goto end;
7093 }
// Every branch of this loop leaves it, so the body runs exactly once.
7094 for (;;) {
7095 if (m_line_break.match(text, this->interval.end, end, flags)) {
7096 this->interval.end = m_line_break.interval.end;
7097 goto end;
7098 }
7099 else if (protocol.match(text, this->interval.end, end, flags)) {
7100 this->interval.end = protocol.interval.end;
7101 break;
7102 }
7103 else
7104 goto end;
7105 }
7106
// Consume the rest of the line, up to and including the line break.
7107 for (;;) {
7108 if (m_line_break.match(text, this->interval.end, end, flags)) {
7109 this->interval.end = m_line_break.interval.end;
7110 break;
7111 }
7112 else if (this->interval.end < end && text[this->interval.end])
7113 this->interval.end++;
7114 else
7115 goto end;
7116 }
7117
7118 end:
7119 this->interval.start = start;
7120 return true;
7121
7122 error:
7123 verb.start = 1;
7124 verb.end = 0;
7125 url.invalidate();
7126 protocol.invalidate();
7127 this->interval.start = 1;
7128 this->interval.end = 0;
7129 return false;
7130 }
7131
// Resets `verb` to (1, 0) and invalidates `url`, `protocol` and the base class.
7132 virtual void invalidate()
7133 {
7134 verb.start = 1;
7135 verb.end = 0;
7136 url.invalidate();
7137 protocol.invalidate();
7138 parser::invalidate();
7139 }
7140
7141 public:
// Sub-parser for the request URL.
7143 http_url url;
// Sub-parser for the protocol; left invalidated when the request line has none.
7144 http_protocol protocol;
7145
7146 protected:
// Line break parser used to detect the end of the request line.
7147 http_line_break m_line_break;
7148 };
7149
// Parser for an HTTP header of the form `name [space] : value <line break>`,
// where the value may continue on following lines that begin with white space.
//
// NOTE(review): this listing skips lines 7252-7253; the declarations of
// `name` and `value` are therefore not visible here.
7153 class http_header : public parser
7154 {
7155 public:
// Parses one header starting at `start`.
//
// Fails when the text starts with a line break or white space, or when a line
// break or the end of text is reached before the ':' separator. On success
// `name` covers the header name and `value` covers the first through last
// non-space code unit of the value; the interval includes the terminating
// line break when one is present.
7156 virtual bool match(
7157 _In_reads_or_z_(end) const char* text,
7158 _In_ size_t start = 0,
7159 _In_ size_t end = (size_t)-1,
7160 _In_ int flags = match_default)
7161 {
7162 _Assume_(text || start >= end);
7163 this->interval.end = start;
7164
// A header may not start with a line break or with white space.
7165 if (m_line_break.match(text, this->interval.end, end, flags) ||
7166 (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])))
7167 goto error;
7168 name.start = this->interval.end;
7169 for (;;) {
7170 if (m_line_break.match(text, this->interval.end, end, flags))
7171 goto error;
7172 else if (this->interval.end < end && text[this->interval.end]) {
7173 if (isspace(text[this->interval.end])) {
// White space ends the name; after more optional white space the next code
// unit must be ':'.
7174 name.end = this->interval.end;
7175 this->interval.end++;
7176 for (;;) {
7177 if (m_line_break.match(text, this->interval.end, end, flags))
7178 goto error;
7179 else if (this->interval.end < end && text[this->interval.end]) {
7180 if (isspace(text[this->interval.end]))
7181 this->interval.end++;
7182 else
7183 break;
7184 }
7185 else
7186 goto error;
7187 }
7188 if (this->interval.end < end && text[this->interval.end] == ':') {
7189 this->interval.end++;
7190 break;
7191 }
7192 else
7193 goto error;
// Unreachable: both branches of the if/else above leave this point.
7194 break;
7195 }
7196 else if (text[this->interval.end] == ':') {
7197 name.end = this->interval.end;
7198 this->interval.end++;
7199 break;
7200 }
7201 else
7202 this->interval.end++;
7203 }
7204 else
7205 goto error;
7206 }
// (size_t)-1 marks "no value code unit seen yet". If the value is empty,
// `value` stays at start = (size_t)-1, end = 0.
7207 value.start = (size_t)-1;
7208 value.end = 0;
7209 for (;;) {
7210 if (m_line_break.match(text, this->interval.end, end, flags)) {
7211 this->interval.end = m_line_break.interval.end;
// Continuation: a line break followed by white space (but not by another line
// break) keeps the value going; anything else ends the header.
7212 if (!m_line_break.match(text, this->interval.end, end, flags) &&
7213 this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end]))
7214 this->interval.end++;
7215 else
7216 break;
7217 }
7218 else if (this->interval.end < end && text[this->interval.end]) {
7219 if (isspace(text[this->interval.end]))
7220 this->interval.end++;
7221 else {
// Track the first and one-past-last non-space code unit, which trims
// surrounding white space from `value`.
7222 if (value.start == (size_t)-1) value.start = this->interval.end;
7223 value.end = ++this->interval.end;
7224 }
7225 }
7226 else
7227 break;
7228 }
7229 this->interval.start = start;
7230 return true;
7231
7232 error:
7233 name.start = 1;
7234 name.end = 0;
7235 value.start = 1;
7236 value.end = 0;
7237 this->interval.start = 1;
7238 this->interval.end = 0;
7239 return false;
7240 }
7241
// Resets `name` and `value` to the inverted pair (1, 0), then the base class.
7242 virtual void invalidate()
7243 {
7244 name.start = 1;
7245 name.end = 0;
7246 value.start = 1;
7247 value.end = 0;
7248 parser::invalidate();
7249 }
7250
7251 public:
7254
7255 protected:
// Line break parser used to detect the end of the header and continuation lines.
7256 http_line_break m_line_break;
7257 };
7258
7262 template <class _Key, class T>
7263 class http_value_collection : public T
7264 {
7265 public:
7266 void insert(
7267 _In_reads_or_z_(end) const char* text,
7268 _In_ size_t start = 0,
7269 _In_ size_t end = (size_t)-1,
7270 _In_ int flags = match_default)
7271 {
7272 while (start < end) {
7273 while (start < end && text[start] && isspace(text[start])) start++;
7274 if (start < end && text[start] == ',') {
7275 start++;
7276 while (start < end&& text[start] && isspace(text[start])) start++;
7277 }
7278 _Key el;
7279 if (el.match(text, start, end, flags)) {
7280 start = el.interval.end;
7281 T::insert(std::move(el));
7282 }
7283 else
7284 break;
7285 }
7286 }
7287 };
7288
7289 template <class T>
7291 constexpr bool operator()(const T& a, const T& b) const noexcept
7292 {
7293 return a.factor.value > b.factor.value;
7294 }
7295 };
7296
7300 template <class T, class _Alloc = std::allocator<T>>
7302
7306 template <class T>
7308 {
7309 public:
// Constructor parameter list and initialisers of the JSON string parser.
// NOTE(review): the listing skips line 7310, which carries the constructor
// name and the opening parenthesis.
//
// Each argument is a sub-parser that match() uses for one syntactic element:
//   quote  - string delimiter (decoded as '"' when escaped)
//   chr    - one or more ordinary, unescaped code units
//   escape - escape introducer (decoded as '\\' when doubled)
//   sol    - escaped '/'
//   bs     - escaped '\b'
//   ff     - escaped '\f'
//   lf     - escaped '\n'
//   cr     - escaped '\r'
//   htab   - escaped '\t'
//   uni    - introducer of a four-hex-digit Unicode escape
//   hex    - hexadecimal integer parser for those four digits
//   locale - forwarded to the basic_parser<T> base class
7311 _In_ const std::shared_ptr<basic_parser<T>>& quote,
7312 _In_ const std::shared_ptr<basic_parser<T>>& chr,
7313 _In_ const std::shared_ptr<basic_parser<T>>& escape,
7314 _In_ const std::shared_ptr<basic_parser<T>>& sol,
7315 _In_ const std::shared_ptr<basic_parser<T>>& bs,
7316 _In_ const std::shared_ptr<basic_parser<T>>& ff,
7317 _In_ const std::shared_ptr<basic_parser<T>>& lf,
7318 _In_ const std::shared_ptr<basic_parser<T>>& cr,
7319 _In_ const std::shared_ptr<basic_parser<T>>& htab,
7320 _In_ const std::shared_ptr<basic_parser<T>>& uni,
7321 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
7322 _In_ const std::locale& locale = std::locale()) :
7323 basic_parser<T>(locale),
7324 m_quote(quote),
7325 m_chr(chr),
7326 m_escape(escape),
7327 m_sol(sol),
7328 m_bs(bs),
7329 m_ff(ff),
7330 m_lf(lf),
7331 m_cr(cr),
7332 m_htab(htab),
7333 m_uni(uni),
7334 m_hex(hex)
7335 {}
7336
7337 virtual bool match(
7338 _In_reads_or_z_(end) const T* text,
7339 _In_ size_t start = 0,
7340 _In_ size_t end = (size_t)-1,
7341 _In_ int flags = match_default)
7342 {
7343 _Assume_(text || start >= end);
7344 this->interval.end = start;
7345 if (m_quote->match(text, this->interval.end, end, flags)) {
7346 this->interval.end = m_quote->interval.end;
7347 value.clear();
7348 for (;;) {
7349 if (m_quote->match(text, this->interval.end, end, flags)) {
7350 this->interval.start = start;
7351 this->interval.end = m_quote->interval.end;
7352 return true;
7353 }
7354 if (m_escape->match(text, this->interval.end, end, flags)) {
7355 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
7356 value += '"'; this->interval.end = m_quote->interval.end;
7357 continue;
7358 }
7359 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
7360 value += '/'; this->interval.end = m_sol->interval.end;
7361 continue;
7362 }
7363 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
7364 value += '\b'; this->interval.end = m_bs->interval.end;
7365 continue;
7366 }
7367 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
7368 value += '\f'; this->interval.end = m_ff->interval.end;
7369 continue;
7370 }
7371 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
7372 value += '\n'; this->interval.end = m_lf->interval.end;
7373 continue;
7374 }
7375 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
7376 value += '\r'; this->interval.end = m_cr->interval.end;
7377 continue;
7378 }
7379 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
7380 value += '\t'; this->interval.end = m_htab->interval.end;
7381 continue;
7382 }
7383 if (
7384 m_uni->match(text, m_escape->interval.end, end, flags) &&
7385 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
7386 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
7387 {
7388 _Assume_(m_hex->value <= 0xffff);
7389 if (sizeof(T) == 1) {
7390 if (m_hex->value > 0x7ff) {
7391 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
7392 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
7393 value += (T)(0x80 | (m_hex->value & 0x3f));
7394 }
7395 else if (m_hex->value > 0x7f) {
7396 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
7397 value += (T)(0x80 | (m_hex->value & 0x3f));
7398 }
7399 else
7400 value += (T)(m_hex->value & 0x7f);
7401 }
7402 else
7403 value += (T)m_hex->value;
7404 this->interval.end = m_hex->interval.end;
7405 continue;
7406 }
7407 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
7408 value += '\\'; this->interval.end = m_escape->interval.end;
7409 continue;
7410 }
7411 }
7412 if (m_chr->match(text, this->interval.end, end, flags)) {
7413 value.Prilepi(text + m_chr->interval.start, m_chr->interval.size());
7414 this->interval.end = m_chr->interval.end;
7415 continue;
7416 }
7417 break;
7418 }
7419 }
7420 value.clear();
7421 this->interval.start = (this->interval.end = start) + 1;
7422 return false;
7423 }
7424
// Clears the decoded string.
// NOTE(review): the listing jumps from line 7427 to 7429, so one statement
// of this method is not visible here.
7425 virtual void invalidate()
7426 {
7427 value.clear();
7429 }
7430
7431 public:
7432 std::basic_string<T> value;
7433
7434 protected:
7435 std::shared_ptr<basic_parser<T>> m_quote;
7436 std::shared_ptr<basic_parser<T>> m_chr;
7437 std::shared_ptr<basic_parser<T>> m_escape;
7438 std::shared_ptr<basic_parser<T>> m_sol;
7439 std::shared_ptr<basic_parser<T>> m_bs;
7440 std::shared_ptr<basic_parser<T>> m_ff;
7441 std::shared_ptr<basic_parser<T>> m_lf;
7442 std::shared_ptr<basic_parser<T>> m_cr;
7443 std::shared_ptr<basic_parser<T>> m_htab;
7444 std::shared_ptr<basic_parser<T>> m_uni;
7445 std::shared_ptr<basic_integer16<T>> m_hex;
7446 };
7447
// tjson_string aliases wjson_string when _UNICODE is defined and json_string
// otherwise.
7450#ifdef _UNICODE
7451 using tjson_string = wjson_string;
7452#else
7453 using tjson_string = json_string;
7454#endif
7455 }
7456}
7457
7458#undef ENUM_FLAG_OPERATOR
7459#undef ENUM_FLAGS
7460
7461#ifdef _MSC_VER
7462#pragma warning(pop)
7463#endif
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4402
Test for any code unit.
Definition parser.hpp:228
Test for beginning of line.
Definition parser.hpp:622
Test for any.
Definition parser.hpp:1064
Test for chemical formula.
Definition parser.hpp:5546
Test for Creditor Reference.
Definition parser.hpp:4972
T reference[22]
Normalized national reference number.
Definition parser.hpp:5101
T check_digits[3]
Two check digits.
Definition parser.hpp:5100
bool is_valid
Is reference valid per ISO 7064.
Definition parser.hpp:5102
Test for any code unit from a given string of code units.
Definition parser.hpp:727
Test for specific code unit.
Definition parser.hpp:298
Test for date.
Definition parser.hpp:4032
Test for valid DNS domain character.
Definition parser.hpp:2813
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2851
Test for DNS domain/hostname.
Definition parser.hpp:2913
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2977
Test for e-mail address.
Definition parser.hpp:3801
Test for emoticon.
Definition parser.hpp:3909
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3998
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3999
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:4001
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:4000
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3997
Test for end of line.
Definition parser.hpp:660
Test for fraction.
Definition parser.hpp:1693
Test for International Bank Account Number.
Definition parser.hpp:4678
T bban[31]
Normalized Basic Bank Account Number.
Definition parser.hpp:4949
T country[3]
ISO 3166-1 alpha-2 country code.
Definition parser.hpp:4947
T check_digits[3]
Two check digits.
Definition parser.hpp:4948
bool is_valid
Is IBAN valid per ISO 7064.
Definition parser.hpp:4950
Test for decimal integer.
Definition parser.hpp:1302
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1387
bool has_separators
Did integer have any separators?
Definition parser.hpp:1447
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1446
Test for hexadecimal integer.
Definition parser.hpp:1468
Base class for integer testing.
Definition parser.hpp:1280
size_t value
Calculated value of the numeral.
Definition parser.hpp:1294
Test for IPv4 address.
Definition parser.hpp:2353
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2468
struct in_addr value
IPv4 address value.
Definition parser.hpp:2469
Test for IPv6 address.
Definition parser.hpp:2572
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2776
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2774
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2775
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2500
Test for repeating.
Definition parser.hpp:917
bool m_greedy
try to match as long sequence as possible
Definition parser.hpp:956
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:953
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:954
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:955
Test for JSON string.
Definition parser.hpp:7308
Test for mixed numeral.
Definition parser.hpp:1929
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:2035
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2033
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2032
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2031
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2034
Test for monetary numeral.
Definition parser.hpp:2224
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2330
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2335
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2333
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2336
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2334
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2331
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2332
"No-op" match
Definition parser.hpp:196
Base template for all parsers.
Definition parser.hpp:77
interval< size_t > interval
Region of the last match.
Definition parser.hpp:176
Test for permutation.
Definition parser.hpp:1204
Test for phone number.
Definition parser.hpp:4525
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4651
Test for any punctuation code unit.
Definition parser.hpp:470
Test for Roman numeral.
Definition parser.hpp:1577
Test for scientific numeral.
Definition parser.hpp:2055
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2199
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2203
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2197
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2198
double value
Calculated value of the numeral.
Definition parser.hpp:2207
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2205
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2202
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2204
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2206
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2201
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2200
Test for match score.
Definition parser.hpp:1756
Test for sequence.
Definition parser.hpp:1013
Definition parser.hpp:695
Test for SI Reference delimiter.
Definition parser.hpp:5169
Test for SI Reference part.
Definition parser.hpp:5124
Test for SI Reference.
Definition parser.hpp:5207
basic_si_reference_part< T > part3
Reference data part 3 (P3)
Definition parser.hpp:5524
basic_si_reference_part< T > part1
Reference data part 1 (P1)
Definition parser.hpp:5522
bool is_valid
Is reference valid.
Definition parser.hpp:5525
T model[3]
Reference model.
Definition parser.hpp:5521
basic_si_reference_part< T > part2
Reference data part 2 (P2)
Definition parser.hpp:5523
Test for signed numeral.
Definition parser.hpp:1843
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1911
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1910
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1909
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1912
Test for any space code unit.
Definition parser.hpp:391
Test for any space or punctuation code unit.
Definition parser.hpp:544
Test for any string.
Definition parser.hpp:1132
Test for given string.
Definition parser.hpp:822
Test for time.
Definition parser.hpp:4299
Test for valid URL password character.
Definition parser.hpp:3095
Test for valid URL path character.
Definition parser.hpp:3195
Test for URL path.
Definition parser.hpp:3303
Test for valid URL username character.
Definition parser.hpp:2996
Test for URL.
Definition parser.hpp:3444
Test for HTTP agent.
Definition parser.hpp:6831
Test for HTTP any type.
Definition parser.hpp:5953
Test for HTTP asterisk.
Definition parser.hpp:6595
Test for HTTP header.
Definition parser.hpp:7154
Test for HTTP language (RFC1766)
Definition parser.hpp:6463
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:5627
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5985
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:6040
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5898
http_token name
Parameter name.
Definition parser.hpp:5942
http_value value
Parameter value.
Definition parser.hpp:5943
Test for HTTP protocol.
Definition parser.hpp:6907
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:7009
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:5788
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:5844
Test for HTTP request.
Definition parser.hpp:7016
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:5663
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:5700
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:5734
Test for HTTP URL parameter.
Definition parser.hpp:6280
Test for HTTP URL path segment.
Definition parser.hpp:6191
Test for HTTP URL path.
Definition parser.hpp:6224
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:6273
Test for HTTP URL port.
Definition parser.hpp:6135
Test for HTTP URL server.
Definition parser.hpp:6098
Test for HTTP URL.
Definition parser.hpp:6361
Collection of HTTP values.
Definition parser.hpp:7264
Test for HTTP value (RFC2616: value)
Definition parser.hpp:5854
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5890
http_token token
Value when matched as token.
Definition parser.hpp:5891
Test for HTTP weight factor.
Definition parser.hpp:6526
float value
Calculated value of the weight factor.
Definition parser.hpp:6588
Test for HTTP weighted value.
Definition parser.hpp:6618
Base template for collection-holding parsers.
Definition parser.hpp:973
Test for any SGML code point.
Definition parser.hpp:260
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:779
Test for specific SGML code point.
Definition parser.hpp:347
Test for valid DNS domain SGML character.
Definition parser.hpp:2869
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2538
Test for any SGML punctuation code point.
Definition parser.hpp:511
Test for any SGML space code point.
Definition parser.hpp:434
Test for any SGML space or punctuation code point.
Definition parser.hpp:587
Test for SGML given string.
Definition parser.hpp:869
Test for valid URL password SGML character.
Definition parser.hpp:3147
Test for valid URL path SGML character.
Definition parser.hpp:3251
Test for valid URL username SGML character.
Definition parser.hpp:3047
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
T start
interval start
Definition interval.hpp:19
Definition parser.hpp:7290