stdex
Additional custom algorithms not covered by the C++ Standard Library
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "interval.hpp"
10#include "memory.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include <stdarg.h>
14#include <stdint.h>
15#include <math.h>
16#if defined(_WIN32)
17#include <winsock2.h>
18#if _MSC_VER >= 1300
19#include <ws2ipdef.h>
20#endif
21#include <ws2tcpip.h>
22#else
23#include <netinet/in.h>
24#endif
25#include <limits>
26#include <list>
27#include <locale>
28#include <memory>
29#include <set>
30#include <string_view>
31#include <string>
32
33#ifdef _MSC_VER
34#pragma warning(push)
35#pragma warning(disable: 4100)
36#endif
37
// ENUM_FLAG_OPERATOR(T, X): defines binary operator X for scoped enum T with
// enum/enum, enum/integer and integer/enum operands, plus the matching X=
// compound assignments. All arithmetic is done on the underlying type.
#define ENUM_FLAG_OPERATOR(T,X) \
inline T operator X (const T a, const T b) { using U = std::underlying_type_t<T>; return static_cast<T>(static_cast<U>(a) X static_cast<U>(b)); } \
inline T operator X (const T a, const std::underlying_type_t<T> b) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(a) X b); } \
inline T operator X (const std::underlying_type_t<T> a, const T b) { return static_cast<T>(a X static_cast<std::underlying_type_t<T>>(b)); } \
inline T& operator X= (T& a, const T b) { a = a X b; return a; } \
inline T& operator X= (T& a, const std::underlying_type_t<T> b) { a = a X b; return a; }
// ENUM_FLAGS(T, type): forward-declares `enum class T : type`, defines ~, |, ^
// and & for it, and ends with the enum head so that the caller supplies the
// enumerator list: ENUM_FLAGS(my_flags, int) { a = 1, b = 2 };
#define ENUM_FLAGS(T, type) \
enum class T : type; \
inline T operator ~ (T value) { return static_cast<T>(~static_cast<std::underlying_type_t<T>>(value)); } \
ENUM_FLAG_OPERATOR(T,|) \
ENUM_FLAG_OPERATOR(T,^) \
ENUM_FLAG_OPERATOR(T,&) \
enum class T : type
51
// s6_words: platform-specific spelling of the 16-bit member of an IPv6 address
// structure. Nothing is defined for Windows in this block.
// NOTE(review): presumably Windows headers already provide s6_words - confirm.
52#if defined(_WIN32)
53#elif defined(__APPLE__)
54#define s6_words __u6_addr.__u6_addr16
55#else
56#define s6_words s6_addr16
57#endif
58
59namespace stdex
60{
61 namespace parser
62 {
// Bit flags passed as the `flags` argument of search()/match()/do_match().
// No special behaviour.
66 constexpr int match_default = 0;
// Compare characters after lower-casing both sides with the parser's locale.
67 constexpr int match_case_insensitive = 0x1;
// Let line-break characters count as whitespace in the space parsers.
68 constexpr int match_multiline = 0x2;
69
73 template <class T>
75 {
76 public:
// Stores a copy of `locale` in m_locale; defaults to a default-constructed std::locale.
77 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
// Virtual destructor: parsers in this file are held through std::shared_ptr<basic_parser<T>>.
78 virtual ~basic_parser() {}
79
80 bool search(
81 _In_reads_or_z_opt_(end) const T* text,
82 _In_ size_t start = 0,
83 _In_ size_t end = SIZE_MAX,
84 _In_ int flags = match_default)
85 {
86 for (size_t i = start; i < end && text[i]; i++)
87 if (match(text, i, end, flags))
88 return true;
89 return false;
90 }
91
// Non-virtual entry point for matching at offset `start`: forwards all four
// arguments unchanged to the virtual do_match() and returns its result.
92 bool match(
93 _In_reads_or_z_opt_(end) const T* text,
94 _In_ size_t start = 0,
95 _In_ size_t end = SIZE_MAX,
96 _In_ int flags = match_default)
97 {
98 return do_match(text, start, end, flags);
99 }
100
101 bool match(
102 _In_ const std::basic_string_view<T, std::char_traits<T>> text,
103 _In_ size_t start = 0,
104 _In_ size_t end = SIZE_MAX,
105 _In_ int flags = match_default)
106 {
107 return match(text.data(), start, std::min<size_t>(end, text.size()), flags);
108 }
109
// Discards the last match result by invalidating the stored interval.
// Virtual: derived parsers in this file override it to reset their own state too.
110 virtual void invalidate()
111 {
112 this->interval.invalidate();
113 }
114
116
117 protected:
// Matching hook implemented by every concrete parser and called by match().
// The implementations visible in this file set `interval` to the matched range
// and return true, or invalidate `interval` and return false.
118 virtual bool do_match(
119 _In_reads_or_z_opt_(end) const T* text,
120 _In_ size_t start = 0,
121 _In_ size_t end = SIZE_MAX,
122 _In_ int flags = match_default) = 0;
123
125 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
126 {
127 if (text[start] == '&') {
128 // Potential entity start
129 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
130 for (chr_end = start + 1;; chr_end++) {
131 if (chr_end >= end || text[chr_end] == 0) {
132 // Unterminated entity
133 break;
134 }
135 if (text[chr_end] == ';') {
136 // Entity end
137 size_t n = chr_end - start - 1;
138 if (n >= 2 && text[start + 1] == '#') {
139 // Numerical entity
140 char32_t unicode;
141 if (text[start + 2] == 'x' || text[start + 2] == 'X')
142 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
143 else
144 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
145#ifdef _WIN32
146 if (unicode < 0x10000) {
147 buf[0] = (wchar_t)unicode;
148 buf[1] = 0;
149 }
150 else {
151 ucs4_to_surrogate_pair(buf, unicode);
152 buf[2] = 0;
153 }
154#else
155 buf[0] = (wchar_t)unicode;
156 buf[1] = 0;
157#endif
158 chr_end++;
159 return buf;
160 }
161 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
162 if (entity_w) {
163 chr_end++;
164 return entity_w;
165 }
166 // Unknown entity.
167 break;
168 }
169 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
170 // This char cannot possibly be a part of entity.
171 break;
172 }
173 }
174 }
175 buf[0] = text[start];
176 buf[1] = 0;
177 chr_end = start + 1;
178 return buf;
179 }
181
182 std::locale m_locale;
183 };
184
// Character-type shorthands for basic_parser. tparser follows the _UNICODE
// build setting; sgml_parser is the narrow-character parser used as the base
// of the SGML-aware classes.
185 using parser = basic_parser<char>;
186 using wparser = basic_parser<wchar_t>;
187#ifdef _UNICODE
188 using tparser = wparser;
189#else
190 using tparser = parser;
191#endif
192 using sgml_parser = basic_parser<char>;
193
197 template <class T>
198 class basic_noop : public basic_parser<T>
199 {
200 protected:
201 virtual bool do_match(
202 _In_reads_or_z_opt_(end) const T* text,
203 _In_ size_t start = 0,
204 _In_ size_t end = SIZE_MAX,
205 _In_ int flags = match_default)
206 {
207 _Assume_(text || start >= end);
208 if (start < end && text[start]) {
209 this->interval.start = this->interval.end = start;
210 return true;
211 }
212 this->interval.invalidate();
213 return false;
214 }
215 };
216
// Character-type shorthands for basic_noop; tnoop follows the _UNICODE build setting.
// NOTE(review): wnoop is not declared in the visible text - presumably
// `using wnoop = basic_noop<wchar_t>;` was lost; confirm against the real header.
217 using noop = basic_noop<char>;
219#ifdef _UNICODE
220 using tnoop = wnoop;
221#else
222 using tnoop = noop;
223#endif
225
229 template <class T>
230 class basic_any_cu : public basic_parser<T>
231 {
232 public:
233 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
234
235 protected:
236 virtual bool do_match(
237 _In_reads_or_z_opt_(end) const T* text,
238 _In_ size_t start = 0,
239 _In_ size_t end = SIZE_MAX,
240 _In_ int flags = match_default)
241 {
242 _Assume_(text || start >= end);
243 if (start < end && text[start]) {
244 this->interval.end = (this->interval.start = start) + 1;
245 return true;
246 }
247 this->interval.invalidate();
248 return false;
249 }
250 };
251
// tany_cu follows the _UNICODE build setting.
// NOTE(review): neither any_cu nor wany_cu is declared in the visible text -
// presumably aliases of basic_any_cu<char> / basic_any_cu<wchar_t>; confirm.
254#ifdef _UNICODE
255 using tany_cu = wany_cu;
256#else
257 using tany_cu = any_cu;
258#endif
259
263 class sgml_any_cp : public basic_any_cu<char>
264 {
265 public:
266 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
267
268 protected:
269 virtual bool do_match(
270 _In_reads_or_z_(end) const char* text,
271 _In_ size_t start = 0,
272 _In_ size_t end = SIZE_MAX,
273 _In_ int flags = match_default)
274 {
275 _Assume_(text || start >= end);
276 if (start < end && text[start]) {
277 if (text[start] == '&') {
278 // SGML entity
279 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
280 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
281 if (text[this->interval.end] == ';') {
282 this->interval.end++;
283 this->interval.start = start;
284 return true;
285 }
286 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
287 break;
288 // Unterminated entity
289 }
290 this->interval.end = (this->interval.start = start) + 1;
291 return true;
292 }
293 this->interval.invalidate();
294 return false;
295 }
296 };
297
301 template <class T>
302 class basic_cu : public basic_parser<T>
303 {
304 public:
305 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
307 m_chr(chr),
308 m_invert(invert)
309 {}
310
311 protected:
312 virtual bool do_match(
313 _In_reads_or_z_opt_(end) const T* text,
314 _In_ size_t start = 0,
315 _In_ size_t end = SIZE_MAX,
316 _In_ int flags = match_default)
317 {
318 _Assume_(text || start >= end);
319 if (start < end && text[start]) {
320 bool r;
321 if (flags & match_case_insensitive) {
322 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
323 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
324 }
325 else
326 r = text[start] == m_chr;
327 if ((r && !m_invert) || (!r && m_invert)) {
328 this->interval.end = (this->interval.start = start) + 1;
329 return true;
330 }
331 }
332 this->interval.invalidate();
333 return false;
334 }
335
336 T m_chr;
337 bool m_invert;
338 };
339
// Character-type shorthands for basic_cu; tcu follows the _UNICODE build setting.
340 using cu = basic_cu<char>;
341 using wcu = basic_cu<wchar_t>;
342#ifdef _UNICODE
343 using tcu = wcu;
344#else
345 using tcu = cu;
346#endif
347
351 class sgml_cp : public sgml_parser
352 {
353 public:
354 sgml_cp(const char* chr, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
356 m_invert(invert)
357 {
358 _Assume_(chr || !count);
359 wchar_t buf[3];
360 size_t chr_end;
361 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
362 }
363
364 protected:
365 virtual bool do_match(
366 _In_reads_or_z_(end) const char* text,
367 _In_ size_t start = 0,
368 _In_ size_t end = SIZE_MAX,
369 _In_ int flags = match_default)
370 {
371 _Assume_(text || start >= end);
372 if (start < end && text[start]) {
373 wchar_t buf[3];
374 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
375 bool r = ((flags & match_case_insensitive) ?
376 stdex::strnicmp(chr, SIZE_MAX, m_chr.data(), m_chr.size(), m_locale) :
377 stdex::strncmp(chr, SIZE_MAX, m_chr.data(), m_chr.size())) == 0;
378 if ((r && !m_invert) || (!r && m_invert)) {
379 this->interval.start = start;
380 return true;
381 }
382 }
383 this->interval.invalidate();
384 return false;
385 }
386
387 std::wstring m_chr;
388 bool m_invert;
389 };
390
394 template <class T>
395 class basic_space_cu : public basic_parser<T>
396 {
397 public:
398 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
400 m_invert(invert)
401 {}
402
403 protected:
404 virtual bool do_match(
405 _In_reads_or_z_opt_(end) const T* text,
406 _In_ size_t start = 0,
407 _In_ size_t end = SIZE_MAX,
408 _In_ int flags = match_default)
409 {
410 _Assume_(text || start >= end);
411 if (start < end && text[start]) {
412 bool r =
413 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
414 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
415 if ((r && !m_invert) || (!r && m_invert)) {
416 this->interval.end = (this->interval.start = start) + 1;
417 return true;
418 }
419 }
420 this->interval.invalidate();
421 return false;
422 }
423
424 bool m_invert;
425 };
426
// tspace_cu follows the _UNICODE build setting.
// NOTE(review): neither space_cu nor wspace_cu is declared in the visible text -
// presumably aliases of basic_space_cu<char> / basic_space_cu<wchar_t>; confirm.
429#ifdef _UNICODE
430 using tspace_cu = wspace_cu;
431#else
432 using tspace_cu = space_cu;
433#endif
434
438 class sgml_space_cp : public basic_space_cu<char>
439 {
440 public:
441 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
443 {}
444
445 protected:
446 virtual bool do_match(
447 _In_reads_or_z_(end) const char* text,
448 _In_ size_t start = 0,
449 _In_ size_t end = SIZE_MAX,
450 _In_ int flags = match_default)
451 {
452 _Assume_(text || start >= end);
453 if (start < end && text[start]) {
454 wchar_t buf[3];
455 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
456 const wchar_t* chr_end = chr + stdex::strlen(chr);
457 bool r =
458 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
459 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
460 if ((r && !m_invert) || (!r && m_invert)) {
461 this->interval.start = start;
462 return true;
463 }
464 }
465
466 this->interval.invalidate();
467 return false;
468 }
469 };
470
474 template <class T>
475 class basic_punct_cu : public basic_parser<T>
476 {
477 public:
478 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
480 m_invert(invert)
481 {}
482
483 protected:
484 virtual bool do_match(
485 _In_reads_or_z_opt_(end) const T* text,
486 _In_ size_t start = 0,
487 _In_ size_t end = SIZE_MAX,
488 _In_ int flags = match_default)
489 {
490 _Assume_(text || start >= end);
491 if (start < end && text[start]) {
492 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
493 if ((r && !m_invert) || (!r && m_invert)) {
494 this->interval.end = (this->interval.start = start) + 1;
495 return true;
496 }
497 }
498 this->interval.invalidate();
499 return false;
500 }
501
502 bool m_invert;
503 };
504
// tpunct_cu follows the _UNICODE build setting.
// NOTE(review): neither punct_cu nor wpunct_cu is declared in the visible text -
// presumably aliases of basic_punct_cu<char> / basic_punct_cu<wchar_t>; confirm.
507#ifdef _UNICODE
508 using tpunct_cu = wpunct_cu;
509#else
510 using tpunct_cu = punct_cu;
511#endif
512
516 class sgml_punct_cp : public basic_punct_cu<char>
517 {
518 public:
519 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
521 {}
522
523 protected:
524 virtual bool do_match(
525 _In_reads_or_z_(end) const char* text,
526 _In_ size_t start = 0,
527 _In_ size_t end = SIZE_MAX,
528 _In_ int flags = match_default)
529 {
530 _Assume_(text || start >= end);
531 if (start < end && text[start]) {
532 wchar_t buf[3];
533 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
534 const wchar_t* chr_end = chr + stdex::strlen(chr);
535 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
536 if ((r && !m_invert) || (!r && m_invert)) {
537 this->interval.start = start;
538 return true;
539 }
540 }
541 this->interval.invalidate();
542 return false;
543 }
544 };
545
549 template <class T>
551 {
552 public:
553 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
555 m_invert(invert)
556 {}
557
558 protected:
559 virtual bool do_match(
560 _In_reads_or_z_opt_(end) const T* text,
561 _In_ size_t start = 0,
562 _In_ size_t end = SIZE_MAX,
563 _In_ int flags = match_default)
564 {
565 _Assume_(text || start >= end);
566 if (start < end && text[start]) {
567 bool r =
568 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
569 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
570 if ((r && !m_invert) || (!r && m_invert)) {
571 this->interval.end = (this->interval.start = start) + 1;
572 return true;
573 }
574 }
575 this->interval.invalidate();
576 return false;
577 }
578
579 bool m_invert;
580 };
581
584#ifdef _UNICODE
586#else
588#endif
589
594 {
595 public:
596 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
598 {}
599
600 protected:
601 virtual bool do_match(
602 _In_reads_or_z_(end) const char* text,
603 _In_ size_t start = 0,
604 _In_ size_t end = SIZE_MAX,
605 _In_ int flags = match_default)
606 {
607 _Assume_(text || start >= end);
608 if (start < end && text[start]) {
609 wchar_t buf[3];
610 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
611 const wchar_t* chr_end = chr + stdex::strlen(chr);
612 bool r =
613 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
614 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
615 if ((r && !m_invert) || (!r && m_invert)) {
616 this->interval.start = start;
617 return true;
618 }
619 }
620 this->interval.invalidate();
621 return false;
622 }
623 };
624
628 template <class T>
629 class basic_bol : public basic_parser<T>
630 {
631 public:
632 basic_bol(bool invert = false) : m_invert(invert) {}
633
634 protected:
635 virtual bool do_match(
636 _In_reads_or_z_opt_(end) const T* text,
637 _In_ size_t start = 0,
638 _In_ size_t end = SIZE_MAX,
639 _In_ int flags = match_default)
640 {
641 _Assume_(text || !end);
642 _Assume_(text || start >= end);
643 bool r = start == 0 || (start <= end && stdex::islbreak(text[start - 1]));
644 if ((r && !m_invert) || (!r && m_invert)) {
645 this->interval.end = this->interval.start = start;
646 return true;
647 }
648 this->interval.invalidate();
649 return false;
650 }
651
652 bool m_invert;
653 };
654
// Character-type shorthands for basic_bol; tbol follows the _UNICODE build setting.
655 using bol = basic_bol<char>;
656 using wbol = basic_bol<wchar_t>;
657#ifdef _UNICODE
658 using tbol = wbol;
659#else
660 using tbol = bol;
661#endif
663
667 template <class T>
668 class basic_eol : public basic_parser<T>
669 {
670 public:
671 basic_eol(bool invert = false) : m_invert(invert) {}
672
673 protected:
674 virtual bool do_match(
675 _In_reads_or_z_opt_(end) const T* text,
676 _In_ size_t start = 0,
677 _In_ size_t end = SIZE_MAX,
678 _In_ int flags = match_default)
679 {
680 _Assume_(text || start >= end);
681 bool r = start >= end || !text[start] || stdex::islbreak(text[start]);
682 if ((r && !m_invert) || (!r && m_invert)) {
683 this->interval.end = this->interval.start = start;
684 return true;
685 }
686 this->interval.invalidate();
687 return false;
688 }
689
690 bool m_invert;
691 };
692
// Character-type shorthands for basic_eol; teol follows the _UNICODE build setting.
693 using eol = basic_eol<char>;
694 using weol = basic_eol<wchar_t>;
695#ifdef _UNICODE
696 using teol = weol;
697#else
698 using teol = eol;
699#endif
701
702 template <class T>
703 class basic_set : public basic_parser<T>
704 {
705 public:
706 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
708 hit_offset(SIZE_MAX),
709 m_invert(invert)
710 {}
711
712 virtual void invalidate()
713 {
714 hit_offset = SIZE_MAX;
716 }
717
718 size_t hit_offset;
719
720 protected:
721 virtual bool do_match(
722 _In_reads_or_z_opt_(end) const T* text,
723 _In_ size_t start = 0,
724 _In_ size_t end = SIZE_MAX,
725 _In_ int flags = match_default) = 0;
726
727 bool m_invert;
728 };
729
733 template <class T>
734 class basic_cu_set : public basic_set<T>
735 {
736 public:
738 _In_reads_or_z_(count) const T* set,
739 _In_ size_t count = SIZE_MAX,
740 _In_ bool invert = false,
741 _In_ const std::locale& locale = std::locale()) :
743 {
744 if (set)
745 m_set.assign(set, set + stdex::strnlen(set, count));
746 }
747
748 protected:
749 virtual bool do_match(
750 _In_reads_or_z_opt_(end) const T* text,
751 _In_ size_t start = 0,
752 _In_ size_t end = SIZE_MAX,
753 _In_ int flags = match_default)
754 {
755 _Assume_(text || start >= end);
756 if (start < end && text[start]) {
757 const T* set = m_set.data();
758 size_t r = (flags & match_case_insensitive) ?
759 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
760 stdex::strnchr(set, m_set.size(), text[start]);
761 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
762 this->hit_offset = r;
763 this->interval.end = (this->interval.start = start) + 1;
764 return true;
765 }
766 }
767 this->hit_offset = SIZE_MAX;
768 this->interval.invalidate();
769 return false;
770 }
771
772 std::basic_string<T> m_set;
773 };
774
// tcu_set follows the _UNICODE build setting.
// NOTE(review): neither cu_set nor wcu_set is declared in the visible text -
// presumably aliases of basic_cu_set<char> / basic_cu_set<wchar_t>; confirm.
777#ifdef _UNICODE
778 using tcu_set = wcu_set;
779#else
780 using tcu_set = cu_set;
781#endif
782
786 class sgml_cp_set : public basic_set<char>
787 {
788 public:
789 sgml_cp_set(const char* set, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
791 {
792 if (set)
793 m_set = sgml2str(set, count);
794 }
795
796 protected:
797 virtual bool do_match(
798 _In_reads_or_z_(end) const char* text,
799 _In_ size_t start = 0,
800 _In_ size_t end = SIZE_MAX,
801 _In_ int flags = match_default)
802 {
803 _Assume_(text || start >= end);
804 if (start < end && text[start]) {
805 wchar_t buf[3];
806 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
807 const wchar_t* set = m_set.data();
808 size_t r = (flags & match_case_insensitive) ?
809 stdex::strnistr(set, m_set.size(), chr, m_locale) :
810 stdex::strnstr(set, m_set.size(), chr);
811 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
812 hit_offset = r;
813 this->interval.start = start;
814 return true;
815 }
816 }
817 hit_offset = SIZE_MAX;
818 this->interval.invalidate();
819 return false;
820 }
821
822 std::wstring m_set;
823 };
824
828 template <class T>
829 class basic_string : public basic_parser<T>
830 {
831 public:
833 _In_reads_or_z_(count) const T* str,
834 _In_ size_t count = SIZE_MAX,
835 _In_ const std::locale& locale = std::locale()) :
837 m_str(str, str + stdex::strnlen(str, count))
838 {}
839
840 protected:
841 virtual bool do_match(
842 _In_reads_or_z_opt_(end) const T* text,
843 _In_ size_t start = 0,
844 _In_ size_t end = SIZE_MAX,
845 _In_ int flags = match_default)
846 {
847 _Assume_(text || start >= end);
848 size_t
849 m = m_str.size(),
850 n = std::min<size_t>(end - start, m);
851 bool r = ((flags & match_case_insensitive) ?
852 stdex::strnicmp(text + start, n, m_str.data(), m, this->m_locale) :
853 stdex::strncmp(text + start, n, m_str.data(), m)) == 0;
854 if (r) {
855 this->interval.end = (this->interval.start = start) + n;
856 return true;
857 }
858 this->interval.invalidate();
859 return false;
860 }
861
862 std::basic_string<T> m_str;
863 };
864
// tstring follows the _UNICODE build setting.
// NOTE(review): neither string nor wstring is declared in the visible text -
// presumably aliases of basic_string<char> / basic_string<wchar_t> in this namespace; confirm.
867#ifdef _UNICODE
868 using tstring = wstring;
869#else
870 using tstring = string;
871#endif
872
877 {
878 public:
879 sgml_string(const char* str, size_t count = SIZE_MAX, _In_ const std::locale& locale = std::locale()) :
881 m_str(sgml2str(str, count))
882 {}
883
884 protected:
885 virtual bool do_match(
886 _In_reads_or_z_(end) const char* text,
887 _In_ size_t start = 0,
888 _In_ size_t end = SIZE_MAX,
889 _In_ int flags = match_default)
890 {
891 _Assume_(text || start >= end);
892 const wchar_t* str = m_str.data();
893 const bool case_insensitive = flags & match_case_insensitive ? true : false;
894 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
895 for (this->interval.end = start;;) {
896 if (!*str) {
897 this->interval.start = start;
898 return true;
899 }
900 if (this->interval.end >= end || !text[this->interval.end]) {
901 this->interval.invalidate();
902 return false;
903 }
904 wchar_t buf[3];
905 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
906 for (; *chr; ++str, ++chr) {
907 if (!*str ||
908 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
909 {
910 this->interval.invalidate();
911 return false;
912 }
913 }
914 }
915 }
916
917 std::wstring m_str;
918 };
919
923 template <class T>
925 {
926 public:
927 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = SIZE_MAX, bool greedy = true) :
928 m_el(el),
932 {}
933
934 protected:
935 virtual bool do_match(
936 _In_reads_or_z_opt_(end) const T* text,
937 _In_ size_t start = 0,
938 _In_ size_t end = SIZE_MAX,
939 _In_ int flags = match_default)
940 {
941 _Assume_(text || start >= end);
942 this->interval.start = this->interval.end = start;
943 for (size_t i = 0; ; i++) {
944 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
945 return true;
946 if (!m_el->match(text, this->interval.end, end, flags)) {
947 if (i >= m_min_iterations)
948 return true;
949 break;
950 }
951 if (m_el->interval.end == this->interval.end) {
952 // Element did match, but the matching interval was empty. Quit instead of spinning.
953 return true;
954 }
955 this->interval.end = m_el->interval.end;
956 }
957 this->interval.invalidate();
958 return false;
959 }
960
961 std::shared_ptr<basic_parser<T>> m_el;
964 bool m_greedy;
965 };
966
// titerations follows the _UNICODE build setting.
// NOTE(review): neither iterations nor witerations is declared in the visible text -
// presumably aliases of basic_iterations<char> / basic_iterations<wchar_t>; confirm.
969#ifdef _UNICODE
970 using titerations = witerations;
971#else
972 using titerations = iterations;
973#endif
975
979 template <class T>
981 {
982 protected:
983 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
984
985 public:
987 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
988 _In_ size_t count,
989 _In_ const std::locale& locale = std::locale()) :
991 {
992 _Assume_(el || !count);
993 m_collection.reserve(count);
994 for (size_t i = 0; i < count; i++)
995 m_collection.push_back(el[i]);
996 }
997
999 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1000 _In_ const std::locale& locale = std::locale()) :
1002 m_collection(std::move(collection))
1003 {}
1004
1005 virtual void invalidate()
1006 {
1007 for (auto& el : m_collection)
1008 el->invalidate();
1010 }
1011
1012 protected:
1013 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
1014 };
1015
1019 template <class T>
1021 {
1022 public:
1024 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1025 _In_ size_t count = 0,
1026 _In_ const std::locale& locale = std::locale()) :
1028 {}
1029
1031 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1032 _In_ const std::locale& locale = std::locale()) :
1034 {}
1035
1036 protected:
1037 virtual bool do_match(
1038 _In_reads_or_z_opt_(end) const T* text,
1039 _In_ size_t start = 0,
1040 _In_ size_t end = SIZE_MAX,
1041 _In_ int flags = match_default)
1042 {
1043 _Assume_(text || start >= end);
1044 this->interval.end = start;
1045 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1046 if (!(*i)->match(text, this->interval.end, end, flags)) {
1047 for (++i; i != this->m_collection.end(); ++i)
1048 (*i)->invalidate();
1049 this->interval.invalidate();
1050 return false;
1051 }
1052 this->interval.end = (*i)->interval.end;
1053 }
1054 this->interval.start = start;
1055 return true;
1056 }
1057 };
1058
// tsequence follows the _UNICODE build setting.
// NOTE(review): neither sequence nor wsequence is declared in the visible text - confirm
// the char / wchar_t aliases exist in the real header.
1061#ifdef _UNICODE
1062 using tsequence = wsequence;
1063#else
1064 using tsequence = sequence;
1065#endif
1067
1071 template <class T>
1073 {
1074 protected:
1075 basic_branch(_In_ const std::locale& locale) :
1077 hit_offset(SIZE_MAX)
1078 {}
1079
1080 public:
1082 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1083 _In_ size_t count = 0,
1084 _In_ const std::locale& locale = std::locale()) :
1086 hit_offset(SIZE_MAX)
1087 {}
1088
1090 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1091 _In_ const std::locale& locale = std::locale()) :
1093 hit_offset(SIZE_MAX)
1094 {}
1095
1096 virtual void invalidate()
1097 {
1098 hit_offset = SIZE_MAX;
1100 }
1101
1102 size_t hit_offset;
1103
1104 protected:
1105 virtual bool do_match(
1106 _In_reads_or_z_opt_(end) const T* text,
1107 _In_ size_t start = 0,
1108 _In_ size_t end = SIZE_MAX,
1109 _In_ int flags = match_default)
1110 {
1111 _Assume_(text || start >= end);
1112 hit_offset = 0;
1113 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1114 if ((*i)->match(text, start, end, flags)) {
1115 this->interval = (*i)->interval;
1116 for (++i; i != this->m_collection.end(); ++i)
1117 (*i)->invalidate();
1118 return true;
1119 }
1120 }
1121 hit_offset = SIZE_MAX;
1122 this->interval.invalidate();
1123 return false;
1124 }
1125 };
1126
// Character-type shorthands for basic_branch; tbranch follows the _UNICODE build setting.
// NOTE(review): wbranch is not declared in the visible text - presumably
// `using wbranch = basic_branch<wchar_t>;` was lost; confirm against the real header.
1127 using branch = basic_branch<char>;
1129#ifdef _UNICODE
1130 using tbranch = wbranch;
1131#else
1132 using tbranch = branch;
1133#endif
1135
1139 template <class T, class T_parser = basic_string<T>>
1141 {
1142 public:
1144 _In_reads_(count) const T* str_z = nullptr,
1145 _In_ size_t count = 0,
1146 _In_ const std::locale& locale = std::locale()) :
1148 {
1149 build(str_z, count);
1150 }
1151
1152 basic_string_branch(_In_z_ const T* str, ...) :
1153 basic_branch<T>(std::locale())
1154 {
1155 va_list params;
1156 va_start(params, str);
1157 build(str, params);
1158 va_end(params);
1159 }
1160
1161 basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1163 {
1164 va_list params;
1165 va_start(params, str);
1166 build(str, params);
1167 va_end(params);
1168 }
1169
1170 protected:
1171 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1172 {
1173 _Assume_(str_z || !count);
1174 if (count) {
1175 size_t offset, n;
1176 for (
1177 offset = n = 0;
1178 offset < count && str_z[offset];
1179 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1180 this->m_collection.reserve(n);
1181 for (
1182 offset = 0;
1183 offset < count && str_z[offset];
1184 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1185 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1186 }
1187 }
1188
1189 void build(_In_z_ const T* str, _In_ va_list params)
1190 {
1191 const T* p;
1192 for (
1193 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, SIZE_MAX, this->m_locale)));
1194 (p = va_arg(params, const T*)) != nullptr;
1195 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, SIZE_MAX, this->m_locale))));
1196 }
1197 };
1198
1201#ifdef _UNICODE
1203#else
1205#endif
1207
1211 template <class T>
1213 {
1214 public:
1216 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1217 _In_ size_t count = 0,
1218 _In_ const std::locale& locale = std::locale()) :
1220 {}
1221
1223 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1224 _In_ const std::locale& locale = std::locale()) :
1226 {}
1227
1228 protected:
1229 virtual bool do_match(
1230 _In_reads_or_z_opt_(end) const T* text,
1231 _In_ size_t start = 0,
1232 _In_ size_t end = SIZE_MAX,
1233 _In_ int flags = match_default)
1234 {
1235 _Assume_(text || start >= end);
1236 for (auto& el : this->m_collection)
1237 el->invalidate();
1238 if (match_recursively(text, start, end, flags)) {
1239 this->interval.start = start;
1240 return true;
1241 }
1242 this->interval.invalidate();
1243 return false;
1244 }
1245
1246 bool match_recursively(
1247 _In_reads_or_z_opt_(end) const T* text,
1248 _In_ size_t start = 0,
1249 _In_ size_t end = SIZE_MAX,
1250 _In_ int flags = match_default)
1251 {
1252 bool all_matched = true;
1253 for (auto& el : this->m_collection) {
1254 if (!el->interval) {
1255 // Element was not matched in permutatuion yet.
1256 all_matched = false;
1257 if (el->match(text, start, end, flags)) {
1258 // Element matched for the first time.
1259 if (match_recursively(text, el->interval.end, end, flags)) {
1260 // Rest of the elements matched too.
1261 return true;
1262 }
1263 el->invalidate();
1264 }
1265 }
1266 }
1267 if (all_matched) {
1268 this->interval.end = start;
1269 return true;
1270 }
1271 return false;
1272 }
1273 };
1274
// tpermutation follows the _UNICODE build setting.
// NOTE(review): neither permutation nor wpermutation is declared in the visible text -
// confirm the char / wchar_t aliases exist in the real header.
1277#ifdef _UNICODE
1278 using tpermutation = wpermutation;
1279#else
1280 using tpermutation = permutation;
1281#endif
1283
1287 template <class T>
1288 class basic_integer : public basic_parser<T>
1289 {
1290 public:
1291 basic_integer(_In_ const std::locale& locale = std::locale()) :
1293 value(0)
1294 {}
1295
1296 virtual void invalidate()
1297 {
1298 value = 0;
1300 }
1301
1302 public:
1303 size_t value;
1304 };
1305
1309 template <class T>
1311 {
1312 public:
1314 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1315 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1316 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1317 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1318 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1319 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1320 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1321 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1322 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1323 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1324 _In_ const std::locale& locale = std::locale()) :
1326 m_digit_0(digit_0),
1327 m_digit_1(digit_1),
1328 m_digit_2(digit_2),
1329 m_digit_3(digit_3),
1330 m_digit_4(digit_4),
1331 m_digit_5(digit_5),
1332 m_digit_6(digit_6),
1333 m_digit_7(digit_7),
1334 m_digit_8(digit_8),
1335 m_digit_9(digit_9)
1336 {}
1337
1338 protected:
// Accumulates a base-10 number from consecutive digit matches beginning at `start`.
// `text` is the buffer to parse, `start`/`end` bound the region and `flags` is passed
// unchanged to every digit parser.
1339 virtual bool do_match(
1340 _In_reads_or_z_opt_(end) const T* text,
1341 _In_ size_t start = 0,
1342 _In_ size_t end = SIZE_MAX,
1343 _In_ int flags = match_default)
1344 {
1345 _Assume_(text || start >= end);
// The loop has no increment expression: the cursor (interval.end) only moves when a digit
// parser matches. It stops at `end`, at a zero terminator, or at the first non-digit.
1346 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1347 size_t dig;
// Digit parsers are tried in the order 0..9; the first one that matches decides the digit.
1348 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1349 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1350 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1351 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1352 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1353 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1354 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1355 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1356 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1357 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1358 else break;
// Shift the accumulated value one decimal place and append the new digit.
1359 this->value = this->value * 10 + dig;
1360 }
// NOTE(review): listing line 1361 is absent from this extract. The closing brace after
// `return true` implies a guarding statement here, presumably a test that at least one
// digit was consumed - confirm against the original header.
1362 this->interval.start = start;
1363 return true;
1364 }
1365 this->interval.invalidate();
1366 return false;
1367 }
1368
1369 std::shared_ptr<basic_parser<T>>
1370 m_digit_0,
1371 m_digit_1,
1372 m_digit_2,
1373 m_digit_3,
1374 m_digit_4,
1375 m_digit_5,
1376 m_digit_6,
1377 m_digit_7,
1378 m_digit_8,
1379 m_digit_9;
1380 };
1381
1384#ifdef _UNICODE
1385 using tinteger10 = winteger10;
1386#else
1387 using tinteger10 = integer10;
1388#endif
1390
1394 template <class T>
1396 {
1397 public:
1399 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1400 _In_ const std::shared_ptr<basic_set<T>>& separator,
1401 _In_ const std::locale& locale = std::locale()) :
1403 digit_count(0),
1404 has_separators(false),
1405 m_digits(digits),
1406 m_separator(separator)
1407 {}
1408
// Clears the digit counter and the "separators seen" flag.
// NOTE(review): listing line 1413 is absent from this extract; presumably it chains to the
// base-class invalidate() - confirm against the original header.
1409 virtual void invalidate()
1410 {
1411 digit_count = 0;
1412 has_separators = false;
1414 }
1415
1418
1419 protected:
1420 virtual bool do_match(
1421 _In_reads_or_z_opt_(end) const T* text,
1422 _In_ size_t start = 0,
1423 _In_ size_t end = SIZE_MAX,
1424 _In_ int flags = match_default)
1425 {
1426 _Assume_(text || start >= end);
1427 if (m_digits->match(text, start, end, flags)) {
1428 // Leading part match.
1429 this->value = m_digits->value;
1430 digit_count = m_digits->interval.size();
1431 has_separators = false;
1432 this->interval.start = start;
1433 this->interval.end = m_digits->interval.end;
1434 if (m_digits->interval.size() <= 3) {
1435 // Maybe separated with thousand separators?
1436 size_t hit_offset = SIZE_MAX;
1437 while (m_separator->match(text, this->interval.end, end, flags) &&
1438 (hit_offset == SIZE_MAX || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1439 m_digits->match(text, m_separator->interval.end, end, flags) &&
1440 m_digits->interval.size() == 3)
1441 {
1442 // Thousand separator and three-digit integer followed.
1443 this->value = this->value * 1000 + m_digits->value;
1444 digit_count += 3;
1445 has_separators = true;
1446 this->interval.end = m_digits->interval.end;
1447 hit_offset = m_separator->hit_offset;
1448 }
1449 }
1450
1451 return true;
1452 }
1453 this->value = 0;
1454 this->interval.invalidate();
1455 return false;
1456 }
1457
1458 std::shared_ptr<basic_integer10<T>> m_digits;
1459 std::shared_ptr<basic_set<T>> m_separator;
1460 };
1461
// Convenience aliases for basic_integer10ts: the char and wchar_t instantiations, a
// t-prefixed alias that follows _UNICODE, and an sgml_-prefixed alias that is also the
// char instantiation.
1462 using integer10ts = basic_integer10ts<char>;
1463 using winteger10ts = basic_integer10ts<wchar_t>;
1464#ifdef _UNICODE
1465 using tinteger10ts = winteger10ts;
1466#else
1467 using tinteger10ts = integer10ts;
1468#endif
1469 using sgml_integer10ts = basic_integer10ts<char>;
1470
1474 template <class T>
1476 {
1477 public:
1479 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1480 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1481 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1482 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1483 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1484 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1485 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1486 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1487 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1488 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1489 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1490 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1491 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1492 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1493 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1494 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1495 _In_ const std::locale& locale = std::locale()) :
1497 m_digit_0(digit_0),
1498 m_digit_1(digit_1),
1499 m_digit_2(digit_2),
1500 m_digit_3(digit_3),
1501 m_digit_4(digit_4),
1502 m_digit_5(digit_5),
1503 m_digit_6(digit_6),
1504 m_digit_7(digit_7),
1505 m_digit_8(digit_8),
1506 m_digit_9(digit_9),
1507 m_digit_10(digit_10),
1508 m_digit_11(digit_11),
1509 m_digit_12(digit_12),
1510 m_digit_13(digit_13),
1511 m_digit_14(digit_14),
1512 m_digit_15(digit_15)
1513 {}
1514
1515 protected:
// Accumulates a base-16 number from consecutive digit matches beginning at `start`.
// `text` is the buffer to parse, `start`/`end` bound the region and `flags` is passed
// unchanged to every digit parser.
1516 virtual bool do_match(
1517 _In_reads_or_z_opt_(end) const T* text,
1518 _In_ size_t start = 0,
1519 _In_ size_t end = SIZE_MAX,
1520 _In_ int flags = match_default)
1521 {
1522 _Assume_(text || start >= end);
// The loop has no increment expression: the cursor (interval.end) only moves when a digit
// parser matches. It stops at `end`, at a zero terminator, or at the first non-digit.
1523 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1524 size_t dig;
// Digit parsers are tried in the order 0..15; the first one that matches decides the digit.
1525 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1526 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1527 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1528 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1529 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1530 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1531 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1532 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1533 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1534 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1535 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1536 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1537 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1538 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1539 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1540 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
1541 else break;
// Shift the accumulated value one hexadecimal place and append the new digit.
1542 this->value = this->value * 16 + dig;
1543 }
// NOTE(review): listing line 1544 is absent from this extract. The closing brace after
// `return true` implies a guarding statement here, presumably a test that at least one
// digit was consumed - confirm against the original header.
1545 this->interval.start = start;
1546 return true;
1547 }
1548 this->interval.invalidate();
1549 return false;
1550 }
1551
1552 std::shared_ptr<basic_parser<T>>
1553 m_digit_0,
1554 m_digit_1,
1555 m_digit_2,
1556 m_digit_3,
1557 m_digit_4,
1558 m_digit_5,
1559 m_digit_6,
1560 m_digit_7,
1561 m_digit_8,
1562 m_digit_9,
1563 m_digit_10,
1564 m_digit_11,
1565 m_digit_12,
1566 m_digit_13,
1567 m_digit_14,
1568 m_digit_15;
1569 };
1570
1573#ifdef _UNICODE
1574 using tinteger16 = winteger16;
1575#else
1576 using tinteger16 = integer16;
1577#endif
1579
1583 template <class T>
1585 {
1586 public:
1588 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1589 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1590 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1591 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1592 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1593 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1594 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1595 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1596 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1597 _In_ const std::locale& locale = std::locale()) :
1599 m_digit_1(digit_1),
1600 m_digit_5(digit_5),
1601 m_digit_10(digit_10),
1602 m_digit_50(digit_50),
1603 m_digit_100(digit_100),
1604 m_digit_500(digit_500),
1605 m_digit_1000(digit_1000),
1606 m_digit_5000(digit_5000),
1607 m_digit_10000(digit_10000)
1608 {}
1609
1610 protected:
// Parses a numeral written with additive/subtractive symbols worth 1, 5, 10, 50, 100, 500,
// 1000, 5000 and 10000. Each symbol has its own parser; a null symbol parser is skipped.
// `text` is the buffer to parse, `start`/`end` bound the region and `flags` is passed
// unchanged to every symbol parser. Returns true when the accumulated value is non-zero.
1611 virtual bool do_match(
1612 _In_reads_or_z_opt_(end) const T* text,
1613 _In_ size_t start = 0,
1614 _In_ size_t end = SIZE_MAX,
1615 _In_ int flags = match_default)
1616 {
1617 _Assume_(text || start >= end);
// NOTE(review): listing line 1619 (the declaration of `dig`) is absent from this extract.
// The code below uses dig[0] as the current symbol, dig[1]..dig[3] as the three preceding
// ones and dig[4] as the very first symbol; the `dig[4] == SIZE_MAX` test suggests the
// array starts out filled with SIZE_MAX - confirm against the original header.
1618 size_t
1620 end2;
1621
// The increment expression shifts the symbol history by one and advances the cursor to the
// end of the symbol just accepted; it is skipped whenever the body leaves with `break`.
1622 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
1623 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1624 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1625 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1626 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1627 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1628 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1629 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1630 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1631 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1632 else break;
1633
1634 // Store first digit.
1635 if (dig[4] == SIZE_MAX) dig[4] = dig[0];
1636
1637 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1638 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1639 break;
1640 }
1641 if (dig[0] <= dig[1]) {
1642 // Digit is less or equal previous one: add.
1643 this->value += dig[0];
1644 }
1645 else if (
1646 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1647 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1648 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1649 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1650 {
1651 // Digit is up to two orders bigger than previous one: subtract. But...
1652 if (dig[2] < dig[0]) {
1653 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1654 break;
1655 }
1656 this->value -= dig[1]; // Cancel addition in the previous step.
1657 dig[0] -= dig[1]; // Combine last two digits.
1658 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1659 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1660 this->value += dig[0]; // Add combined value.
1661 }
1662 else {
1663 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1664 break;
1665 }
1666 }
// A zero value means no symbol was accepted, which counts as "no match".
1667 if (this->value) {
1668 this->interval.start = start;
1669 return true;
1670 }
1671 this->interval.invalidate();
1672 return false;
1673 }
1674
1675 std::shared_ptr<basic_parser<T>>
1676 m_digit_1,
1677 m_digit_5,
1678 m_digit_10,
1679 m_digit_50,
1680 m_digit_100,
1681 m_digit_500,
1682 m_digit_1000,
1683 m_digit_5000,
1684 m_digit_10000;
1685 };
1686
1689#ifdef _UNICODE
1691#else
1693#endif
1695
1699 template <class T>
1701 {
1702 public:
1704 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1705 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1706 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1707 _In_ const std::locale& locale = std::locale()) :
1709 numerator(_numerator),
1710 fraction_line(_fraction_line),
1711 denominator(_denominator)
1712 {}
1713
// Invalidates the three component parsers (numerator, fraction line, denominator).
// NOTE(review): listing line 1719 is absent from this extract; presumably it chains to the
// base-class invalidate() - confirm against the original header.
1714 virtual void invalidate()
1715 {
1716 numerator->invalidate();
1717 fraction_line->invalidate();
1718 denominator->invalidate();
1720 }
1721
1722 std::shared_ptr<basic_parser<T>> numerator;
1723 std::shared_ptr<basic_parser<T>> fraction_line;
1724 std::shared_ptr<basic_parser<T>> denominator;
1725
1726 protected:
1727 virtual bool do_match(
1728 _In_reads_or_z_opt_(end) const T* text,
1729 _In_ size_t start = 0,
1730 _In_ size_t end = SIZE_MAX,
1731 _In_ int flags = match_default)
1732 {
1733 _Assume_(text || start >= end);
1734 if (numerator->match(text, start, end, flags) &&
1735 fraction_line->match(text, numerator->interval.end, end, flags) &&
1736 denominator->match(text, fraction_line->interval.end, end, flags))
1737 {
1738 this->interval.start = start;
1739 this->interval.end = denominator->interval.end;
1740 return true;
1741 }
1742 numerator->invalidate();
1743 fraction_line->invalidate();
1744 denominator->invalidate();
1745 this->interval.invalidate();
1746 return false;
1747 }
1748 };
1749
1752#ifdef _UNICODE
1753 using tfraction = wfraction;
1754#else
1755 using tfraction = fraction;
1756#endif
1758
1762 template <class T>
1763 class basic_score : public basic_parser<T>
1764 {
1765 public:
1767 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1768 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1769 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1770 _In_ const std::shared_ptr<basic_parser<T>>& space,
1771 _In_ const std::locale& locale = std::locale()) :
1773 home(_home),
1774 separator(_separator),
1775 guest(_guest),
1776 m_space(space)
1777 {}
1778
// Invalidates the home, separator and guest component parsers.
// NOTE(review): listing line 1784 is absent from this extract; presumably it chains to the
// base-class invalidate() - confirm against the original header.
1779 virtual void invalidate()
1780 {
1781 home->invalidate();
1782 separator->invalidate();
1783 guest->invalidate();
1785 }
1786
1787 std::shared_ptr<basic_parser<T>> home;
1788 std::shared_ptr<basic_parser<T>> separator;
1789 std::shared_ptr<basic_parser<T>> guest;
1790
1791 protected:
1792 virtual bool do_match(
1793 _In_reads_or_z_opt_(end) const T* text,
1794 _In_ size_t start = 0,
1795 _In_ size_t end = SIZE_MAX,
1796 _In_ int flags = match_default)
1797 {
1798 _Assume_(text || start >= end);
1799 this->interval.end = start;
1800
1801 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1802
1803 if (home->match(text, this->interval.end, end, flags))
1804 this->interval.end = home->interval.end;
1805 else
1806 goto end;
1807
1808 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1809
1810 if (separator->match(text, this->interval.end, end, flags))
1811 this->interval.end = separator->interval.end;
1812 else
1813 goto end;
1814
1815 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1816
1817 if (guest->match(text, this->interval.end, end, flags))
1818 this->interval.end = guest->interval.end;
1819 else
1820 goto end;
1821
1822 this->interval.start = start;
1823 return true;
1824
1825 end:
1826 home->invalidate();
1827 separator->invalidate();
1828 guest->invalidate();
1829 this->interval.invalidate();
1830 return false;
1831 }
1832
1833 std::shared_ptr<basic_parser<T>> m_space;
1834 };
1835
1836 using score = basic_score<char>;
1838#ifdef _UNICODE
1839 using tscore = wscore;
1840#else
1841 using tscore = score;
1842#endif
1844
1848 template <class T>
1850 {
1851 public:
1853 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1854 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1855 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1856 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1857 _In_ const std::locale& locale = std::locale()) :
1863 {}
1864
// Invalidates whichever sign parsers are set (each may be null) and the number parser.
// NOTE(review): listing line 1871 is absent from this extract; presumably it chains to the
// base-class invalidate() - confirm against the original header.
1865 virtual void invalidate()
1866 {
1867 if (positive_sign) positive_sign->invalidate();
1868 if (negative_sign) negative_sign->invalidate();
1869 if (special_sign) special_sign->invalidate();
1870 number->invalidate();
1872 }
1873
1874 std::shared_ptr<basic_parser<T>> positive_sign;
1875 std::shared_ptr<basic_parser<T>> negative_sign;
1876 std::shared_ptr<basic_parser<T>> special_sign;
1877 std::shared_ptr<basic_parser<T>> number;
1878
1879 protected:
1880 virtual bool do_match(
1881 _In_reads_or_z_opt_(end) const T* text,
1882 _In_ size_t start = 0,
1883 _In_ size_t end = SIZE_MAX,
1884 _In_ int flags = match_default)
1885 {
1886 _Assume_(text || start >= end);
1887 this->interval.end = start;
1888 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1889 this->interval.end = positive_sign->interval.end;
1890 if (negative_sign) negative_sign->invalidate();
1891 if (special_sign) special_sign->invalidate();
1892 }
1893 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1894 this->interval.end = negative_sign->interval.end;
1895 if (positive_sign) positive_sign->invalidate();
1896 if (special_sign) special_sign->invalidate();
1897 }
1898 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1899 this->interval.end = special_sign->interval.end;
1900 if (positive_sign) positive_sign->invalidate();
1901 if (negative_sign) negative_sign->invalidate();
1902 }
1903 else {
1904 if (positive_sign) positive_sign->invalidate();
1905 if (negative_sign) negative_sign->invalidate();
1906 if (special_sign) special_sign->invalidate();
1907 }
1908 if (number->match(text, this->interval.end, end, flags)) {
1909 this->interval.start = start;
1910 this->interval.end = number->interval.end;
1911 return true;
1912 }
1913 if (positive_sign) positive_sign->invalidate();
1914 if (negative_sign) negative_sign->invalidate();
1915 if (special_sign) special_sign->invalidate();
1916 number->invalidate();
1917 this->interval.invalidate();
1918 return false;
1919 }
1920 };
1921
// Convenience aliases for basic_signed_numeral: the char and wchar_t instantiations, a
// t-prefixed alias that follows _UNICODE, and an sgml_-prefixed alias that is also the
// char instantiation.
1922 using signed_numeral = basic_signed_numeral<char>;
1923 using wsigned_numeral = basic_signed_numeral<wchar_t>;
1924#ifdef _UNICODE
1925 using tsigned_numeral = wsigned_numeral;
1926#else
1927 using tsigned_numeral = signed_numeral;
1928#endif
1929 using sgml_signed_numeral = basic_signed_numeral<char>;
1930
1934 template <class T>
1936 {
1937 public:
1939 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1940 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1941 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1942 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1943 _In_ const std::shared_ptr<basic_parser<T>>& space,
1944 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1945 _In_ const std::locale& locale = std::locale()) :
1952 m_space(space)
1953 {}
1954
// Invalidates whichever sign parsers are set (each may be null) plus the integer and
// fraction parsers.
// NOTE(review): listing line 1962 is absent from this extract; presumably it chains to the
// base-class invalidate() - confirm against the original header.
1955 virtual void invalidate()
1956 {
1957 if (positive_sign) positive_sign->invalidate();
1958 if (negative_sign) negative_sign->invalidate();
1959 if (special_sign) special_sign->invalidate();
1960 integer->invalidate();
1961 fraction->invalidate();
1963 }
1964
1965 std::shared_ptr<basic_parser<T>> positive_sign;
1966 std::shared_ptr<basic_parser<T>> negative_sign;
1967 std::shared_ptr<basic_parser<T>> special_sign;
1968 std::shared_ptr<basic_parser<T>> integer;
1969 std::shared_ptr<basic_parser<T>> fraction;
1970
1971 protected:
1972 virtual bool do_match(
1973 _In_reads_or_z_opt_(end) const T* text,
1974 _In_ size_t start = 0,
1975 _In_ size_t end = SIZE_MAX,
1976 _In_ int flags = match_default)
1977 {
1978 _Assume_(text || start >= end);
1979 this->interval.end = start;
1980
1981 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1982 this->interval.end = positive_sign->interval.end;
1983 if (negative_sign) negative_sign->invalidate();
1984 if (special_sign) special_sign->invalidate();
1985 }
1986 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1987 this->interval.end = negative_sign->interval.end;
1988 if (positive_sign) positive_sign->invalidate();
1989 if (special_sign) special_sign->invalidate();
1990 }
1991 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1992 this->interval.end = special_sign->interval.end;
1993 if (positive_sign) positive_sign->invalidate();
1994 if (negative_sign) negative_sign->invalidate();
1995 }
1996 else {
1997 if (positive_sign) positive_sign->invalidate();
1998 if (negative_sign) negative_sign->invalidate();
1999 if (special_sign) special_sign->invalidate();
2000 }
2001
2002 // Check for <integer> <fraction>
2003 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
2004 if (integer->match(text, this->interval.end, end, flags) &&
2005 m_space->match(text, integer->interval.end, end, space_match_flags))
2006 {
2007 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
2008 if (fraction->match(text, this->interval.end, end, flags)) {
2009 this->interval.start = start;
2010 this->interval.end = fraction->interval.end;
2011 return true;
2012 }
2013 fraction->invalidate();
2014 this->interval.start = start;
2015 this->interval.end = integer->interval.end;
2016 return true;
2017 }
2018
2019 // Check for <fraction>
2020 if (fraction->match(text, this->interval.end, end, flags)) {
2021 integer->invalidate();
2022 this->interval.start = start;
2023 this->interval.end = fraction->interval.end;
2024 return true;
2025 }
2026
2027 // Check for <integer>
2028 if (integer->match(text, this->interval.end, end, flags)) {
2029 fraction->invalidate();
2030 this->interval.start = start;
2031 this->interval.end = integer->interval.end;
2032 return true;
2033 }
2034
2035 if (positive_sign) positive_sign->invalidate();
2036 if (negative_sign) negative_sign->invalidate();
2037 if (special_sign) special_sign->invalidate();
2038 integer->invalidate();
2039 fraction->invalidate();
2040 this->interval.invalidate();
2041 return false;
2042 }
2043
2044 std::shared_ptr<basic_parser<T>> m_space;
2045 };
2046
// Convenience aliases for basic_mixed_numeral: the char and wchar_t instantiations, a
// t-prefixed alias that follows _UNICODE, and an sgml_-prefixed alias that is also the
// char instantiation.
2047 using mixed_numeral = basic_mixed_numeral<char>;
2048 using wmixed_numeral = basic_mixed_numeral<wchar_t>;
2049#ifdef _UNICODE
2050 using tmixed_numeral = wmixed_numeral;
2051#else
2052 using tmixed_numeral = mixed_numeral;
2053#endif
2054 using sgml_mixed_numeral = basic_mixed_numeral<char>;
2055
2059 template <class T>
2061 {
2062 public:
2064 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2065 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2066 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2067 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2068 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2069 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2070 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2071 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2072 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2073 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2074 _In_ const std::locale& locale = std::locale()) :
2086 value(std::numeric_limits<double>::quiet_NaN())
2087 {}
2088
// Invalidates every component parser and resets `value` to a quiet NaN.
// The sign, exponent-symbol, exponent-sign and exponent parsers may be null and are
// checked first; integer, decimal separator and decimal are invalidated unconditionally.
// NOTE(review): listing line 2102 is absent from this extract; presumably it chains to the
// base-class invalidate() - confirm against the original header.
2089 virtual void invalidate()
2090 {
2091 if (positive_sign) positive_sign->invalidate();
2092 if (negative_sign) negative_sign->invalidate();
2093 if (special_sign) special_sign->invalidate();
2094 integer->invalidate();
2095 decimal_separator->invalidate();
2096 decimal->invalidate();
2097 if (exponent_symbol) exponent_symbol->invalidate();
2098 if (positive_exp_sign) positive_exp_sign->invalidate();
2099 if (negative_exp_sign) negative_exp_sign->invalidate();
2100 if (exponent) exponent->invalidate();
2101 value = std::numeric_limits<double>::quiet_NaN();
2103 }
2104
2105 std::shared_ptr<basic_parser<T>> positive_sign;
2106 std::shared_ptr<basic_parser<T>> negative_sign;
2107 std::shared_ptr<basic_parser<T>> special_sign;
2108 std::shared_ptr<basic_integer<T>> integer;
2109 std::shared_ptr<basic_parser<T>> decimal_separator;
2110 std::shared_ptr<basic_integer<T>> decimal;
2111 std::shared_ptr<basic_parser<T>> exponent_symbol;
2112 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2113 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2114 std::shared_ptr<basic_integer<T>> exponent;
2115 double value;
2116
2117 protected:
2118 virtual bool do_match(
2119 _In_reads_or_z_opt_(end) const T* text,
2120 _In_ size_t start = 0,
2121 _In_ size_t end = SIZE_MAX,
2122 _In_ int flags = match_default)
2123 {
2124 _Assume_(text || start >= end);
2125 this->interval.end = start;
2126
2127 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2128 this->interval.end = positive_sign->interval.end;
2129 if (negative_sign) negative_sign->invalidate();
2130 if (special_sign) special_sign->invalidate();
2131 }
2132 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2133 this->interval.end = negative_sign->interval.end;
2134 if (positive_sign) positive_sign->invalidate();
2135 if (special_sign) special_sign->invalidate();
2136 }
2137 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2138 this->interval.end = special_sign->interval.end;
2139 if (positive_sign) positive_sign->invalidate();
2140 if (negative_sign) negative_sign->invalidate();
2141 }
2142 else {
2143 if (positive_sign) positive_sign->invalidate();
2144 if (negative_sign) negative_sign->invalidate();
2145 if (special_sign) special_sign->invalidate();
2146 }
2147
2148 if (integer->match(text, this->interval.end, end, flags))
2149 this->interval.end = integer->interval.end;
2150
2151 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2152 decimal->match(text, decimal_separator->interval.end, end, flags))
2153 this->interval.end = decimal->interval.end;
2154 else {
2155 decimal_separator->invalidate();
2156 decimal->invalidate();
2157 }
2158
2159 if (integer->interval.empty() &&
2160 decimal->interval.empty())
2161 {
2162 // No integer part, no decimal part.
2163 if (positive_sign) positive_sign->invalidate();
2164 if (negative_sign) negative_sign->invalidate();
2165 if (special_sign) special_sign->invalidate();
2166 integer->invalidate();
2167 decimal_separator->invalidate();
2168 decimal->invalidate();
2169 if (exponent_symbol) exponent_symbol->invalidate();
2170 if (positive_exp_sign) positive_exp_sign->invalidate();
2171 if (negative_exp_sign) negative_exp_sign->invalidate();
2172 if (exponent) exponent->invalidate();
2173 this->interval.invalidate();
2174 return false;
2175 }
2176
2177 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2178 ((positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2179 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags)) ||
2180 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2181 {
2182 this->interval.end = exponent->interval.end;
2183 if (negative_exp_sign) negative_exp_sign->invalidate();
2184 }
2185 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2186 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2187 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2188 {
2189 this->interval.end = exponent->interval.end;
2190 if (positive_exp_sign) positive_exp_sign->invalidate();
2191 }
2192 else {
2193 if (exponent_symbol) exponent_symbol->invalidate();
2194 if (positive_exp_sign) positive_exp_sign->invalidate();
2195 if (negative_exp_sign) negative_exp_sign->invalidate();
2196 if (exponent) exponent->invalidate();
2197 }
2198
2199 value = (double)integer->value;
2200 if (decimal->interval)
2201 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2202 if (negative_sign && negative_sign->interval)
2203 value = -value;
2204 if (exponent && exponent->interval) {
2205 double e = (double)exponent->value;
2206 if (negative_exp_sign && negative_exp_sign->interval)
2207 e = -e;
2208 value *= pow(10.0, e);
2209 }
2210
2211 this->interval.start = start;
2212 return true;
2213 }
2214 };
2215
// Convenience aliases for basic_scientific_numeral: the char and wchar_t instantiations, a
// t-prefixed alias that follows _UNICODE, and an sgml_-prefixed alias that is also the
// char instantiation.
2216 using scientific_numeral = basic_scientific_numeral<char>;
2217 using wscientific_numeral = basic_scientific_numeral<wchar_t>;
2218#ifdef _UNICODE
2219 using tscientific_numeral = wscientific_numeral;
2220#else
2221 using tscientific_numeral = scientific_numeral;
2222#endif
2223 using sgml_scientific_numeral = basic_scientific_numeral<char>;
2224
2228 template <class T>
2230 {
2231 public:
2233 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2234 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2235 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2236 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2237 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2238 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2239 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2240 _In_ const std::locale& locale = std::locale()) :
2249 {}
2250
// Invalidates whichever sign parsers are set (each may be null) plus the currency,
// integer, decimal separator and decimal parsers.
// NOTE(review): listing line 2260 is absent from this extract; presumably it chains to the
// base-class invalidate() - confirm against the original header.
2251 virtual void invalidate()
2252 {
2253 if (positive_sign) positive_sign->invalidate();
2254 if (negative_sign) negative_sign->invalidate();
2255 if (special_sign) special_sign->invalidate();
2256 currency->invalidate();
2257 integer->invalidate();
2258 decimal_separator->invalidate();
2259 decimal->invalidate();
2261 }
2262
2263 std::shared_ptr<basic_parser<T>> positive_sign;
2264 std::shared_ptr<basic_parser<T>> negative_sign;
2265 std::shared_ptr<basic_parser<T>> special_sign;
2266 std::shared_ptr<basic_parser<T>> currency;
2267 std::shared_ptr<basic_parser<T>> integer;
2268 std::shared_ptr<basic_parser<T>> decimal_separator;
2269 std::shared_ptr<basic_parser<T>> decimal;
2270
2271 protected:
2272 virtual bool do_match(
2273 _In_reads_or_z_opt_(end) const T* text,
2274 _In_ size_t start = 0,
2275 _In_ size_t end = SIZE_MAX,
2276 _In_ int flags = match_default)
2277 {
2278 _Assume_(text || start >= end);
2279 this->interval.end = start;
2280
2281 if (positive_sign->match(text, this->interval.end, end, flags)) {
2282 this->interval.end = positive_sign->interval.end;
2283 if (negative_sign) negative_sign->invalidate();
2284 if (special_sign) special_sign->invalidate();
2285 }
2286 else if (negative_sign->match(text, this->interval.end, end, flags)) {
2287 this->interval.end = negative_sign->interval.end;
2288 if (positive_sign) positive_sign->invalidate();
2289 if (special_sign) special_sign->invalidate();
2290 }
2291 else if (special_sign->match(text, this->interval.end, end, flags)) {
2292 this->interval.end = special_sign->interval.end;
2293 if (positive_sign) positive_sign->invalidate();
2294 if (negative_sign) negative_sign->invalidate();
2295 }
2296 else {
2297 if (positive_sign) positive_sign->invalidate();
2298 if (negative_sign) negative_sign->invalidate();
2299 if (special_sign) special_sign->invalidate();
2300 }
2301
2302 if (currency->match(text, this->interval.end, end, flags))
2303 this->interval.end = currency->interval.end;
2304 else {
2305 if (positive_sign) positive_sign->invalidate();
2306 if (negative_sign) negative_sign->invalidate();
2307 if (special_sign) special_sign->invalidate();
2308 integer->invalidate();
2309 decimal_separator->invalidate();
2310 decimal->invalidate();
2311 this->interval.invalidate();
2312 return false;
2313 }
2314
2315 if (integer->match(text, this->interval.end, end, flags))
2316 this->interval.end = integer->interval.end;
2317 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2318 decimal->match(text, decimal_separator->interval.end, end, flags))
2319 this->interval.end = decimal->interval.end;
2320 else {
2321 decimal_separator->invalidate();
2322 decimal->invalidate();
2323 }
2324
2325 if (integer->interval.empty() &&
2326 decimal->interval.empty())
2327 {
2328 // No integer part, no decimal part.
2329 if (positive_sign) positive_sign->invalidate();
2330 if (negative_sign) negative_sign->invalidate();
2331 if (special_sign) special_sign->invalidate();
2332 currency->invalidate();
2333 integer->invalidate();
2334 decimal_separator->invalidate();
2335 decimal->invalidate();
2336 this->interval.invalidate();
2337 return false;
2338 }
2339
2340 this->interval.start = start;
2341 return true;
2342 }
2343 };
2344
2345 using monetary_numeral = basic_monetary_numeral<char>;
2346 using wmonetary_numeral = basic_monetary_numeral<wchar_t>;
2347#ifdef _UNICODE
2348 using tmonetary_numeral = wmonetary_numeral;
2349#else
2350 using tmonetary_numeral = monetary_numeral;
2351#endif
2352 using sgml_monetary_numeral = basic_monetary_numeral<char>;
2353
2357 template <class T>
2359 {
2360 public:
2362 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2363 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2364 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2365 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2366 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2367 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2368 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2369 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2370 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2371 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2372 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2373 _In_ const std::locale& locale = std::locale()) :
2375 m_digit_0(digit_0),
2376 m_digit_1(digit_1),
2377 m_digit_2(digit_2),
2378 m_digit_3(digit_3),
2379 m_digit_4(digit_4),
2380 m_digit_5(digit_5),
2381 m_digit_6(digit_6),
2382 m_digit_7(digit_7),
2383 m_digit_8(digit_8),
2384 m_digit_9(digit_9),
2385 m_separator(separator)
2386 {
2387 value.s_addr = 0;
2388 }
2389
2390 virtual void invalidate()
2391 {
2392 components[0].start = 1;
2393 components[0].end = 0;
2394 components[1].start = 1;
2395 components[1].end = 0;
2396 components[2].start = 1;
2397 components[2].end = 0;
2398 components[3].start = 1;
2399 components[3].end = 0;
2400 value.s_addr = 0;
2402 }
2403
2406
2407 protected:
2408 virtual bool do_match(
2409 _In_reads_or_z_opt_(end) const T* text,
2410 _In_ size_t start = 0,
2411 _In_ size_t end = SIZE_MAX,
2412 _In_ int flags = match_default)
2413 {
2414 _Assume_(text || start >= end);
2415 this->interval.end = start;
2416 value.s_addr = 0;
2417
2418 size_t i;
2419 for (i = 0; i < 4; i++) {
2420 if (i) {
2421 if (m_separator->match(text, this->interval.end, end, flags))
2422 this->interval.end = m_separator->interval.end;
2423 else
2424 goto error;
2425 }
2426
2427 components[i].start = this->interval.end;
2428 bool is_empty = true;
2429 size_t x;
2430 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2431 size_t dig, digit_end;
2432 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2433 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2434 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2435 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2436 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2437 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2438 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2439 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2440 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2441 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2442 else break;
2443 size_t x_n = x * 10 + dig;
2444 if (x_n <= 255) {
2445 x = x_n;
2446 this->interval.end = digit_end;
2447 is_empty = false;
2448 }
2449 else
2450 break;
2451 }
2452 if (is_empty)
2453 goto error;
2454 components[i].end = this->interval.end;
2455 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2456 }
2457 if (i < 4)
2458 goto error;
2459
2460 this->interval.start = start;
2461 return true;
2462
2463 error:
2464 invalidate();
2465 return false;
2466 }
2467
2468 std::shared_ptr<basic_parser<T>>
2469 m_digit_0,
2470 m_digit_1,
2471 m_digit_2,
2472 m_digit_3,
2473 m_digit_4,
2474 m_digit_5,
2475 m_digit_6,
2476 m_digit_7,
2477 m_digit_8,
2478 m_digit_9;
2479 std::shared_ptr<basic_parser<T>> m_separator;
2480 };
2481
2482 using ipv4_address = basic_ipv4_address<char>;
2483 using wipv4_address = basic_ipv4_address<wchar_t>;
2484#ifdef _UNICODE
2485 using tipv4_address = wipv4_address;
2486#else
2487 using tipv4_address = ipv4_address;
2488#endif
2489 using sgml_ipv4_address = basic_ipv4_address<char>;
2490
2494 template <class T>
2496 {
2497 public:
2498 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2499
2500 protected:
2501 virtual bool do_match(
2502 _In_reads_or_z_opt_(end) const T* text,
2503 _In_ size_t start = 0,
2504 _In_ size_t end = SIZE_MAX,
2505 _In_ int flags = match_default)
2506 {
2507 _Assume_(text || start >= end);
2508 if (start < end && text[start]) {
2509 if (text[start] == '-' ||
2510 text[start] == '_' ||
2511 text[start] == ':' ||
2512 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2513 {
2514 this->interval.end = (this->interval.start = start) + 1;
2515 return true;
2516 }
2517 }
2518 this->interval.invalidate();
2519 return false;
2520 }
2521 };
2522
2525#ifdef _UNICODE
2527#else
2529#endif
2530
2535 {
2536 public:
2537 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2538
2539 protected:
2540 virtual bool do_match(
2541 _In_reads_or_z_(end) const char* text,
2542 _In_ size_t start = 0,
2543 _In_ size_t end = SIZE_MAX,
2544 _In_ int flags = match_default)
2545 {
2546 _Assume_(text || start >= end);
2547 if (start < end && text[start]) {
2548 wchar_t buf[3];
2549 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2550 const wchar_t* chr_end = chr + stdex::strlen(chr);
2551 if (((chr[0] == L'-' ||
2552 chr[0] == L'_' ||
2553 chr[0] == L':') && chr[1] == 0) ||
2554 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2555 {
2556 this->interval.start = start;
2557 return true;
2558 }
2559 }
2560 this->interval.invalidate();
2561 return false;
2562 }
2563 };
2564
2568 template <class T>
2570 {
2571 public:
2573 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2574 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2575 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2576 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2577 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2578 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2579 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2580 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2581 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2582 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2583 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2584 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2585 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2586 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2587 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2588 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2589 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2590 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2591 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2592 _In_ const std::locale& locale = std::locale()) :
2594 m_digit_0(digit_0),
2595 m_digit_1(digit_1),
2596 m_digit_2(digit_2),
2597 m_digit_3(digit_3),
2598 m_digit_4(digit_4),
2599 m_digit_5(digit_5),
2600 m_digit_6(digit_6),
2601 m_digit_7(digit_7),
2602 m_digit_8(digit_8),
2603 m_digit_9(digit_9),
2604 m_digit_10(digit_10),
2605 m_digit_11(digit_11),
2606 m_digit_12(digit_12),
2607 m_digit_13(digit_13),
2608 m_digit_14(digit_14),
2609 m_digit_15(digit_15),
2610 m_separator(separator),
2611 m_scope_id_separator(scope_id_separator),
2613 {
2614 memset(&value, 0, sizeof(value));
2615 }
2616
2617 virtual void invalidate()
2618 {
2619 components[0].start = 1;
2620 components[0].end = 0;
2621 components[1].start = 1;
2622 components[1].end = 0;
2623 components[2].start = 1;
2624 components[2].end = 0;
2625 components[3].start = 1;
2626 components[3].end = 0;
2627 components[4].start = 1;
2628 components[4].end = 0;
2629 components[5].start = 1;
2630 components[5].end = 0;
2631 components[6].start = 1;
2632 components[6].end = 0;
2633 components[7].start = 1;
2634 components[7].end = 0;
2635 memset(&value, 0, sizeof(value));
2636 if (scope_id) scope_id->invalidate();
2638 }
2639
2642 std::shared_ptr<basic_parser<T>> scope_id;
2643
2644 protected:
2645 virtual bool do_match(
2646 _In_reads_or_z_opt_(end) const T* text,
2647 _In_ size_t start = 0,
2648 _In_ size_t end = SIZE_MAX,
2649 _In_ int flags = match_default)
2650 {
2651 _Assume_(text || start >= end);
2652 this->interval.end = start;
2653 memset(&value, 0, sizeof(value));
2654
2655 size_t i, compaction_i = SIZE_MAX, compaction_start = start;
2656 for (i = 0; i < 8; i++) {
2657 bool is_empty = true;
2658
2659 if (m_separator->match(text, this->interval.end, end, flags)) {
2660 // : found
2661 this->interval.end = m_separator->interval.end;
2662 if (m_separator->match(text, this->interval.end, end, flags)) {
2663 // :: found
2664 if (compaction_i == SIZE_MAX) {
2665 // Zero compaction start
2666 compaction_i = i;
2667 compaction_start = m_separator->interval.start;
2668 this->interval.end = m_separator->interval.end;
2669 }
2670 else {
2671 // More than one zero compaction
2672 break;
2673 }
2674 }
2675 else if (!i) {
2676 // Leading : found
2677 goto error;
2678 }
2679 }
2680 else if (i) {
2681 // : missing
2682 break;
2683 }
2684
2685 components[i].start = this->interval.end;
2686 size_t x;
2687 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2688 size_t dig, digit_end;
2689 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2690 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2691 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2692 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2693 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2694 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2695 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2696 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2697 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2698 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2699 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2700 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2701 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2702 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2703 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2704 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2705 else break;
2706 size_t x_n = x * 16 + dig;
2707 if (x_n <= 0xffff) {
2708 x = x_n;
2709 this->interval.end = digit_end;
2710 is_empty = false;
2711 }
2712 else
2713 break;
2714 }
2715 if (is_empty) {
2716 if (compaction_i != SIZE_MAX) {
2717 // Zero compaction active: no sweat.
2718 break;
2719 }
2720 goto error;
2721 }
2722 components[i].end = this->interval.end;
2723 this->value.s6_words[i] = (uint16_t)x;
2724 }
2725
2726 if (compaction_i != SIZE_MAX) {
2727 // Align components right due to zero compaction.
2728 size_t j, k;
2729 for (j = 8, k = i; k > compaction_i;) {
2730 this->value.s6_words[--j] = this->value.s6_words[--k];
2732 }
2733 for (; j > compaction_i;) {
2734 this->value.s6_words[--j] = 0;
2735 components[j].start =
2737 }
2738 }
2739 else if (i < 8)
2740 goto error;
2741
2742 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2743 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2744 this->interval.end = scope_id->interval.end;
2745 else if (scope_id)
2746 scope_id->invalidate();
2747
2748 this->interval.start = start;
2749 return true;
2750
2751 error:
2752 invalidate();
2753 return false;
2754 }
2755
2756 std::shared_ptr<basic_parser<T>>
2757 m_digit_0,
2758 m_digit_1,
2759 m_digit_2,
2760 m_digit_3,
2761 m_digit_4,
2762 m_digit_5,
2763 m_digit_6,
2764 m_digit_7,
2765 m_digit_8,
2766 m_digit_9,
2767 m_digit_10,
2768 m_digit_11,
2769 m_digit_12,
2770 m_digit_13,
2771 m_digit_14,
2772 m_digit_15;
2773 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2774 };
2775
2776 using ipv6_address = basic_ipv6_address<char>;
2777 using wipv6_address = basic_ipv6_address<wchar_t>;
2778#ifdef _UNICODE
2779 using tipv6_address = wipv6_address;
2780#else
2781 using tipv6_address = ipv6_address;
2782#endif
2783 using sgml_ipv6_address = basic_ipv6_address<char>;
2784
2788 template <class T>
2790 {
2791 public:
2793 _In_ bool allow_idn,
2794 _In_ const std::locale& locale = std::locale()) :
2796 m_allow_idn(allow_idn),
2797 allow_on_edge(true)
2798 {}
2799
2801
2802 protected:
2803 virtual bool do_match(
2804 _In_reads_or_z_opt_(end) const T* text,
2805 _In_ size_t start = 0,
2806 _In_ size_t end = SIZE_MAX,
2807 _In_ int flags = match_default)
2808 {
2809 _Assume_(text || start >= end);
2810 if (start < end && text[start]) {
2811 if (('A' <= text[start] && text[start] <= 'Z') ||
2812 ('a' <= text[start] && text[start] <= 'z') ||
2813 ('0' <= text[start] && text[start] <= '9'))
2814 allow_on_edge = true;
2815 else if (text[start] == '-')
2816 allow_on_edge = false;
2817 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2818 allow_on_edge = true;
2819 else {
2820 this->interval.invalidate();
2821 return false;
2822 }
2823 this->interval.end = (this->interval.start = start) + 1;
2824 return true;
2825 }
2826 this->interval.invalidate();
2827 return false;
2828 }
2829
2830 bool m_allow_idn;
2831 };
2832
2833 using dns_domain_char = basic_dns_domain_char<char>;
2834 using wdns_domain_char = basic_dns_domain_char<wchar_t>;
2835#ifdef _UNICODE
2836 using tdns_domain_char = wdns_domain_char;
2837#else
2838 using tdns_domain_char = dns_domain_char;
2839#endif
2840
2845 {
2846 public:
2848 _In_ bool allow_idn,
2849 _In_ const std::locale& locale = std::locale()) :
2851 {}
2852
2853 protected:
2854 virtual bool do_match(
2855 _In_reads_or_z_(end) const char* text,
2856 _In_ size_t start = 0,
2857 _In_ size_t end = SIZE_MAX,
2858 _In_ int flags = match_default)
2859 {
2860 _Assume_(text || start >= end);
2861 if (start < end && text[start]) {
2862 wchar_t buf[3];
2863 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2864 const wchar_t* chr_end = chr + stdex::strlen(chr);
2865 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2866 ('a' <= chr[0] && chr[0] <= 'z') ||
2867 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2868 allow_on_edge = true;
2869 else if (chr[0] == '-' && chr[1] == 0)
2870 allow_on_edge = false;
2871 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2872 allow_on_edge = true;
2873 else {
2874 this->interval.invalidate();
2875 return false;
2876 }
2877 this->interval.start = start;
2878 return true;
2879 }
2880 this->interval.invalidate();
2881 return false;
2882 }
2883 };
2884
2888 template <class T>
2890 {
2891 public:
2893 _In_ bool allow_absolute,
2894 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2895 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2896 _In_ const std::locale& locale = std::locale()) :
2899 m_domain_char(domain_char),
2900 m_separator(separator)
2901 {}
2902
2903 protected:
2904 virtual bool do_match(
2905 _In_reads_or_z_opt_(end) const T* text,
2906 _In_ size_t start = 0,
2907 _In_ size_t end = SIZE_MAX,
2908 _In_ int flags = match_default)
2909 {
2910 _Assume_(text || start >= end);
2911 size_t i = start, count;
2912 for (count = 0; i < end && text[i] && count < 127; count++) {
2913 if (m_domain_char->match(text, i, end, flags) &&
2914 m_domain_char->allow_on_edge)
2915 {
2916 // Domain start
2917 this->interval.end = i = m_domain_char->interval.end;
2918 while (i < end && text[i]) {
2919 if (m_domain_char->allow_on_edge &&
2920 m_separator->match(text, i, end, flags))
2921 {
2922 // Domain end
2923 if (m_allow_absolute)
2924 this->interval.end = i = m_separator->interval.end;
2925 else {
2926 this->interval.end = i;
2927 i = m_separator->interval.end;
2928 }
2929 break;
2930 }
2931 if (m_domain_char->match(text, i, end, flags)) {
2932 if (m_domain_char->allow_on_edge)
2933 this->interval.end = i = m_domain_char->interval.end;
2934 else
2935 i = m_domain_char->interval.end;
2936 }
2937 else {
2938 this->interval.start = start;
2939 return true;
2940 }
2941 }
2942 }
2943 else
2944 break;
2945 }
2946 if (count) {
2947 this->interval.start = start;
2948 return true;
2949 }
2950 this->interval.invalidate();
2951 return false;
2952 }
2953
2955 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2956 std::shared_ptr<basic_parser<T>> m_separator;
2957 };
2958
2961#ifdef _UNICODE
2962 using tdns_name = wdns_name;
2963#else
2964 using tdns_name = dns_name;
2965#endif
2967
2971 template <class T>
2973 {
2974 public:
2975 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2976
2977 protected:
2978 virtual bool do_match(
2979 _In_reads_or_z_opt_(end) const T* text,
2980 _In_ size_t start = 0,
2981 _In_ size_t end = SIZE_MAX,
2982 _In_ int flags = match_default)
2983 {
2984 _Assume_(text || start >= end);
2985 if (start < end && text[start]) {
2986 if (text[start] == '-' ||
2987 text[start] == '.' ||
2988 text[start] == '_' ||
2989 text[start] == '~' ||
2990 text[start] == '%' ||
2991 text[start] == '!' ||
2992 text[start] == '$' ||
2993 text[start] == '&' ||
2994 text[start] == '\'' ||
2995 //text[start] == '(' ||
2996 //text[start] == ')' ||
2997 text[start] == '*' ||
2998 text[start] == '+' ||
2999 text[start] == ',' ||
3000 text[start] == ';' ||
3001 text[start] == '=' ||
3002 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3003 {
3004 this->interval.end = (this->interval.start = start) + 1;
3005 return true;
3006 }
3007 }
3008 this->interval.invalidate();
3009 return false;
3010 }
3011 };
3012
3015#ifdef _UNICODE
3017#else
3019#endif
3020
3025 {
3026 public:
3027 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3028
3029 protected:
3030 virtual bool do_match(
3031 _In_reads_or_z_(end) const char* text,
3032 _In_ size_t start = 0,
3033 _In_ size_t end = SIZE_MAX,
3034 _In_ int flags = match_default)
3035 {
3036 _Assume_(text || start >= end);
3037 if (start < end && text[start]) {
3038 wchar_t buf[3];
3039 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3040 const wchar_t* chr_end = chr + stdex::strlen(chr);
3041 if (((chr[0] == L'-' ||
3042 chr[0] == L'.' ||
3043 chr[0] == L'_' ||
3044 chr[0] == L'~' ||
3045 chr[0] == L'%' ||
3046 chr[0] == L'!' ||
3047 chr[0] == L'$' ||
3048 chr[0] == L'&' ||
3049 chr[0] == L'\'' ||
3050 //chr[0] == L'(' ||
3051 //chr[0] == L')' ||
3052 chr[0] == L'*' ||
3053 chr[0] == L'+' ||
3054 chr[0] == L',' ||
3055 chr[0] == L';' ||
3056 chr[0] == L'=') && chr[1] == 0) ||
3057 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3058 {
3059 this->interval.start = start;
3060 return true;
3061 }
3062 }
3063
3064 this->interval.invalidate();
3065 return false;
3066 }
3067 };
3068
3072 template <class T>
3074 {
3075 public:
3076 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3077
3078 protected:
3079 virtual bool do_match(
3080 _In_reads_or_z_opt_(end) const T* text,
3081 _In_ size_t start = 0,
3082 _In_ size_t end = SIZE_MAX,
3083 _In_ int flags = match_default)
3084 {
3085 _Assume_(text || start >= end);
3086 if (start < end && text[start]) {
3087 if (text[start] == '-' ||
3088 text[start] == '.' ||
3089 text[start] == '_' ||
3090 text[start] == '~' ||
3091 text[start] == '%' ||
3092 text[start] == '!' ||
3093 text[start] == '$' ||
3094 text[start] == '&' ||
3095 text[start] == '\'' ||
3096 text[start] == '(' ||
3097 text[start] == ')' ||
3098 text[start] == '*' ||
3099 text[start] == '+' ||
3100 text[start] == ',' ||
3101 text[start] == ';' ||
3102 text[start] == '=' ||
3103 text[start] == ':' ||
3104 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3105 {
3106 this->interval.end = (this->interval.start = start) + 1;
3107 return true;
3108 }
3109 }
3110 this->interval.invalidate();
3111 return false;
3112 }
3113 };
3114
3117#ifdef _UNICODE
3119#else
3121#endif
3122
3127 {
3128 public:
3129 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3130
3131 protected:
3132 virtual bool do_match(
3133 _In_reads_or_z_(end) const char* text,
3134 _In_ size_t start = 0,
3135 _In_ size_t end = SIZE_MAX,
3136 _In_ int flags = match_default)
3137 {
3138 _Assume_(text || start >= end);
3139 if (start < end && text[start]) {
3140 wchar_t buf[3];
3141 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3142 const wchar_t* chr_end = chr + stdex::strlen(chr);
3143 if (((chr[0] == L'-' ||
3144 chr[0] == L'.' ||
3145 chr[0] == L'_' ||
3146 chr[0] == L'~' ||
3147 chr[0] == L'%' ||
3148 chr[0] == L'!' ||
3149 chr[0] == L'$' ||
3150 chr[0] == L'&' ||
3151 chr[0] == L'\'' ||
3152 chr[0] == L'(' ||
3153 chr[0] == L')' ||
3154 chr[0] == L'*' ||
3155 chr[0] == L'+' ||
3156 chr[0] == L',' ||
3157 chr[0] == L';' ||
3158 chr[0] == L'=' ||
3159 chr[0] == L':') && chr[1] == 0) ||
3160 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3161 {
3162 this->interval.start = start;
3163 return true;
3164 }
3165 }
3166 this->interval.invalidate();
3167 return false;
3168 }
3169 };
3170
3174 template <class T>
3176 {
3177 public:
3178 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3179
3180 protected:
3181 virtual bool do_match(
3182 _In_reads_or_z_opt_(end) const T* text,
3183 _In_ size_t start = 0,
3184 _In_ size_t end = SIZE_MAX,
3185 _In_ int flags = match_default)
3186 {
3187 _Assume_(text || start >= end);
3188 if (start < end && text[start]) {
3189 if (text[start] == '/' ||
3190 text[start] == '-' ||
3191 text[start] == '.' ||
3192 text[start] == '_' ||
3193 text[start] == '~' ||
3194 text[start] == '%' ||
3195 text[start] == '!' ||
3196 text[start] == '$' ||
3197 text[start] == '&' ||
3198 text[start] == '\'' ||
3199 text[start] == '(' ||
3200 text[start] == ')' ||
3201 text[start] == '*' ||
3202 text[start] == '+' ||
3203 text[start] == ',' ||
3204 text[start] == ';' ||
3205 text[start] == '=' ||
3206 text[start] == ':' ||
3207 text[start] == '@' ||
3208 text[start] == '?' ||
3209 text[start] == '#' ||
3210 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3211 {
3212 this->interval.end = (this->interval.start = start) + 1;
3213 return true;
3214 }
3215 }
3216 this->interval.invalidate();
3217 return false;
3218 }
3219 };
3220
3223#ifdef _UNICODE
3225#else
3227#endif
3228
3233 {
3234 public:
3235 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3236
3237 protected:
3238 virtual bool do_match(
3239 _In_reads_or_z_(end) const char* text,
3240 _In_ size_t start = 0,
3241 _In_ size_t end = SIZE_MAX,
3242 _In_ int flags = match_default)
3243 {
3244 _Assume_(text || start >= end);
3245 if (start < end && text[start]) {
3246 wchar_t buf[3];
3247 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3248 const wchar_t* chr_end = chr + stdex::strlen(chr);
3249 if (((chr[0] == L'/' ||
3250 chr[0] == L'-' ||
3251 chr[0] == L'.' ||
3252 chr[0] == L'_' ||
3253 chr[0] == L'~' ||
3254 chr[0] == L'%' ||
3255 chr[0] == L'!' ||
3256 chr[0] == L'$' ||
3257 chr[0] == L'&' ||
3258 chr[0] == L'\'' ||
3259 chr[0] == L'(' ||
3260 chr[0] == L')' ||
3261 chr[0] == L'*' ||
3262 chr[0] == L'+' ||
3263 chr[0] == L',' ||
3264 chr[0] == L';' ||
3265 chr[0] == L'=' ||
3266 chr[0] == L':' ||
3267 chr[0] == L'@' ||
3268 chr[0] == L'?' ||
3269 chr[0] == L'#') && chr[1] == 0) ||
3270 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3271 {
3272 this->interval.start = start;
3273 return true;
3274 }
3275 }
3276 this->interval.invalidate();
3277 return false;
3278 }
3279 };
3280
3284 template <class T>
3286 {
3287 public:
3289 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3290 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3291 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3292 _In_ const std::locale& locale = std::locale()) :
3294 m_path_char(path_char),
3295 m_query_start(query_start),
3296 m_bookmark_start(bookmark_start)
3297 {}
3298
3299 virtual void invalidate()
3300 {
3301 path.start = 1;
3302 path.end = 0;
3303 query.start = 1;
3304 query.end = 0;
3305 bookmark.start = 1;
3306 bookmark.end = 0;
3308 }
3309
3312 stdex::interval<size_t> bookmark;
3313
3314 protected:
3315 virtual bool do_match(
3316 _In_reads_or_z_opt_(end) const T* text,
3317 _In_ size_t start = 0,
3318 _In_ size_t end = SIZE_MAX,
3319 _In_ int flags = match_default)
3320 {
3321 _Assume_(text || start >= end);
3322
3323 this->interval.end = start;
3324 path.start = start;
3325 query.start = 1;
3326 query.end = 0;
3327 bookmark.start = 1;
3328 bookmark.end = 0;
3329
3330 for (;;) {
3331 if (this->interval.end >= end || !text[this->interval.end])
3332 break;
3333 if (m_query_start->match(text, this->interval.end, end, flags)) {
3334 path.end = this->interval.end;
3335 query.start = this->interval.end = m_query_start->interval.end;
3336 for (;;) {
3337 if (this->interval.end >= end || !text[this->interval.end]) {
3338 query.end = this->interval.end;
3339 break;
3340 }
3341 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3342 query.end = this->interval.end;
3343 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3344 for (;;) {
3345 if (this->interval.end >= end || !text[this->interval.end]) {
3346 bookmark.end = this->interval.end;
3347 break;
3348 }
3349 if (m_path_char->match(text, this->interval.end, end, flags))
3350 this->interval.end = m_path_char->interval.end;
3351 else {
3352 bookmark.end = this->interval.end;
3353 break;
3354 }
3355 }
3356 this->interval.start = start;
3357 return true;
3358 }
3359 if (m_path_char->match(text, this->interval.end, end, flags))
3360 this->interval.end = m_path_char->interval.end;
3361 else {
3362 query.end = this->interval.end;
3363 break;
3364 }
3365 }
3366 this->interval.start = start;
3367 return true;
3368 }
3369 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3370 path.end = this->interval.end;
3371 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3372 for (;;) {
3373 if (this->interval.end >= end || !text[this->interval.end]) {
3374 bookmark.end = this->interval.end;
3375 break;
3376 }
3377 if (m_path_char->match(text, this->interval.end, end, flags))
3378 this->interval.end = m_path_char->interval.end;
3379 else {
3380 bookmark.end = this->interval.end;
3381 break;
3382 }
3383 }
3384 this->interval.start = start;
3385 return true;
3386 }
3387 if (m_path_char->match(text, this->interval.end, end, flags))
3388 this->interval.end = m_path_char->interval.end;
3389 else
3390 break;
3391 }
3392
3394 path.end = this->interval.end;
3395 this->interval.start = start;
3396 return true;
3397 }
3398
3399 path.start = 1;
3400 path.end = 0;
3401 bookmark.start = 1;
3402 bookmark.end = 0;
3403 this->interval.invalidate();
3404 return false;
3405 }
3406
3407 std::shared_ptr<basic_parser<T>> m_path_char;
3408 std::shared_ptr<basic_parser<T>> m_query_start;
3409 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3410 };
3411
3414#ifdef _UNICODE
3415 using turl_path = wurl_path;
3416#else
3417 using turl_path = url_path;
3418#endif
3420
3424 template <class T>
3425 class basic_url : public basic_parser<T>
3426 {
3427 public:
3428 basic_url(
3429 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3430 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3431 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3432 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3433 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3434 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3435 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3436 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3437 _In_ const std::shared_ptr<basic_parser<T>>& at,
3438 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3439 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3440 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3441 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3442 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3443 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3444 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3445 _In_ const std::locale& locale = std::locale()) :
3447 http_scheme(_http_scheme),
3448 ftp_scheme(_ftp_scheme),
3449 mailto_scheme(_mailto_scheme),
3450 file_scheme(_file_scheme),
3451 m_colon(colon),
3452 m_slash(slash),
3453 username(_username),
3454 password(_password),
3455 m_at(at),
3456 m_ip_lbracket(ip_lbracket),
3457 m_ip_rbracket(ip_rbracket),
3458 ipv4_host(_ipv4_host),
3459 ipv6_host(_ipv6_host),
3460 dns_host(_dns_host),
3461 port(_port),
3462 path(_path)
3463 {}
3464
3465 virtual void invalidate()
3466 {
3467 http_scheme->invalidate();
3468 ftp_scheme->invalidate();
3469 mailto_scheme->invalidate();
3470 file_scheme->invalidate();
3471 username->invalidate();
3472 password->invalidate();
3473 ipv4_host->invalidate();
3474 ipv6_host->invalidate();
3475 dns_host->invalidate();
3476 port->invalidate();
3477 path->invalidate();
3479 }
3480
3481 std::shared_ptr<basic_parser<T>> http_scheme;
3482 std::shared_ptr<basic_parser<T>> ftp_scheme;
3483 std::shared_ptr<basic_parser<T>> mailto_scheme;
3484 std::shared_ptr<basic_parser<T>> file_scheme;
3485 std::shared_ptr<basic_parser<T>> username;
3486 std::shared_ptr<basic_parser<T>> password;
3487 std::shared_ptr<basic_parser<T>> ipv4_host;
3488 std::shared_ptr<basic_parser<T>> ipv6_host;
3489 std::shared_ptr<basic_parser<T>> dns_host;
3490 std::shared_ptr<basic_parser<T>> port;
3491 std::shared_ptr<basic_parser<T>> path;
3492
3493 protected:
3494 virtual bool do_match(
3495 _In_reads_or_z_opt_(end) const T* text,
3496 _In_ size_t start = 0,
3497 _In_ size_t end = SIZE_MAX,
3498 _In_ int flags = match_default)
3499 {
3500 _Assume_(text || start >= end);
3501
3502 this->interval.end = start;
3503
3504 if (http_scheme->match(text, this->interval.end, end, flags) &&
3505 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3506 m_slash->match(text, m_colon->interval.end, end, flags) &&
3507 m_slash->match(text, m_slash->interval.end, end, flags))
3508 {
3509 // http://
3510 this->interval.end = m_slash->interval.end;
3511 ftp_scheme->invalidate();
3512 mailto_scheme->invalidate();
3513 file_scheme->invalidate();
3514 }
3515 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3516 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3517 m_slash->match(text, m_colon->interval.end, end, flags) &&
3518 m_slash->match(text, m_slash->interval.end, end, flags))
3519 {
3520 // ftp://
3521 this->interval.end = m_slash->interval.end;
3522 http_scheme->invalidate();
3523 mailto_scheme->invalidate();
3524 file_scheme->invalidate();
3525 }
3526 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3527 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3528 {
3529 // mailto:
3530 this->interval.end = m_colon->interval.end;
3531 http_scheme->invalidate();
3532 ftp_scheme->invalidate();
3533 file_scheme->invalidate();
3534 }
3535 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3536 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3537 m_slash->match(text, m_colon->interval.end, end, flags) &&
3538 m_slash->match(text, m_slash->interval.end, end, flags))
3539 {
3540 // file://
3541 this->interval.end = m_slash->interval.end;
3542 http_scheme->invalidate();
3543 ftp_scheme->invalidate();
3544 mailto_scheme->invalidate();
3545 }
3546 else {
3547 // Default to http:
3548 http_scheme->invalidate();
3549 ftp_scheme->invalidate();
3550 mailto_scheme->invalidate();
3551 file_scheme->invalidate();
3552 }
3553
3554 if (ftp_scheme->interval) {
3555 if (username->match(text, this->interval.end, end, flags)) {
3556 if (m_colon->match(text, username->interval.end, end, flags) &&
3557 password->match(text, m_colon->interval.end, end, flags) &&
3558 m_at->match(text, password->interval.end, end, flags))
3559 {
3560 // Username and password
3561 this->interval.end = m_at->interval.end;
3562 }
3563 else if (m_at->match(text, this->interval.end, end, flags)) {
3564 // Username only
3565 this->interval.end = m_at->interval.end;
3566 password->invalidate();
3567 }
3568 else {
3569 username->invalidate();
3570 password->invalidate();
3571 }
3572 }
3573 else {
3574 username->invalidate();
3575 password->invalidate();
3576 }
3577
3578 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3579 // Host is IPv4
3580 this->interval.end = ipv4_host->interval.end;
3581 ipv6_host->invalidate();
3582 dns_host->invalidate();
3583 }
3584 else if (
3585 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3586 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3587 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3588 {
3589 // Host is IPv6
3590 this->interval.end = m_ip_rbracket->interval.end;
3591 ipv4_host->invalidate();
3592 dns_host->invalidate();
3593 }
3594 else if (dns_host->match(text, this->interval.end, end, flags)) {
3595 // Host is hostname
3596 this->interval.end = dns_host->interval.end;
3597 ipv4_host->invalidate();
3598 ipv6_host->invalidate();
3599 }
3600 else {
3601 invalidate();
3602 return false;
3603 }
3604
3605 if (m_colon->match(text, this->interval.end, end, flags) &&
3606 port->match(text, m_colon->interval.end, end, flags))
3607 {
3608 // Port
3609 this->interval.end = port->interval.end;
3610 }
3611 else
3612 port->invalidate();
3613
3614 if (path->match(text, this->interval.end, end, flags)) {
3615 // Path
3616 this->interval.end = path->interval.end;
3617 }
3618
3619 this->interval.start = start;
3620 return true;
3621 }
3622
3623 if (mailto_scheme->interval) {
3624 if (username->match(text, this->interval.end, end, flags) &&
3625 m_at->match(text, username->interval.end, end, flags))
3626 {
3627 // Username
3628 this->interval.end = m_at->interval.end;
3629 }
3630 else {
3631 invalidate();
3632 return false;
3633 }
3634
3635 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3636 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3637 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3638 {
3639 // Host is IPv4
3640 this->interval.end = m_ip_rbracket->interval.end;
3641 ipv6_host->invalidate();
3642 dns_host->invalidate();
3643 }
3644 else if (
3645 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3646 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3647 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3648 {
3649 // Host is IPv6
3650 this->interval.end = m_ip_rbracket->interval.end;
3651 ipv4_host->invalidate();
3652 dns_host->invalidate();
3653 }
3654 else if (dns_host->match(text, this->interval.end, end, flags)) {
3655 // Host is hostname
3656 this->interval.end = dns_host->interval.end;
3657 ipv4_host->invalidate();
3658 ipv6_host->invalidate();
3659 }
3660 else {
3661 invalidate();
3662 return false;
3663 }
3664
3665 password->invalidate();
3666 port->invalidate();
3667 path->invalidate();
3668 this->interval.start = start;
3669 return true;
3670 }
3671
3672 if (file_scheme->interval) {
3673 if (path->match(text, this->interval.end, end, flags)) {
3674 // Path
3675 this->interval.end = path->interval.end;
3676 }
3677
3678 username->invalidate();
3679 password->invalidate();
3680 ipv4_host->invalidate();
3681 ipv6_host->invalidate();
3682 dns_host->invalidate();
3683 port->invalidate();
3684 this->interval.start = start;
3685 return true;
3686 }
3687
3688 // "http://" found or defaulted to
3689
3690 // If "http://" explicit, test for username&password.
3691 if (http_scheme->interval &&
3692 username->match(text, this->interval.end, end, flags))
3693 {
3694 if (m_colon->match(text, username->interval.end, end, flags) &&
3695 password->match(text, m_colon->interval.end, end, flags) &&
3696 m_at->match(text, password->interval.end, end, flags))
3697 {
3698 // Username and password
3699 this->interval.end = m_at->interval.end;
3700 }
3701 else if (m_at->match(text, username->interval.end, end, flags)) {
3702 // Username only
3703 this->interval.end = m_at->interval.end;
3704 password->invalidate();
3705 }
3706 else {
3707 username->invalidate();
3708 password->invalidate();
3709 }
3710 }
3711 else {
3712 username->invalidate();
3713 password->invalidate();
3714 }
3715
3716 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3717 // Host is IPv4
3718 this->interval.end = ipv4_host->interval.end;
3719 ipv6_host->invalidate();
3720 dns_host->invalidate();
3721 }
3722 else if (
3723 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3724 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3725 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3726 {
3727 // Host is IPv6
3728 this->interval.end = m_ip_rbracket->interval.end;
3729 ipv4_host->invalidate();
3730 dns_host->invalidate();
3731 }
3732 else if (dns_host->match(text, this->interval.end, end, flags)) {
3733 // Host is hostname
3734 this->interval.end = dns_host->interval.end;
3735 ipv4_host->invalidate();
3736 ipv6_host->invalidate();
3737 }
3738 else {
3739 invalidate();
3740 return false;
3741 }
3742
3743 if (m_colon->match(text, this->interval.end, end, flags) &&
3744 port->match(text, m_colon->interval.end, end, flags))
3745 {
3746 // Port
3747 this->interval.end = port->interval.end;
3748 }
3749 else
3750 port->invalidate();
3751
3752 if (path->match(text, this->interval.end, end, flags)) {
3753 // Path
3754 this->interval.end = path->interval.end;
3755 }
3756
3757 this->interval.start = start;
3758 return true;
3759 }
3760
3761 std::shared_ptr<basic_parser<T>> m_colon;
3762 std::shared_ptr<basic_parser<T>> m_slash;
3763 std::shared_ptr<basic_parser<T>> m_at;
3764 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3765 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3766 };
3767
3768 using url = basic_url<char>;
3769 using wurl = basic_url<wchar_t>;
3770#ifdef _UNICODE
3771 using turl = wurl;
3772#else
3773 using turl = url;
3774#endif
3775 using sgml_url = basic_url<char>;
3776
3780 template <class T>
3782 {
3783 public:
3785 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3786 _In_ const std::shared_ptr<basic_parser<T>>& at,
3787 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3788 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3789 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3790 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3791 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3792 _In_ const std::locale& locale = std::locale()) :
3794 username(_username),
3795 m_at(at),
3796 m_ip_lbracket(ip_lbracket),
3797 m_ip_rbracket(ip_rbracket),
3798 ipv4_host(_ipv4_host),
3799 ipv6_host(_ipv6_host),
3800 dns_host(_dns_host)
3801 {}
3802
3803 virtual void invalidate()
3804 {
3805 username->invalidate();
3806 ipv4_host->invalidate();
3807 ipv6_host->invalidate();
3808 dns_host->invalidate();
3810 }
3811
3812 std::shared_ptr<basic_parser<T>> username;
3813 std::shared_ptr<basic_parser<T>> ipv4_host;
3814 std::shared_ptr<basic_parser<T>> ipv6_host;
3815 std::shared_ptr<basic_parser<T>> dns_host;
3816
3817 protected:
3818 virtual bool do_match(
3819 _In_reads_or_z_opt_(end) const T* text,
3820 _In_ size_t start = 0,
3821 _In_ size_t end = SIZE_MAX,
3822 _In_ int flags = match_default)
3823 {
3824 _Assume_(text || start >= end);
3825
3826 if (username->match(text, start, end, flags) &&
3827 m_at->match(text, username->interval.end, end, flags))
3828 {
3829 // Username@
3830 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3831 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3832 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3833 {
3834 // Host is IPv4
3835 this->interval.end = m_ip_rbracket->interval.end;
3836 ipv6_host->invalidate();
3837 dns_host->invalidate();
3838 }
3839 else if (
3840 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3841 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3842 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3843 {
3844 // Host is IPv6
3845 this->interval.end = m_ip_rbracket->interval.end;
3846 ipv4_host->invalidate();
3847 dns_host->invalidate();
3848 }
3849 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3850 // Host is hostname
3851 this->interval.end = dns_host->interval.end;
3852 ipv4_host->invalidate();
3853 ipv6_host->invalidate();
3854 }
3855 else
3856 goto error;
3857 this->interval.start = start;
3858 return true;
3859 }
3860
3861 error:
3862 invalidate();
3863 return false;
3864 }
3865
3866 std::shared_ptr<basic_parser<T>> m_at;
3867 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3868 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3869 };
3870
3873#ifdef _UNICODE
3875#else
3877#endif
3879
3883 template <class T>
3885 {
3886 public:
3888 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3889 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3890 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3891 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3892 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3893 _In_ const std::locale& locale = std::locale()) :
3896 apex(_apex),
3897 eyes(_eyes),
3898 nose(_nose),
3899 mouth(_mouth)
3900 {}
3901
3902 virtual void invalidate()
3903 {
3904 if (emoticon) emoticon->invalidate();
3905 if (apex) apex->invalidate();
3906 eyes->invalidate();
3907 if (nose) nose->invalidate();
3908 mouth->invalidate();
3910 }
3911
3912 std::shared_ptr<basic_parser<T>> emoticon;
3913 std::shared_ptr<basic_parser<T>> apex;
3914 std::shared_ptr<basic_parser<T>> eyes;
3915 std::shared_ptr<basic_parser<T>> nose;
3916 std::shared_ptr<basic_set<T>> mouth;
3917
3918 protected:
3919 virtual bool do_match(
3920 _In_reads_or_z_opt_(end) const T* text,
3921 _In_ size_t start = 0,
3922 _In_ size_t end = SIZE_MAX,
3923 _In_ int flags = match_default)
3924 {
3925 _Assume_(text || start >= end);
3926
3927 if (emoticon && emoticon->match(text, start, end, flags)) {
3928 if (apex) apex->invalidate();
3929 eyes->invalidate();
3930 if (nose) nose->invalidate();
3931 mouth->invalidate();
3932 this->interval.start = start;
3933 this->interval.end = emoticon->interval.end;
3934 return true;
3935 }
3936
3937 this->interval.end = start;
3938
3939 if (apex && apex->match(text, this->interval.end, end, flags))
3940 this->interval.end = apex->interval.end;
3941
3942 if (eyes->match(text, this->interval.end, end, flags)) {
3943 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3944 mouth->match(text, nose->interval.end, end, flags))
3945 {
3946 size_t
3948 hit_offset = mouth->hit_offset;
3949 // Mouth may repeat :-)))))))
3950 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3951 mouth->interval.start = start_mouth;
3952 mouth->interval.end = this->interval.end;
3953 this->interval.start = start;
3954 return true;
3955 }
3956 if (mouth->match(text, eyes->interval.end, end, flags)) {
3957 size_t
3959 hit_offset = mouth->hit_offset;
3960 // Mouth may repeat :-)))))))
3961 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3962 if (nose) nose->invalidate();
3963 mouth->interval.start = start_mouth;
3964 mouth->interval.end = this->interval.end;
3965 this->interval.start = start;
3966 return true;
3967 }
3968 }
3969
3970 if (emoticon) emoticon->invalidate();
3971 if (apex) apex->invalidate();
3972 eyes->invalidate();
3973 if (nose) nose->invalidate();
3974 mouth->invalidate();
3975 this->interval.invalidate();
3976 return false;
3977 }
3978 };
3979
3980 using emoticon = basic_emoticon<char>;
3981 using wemoticon = basic_emoticon<wchar_t>;
3982#ifdef _UNICODE
3983 using temoticon = wemoticon;
3984#else
3985 using temoticon = emoticon;
3986#endif
3987 using sgml_emoticon = basic_emoticon<char>;
3988
///
/// Date formats
///
/// Every format occupies its own bit, so the values can be OR-ed together into a mask of
/// accepted formats (see basic_date).
///
enum date_format_t {
    date_format_none = 0,       ///< No format
    date_format_dmy  = 1 << 0,  ///< Day, month, year (0x1)
    date_format_mdy  = 1 << 1,  ///< Month, day, year (0x2)
    date_format_ymd  = 1 << 2,  ///< Year, month, day (0x4)
    date_format_ym   = 1 << 3,  ///< Year, month (0x8)
    date_format_my   = 1 << 4,  ///< Month, year (0x10)
    date_format_dm   = 1 << 5,  ///< Day, month (0x20)
    date_format_md   = 1 << 6,  ///< Month, day (0x40)
};
4002
4006 template <class T>
4007 class basic_date : public basic_parser<T>
4008 {
4009 public:
4010 basic_date(
4011 _In_ int format_mask,
4012 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4013 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4014 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4015 _In_ const std::shared_ptr<basic_set<T>>& separator,
4016 _In_ const std::shared_ptr<basic_parser<T>>& space,
4017 _In_ const std::locale& locale = std::locale()) :
4019 format(date_format_none),
4020 m_format_mask(format_mask),
4021 day(_day),
4022 month(_month),
4023 year(_year),
4024 m_separator(separator),
4025 m_space(space)
4026 {}
4027
4028 virtual void invalidate()
4029 {
4030 if (day) day->invalidate();
4031 if (month) month->invalidate();
4032 if (year) year->invalidate();
4033 format = date_format_none;
4035 }
4036
4037 date_format_t format;
4038 std::shared_ptr<basic_integer<T>> day;
4039 std::shared_ptr<basic_integer<T>> month;
4040 std::shared_ptr<basic_integer<T>> year;
4041
4042 protected:
4043 virtual bool do_match(
4044 _In_reads_or_z_opt_(end) const T* text,
4045 _In_ size_t start = 0,
4046 _In_ size_t end = SIZE_MAX,
4047 _In_ int flags = match_default)
4048 {
4049 _Assume_(text || start >= end);
4050
4051 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4052 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4053 if (day->match(text, start, end, flags)) {
4054 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4055 if (m_separator->match(text, this->interval.end, end, flags)) {
4056 size_t hit_offset = m_separator->hit_offset;
4057 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4058 if (month->match(text, this->interval.end, end, flags)) {
4059 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4060 if (m_separator->match(text, this->interval.end, end, flags) &&
4061 m_separator->hit_offset == hit_offset) // Both separators must match.
4062 {
4063 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4064 if (year->match(text, this->interval.end, end, flags) &&
4065 is_valid(day->value, month->value))
4066 {
4067 this->interval.start = start;
4068 this->interval.end = year->interval.end;
4069 format = date_format_dmy;
4070 return true;
4071 }
4072 }
4073 }
4074 }
4075 }
4076 }
4077
4078 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4079 if (month->match(text, start, end, flags)) {
4080 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4081 if (m_separator->match(text, this->interval.end, end, flags)) {
4082 size_t hit_offset = m_separator->hit_offset;
4083 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4084 if (day->match(text, this->interval.end, end, flags)) {
4085 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4086 if (m_separator->match(text, this->interval.end, end, flags) &&
4087 m_separator->hit_offset == hit_offset) // Both separators must match.
4088 {
4089 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4090 if (year->match(text, this->interval.end, end, flags) &&
4091 is_valid(day->value, month->value))
4092 {
4093 this->interval.start = start;
4094 this->interval.end = year->interval.end;
4095 format = date_format_mdy;
4096 return true;
4097 }
4098 }
4099 }
4100 }
4101 }
4102 }
4103
4104 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4105 if (year->match(text, start, end, flags)) {
4106 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4107 if (m_separator->match(text, this->interval.end, end, flags)) {
4108 size_t hit_offset = m_separator->hit_offset;
4109 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4110 if (month->match(text, this->interval.end, end, flags)) {
4111 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4112 if (m_separator->match(text, this->interval.end, end, flags) &&
4113 m_separator->hit_offset == hit_offset) // Both separators must match.
4114 {
4115 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4116 if (day->match(text, this->interval.end, end, flags) &&
4117 is_valid(day->value, month->value))
4118 {
4119 this->interval.start = start;
4120 this->interval.end = day->interval.end;
4121 format = date_format_ymd;
4122 return true;
4123 }
4124 }
4125 }
4126 }
4127 }
4128 }
4129
4130 if ((m_format_mask & date_format_ym) == date_format_ym) {
4131 if (year->match(text, start, end, flags)) {
4132 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4133 if (m_separator->match(text, this->interval.end, end, flags)) {
4134 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4135 if (month->match(text, this->interval.end, end, flags) &&
4136 is_valid(SIZE_MAX, month->value))
4137 {
4138 if (day) day->invalidate();
4139 this->interval.start = start;
4140 this->interval.end = month->interval.end;
4141 format = date_format_ym;
4142 return true;
4143 }
4144 }
4145 }
4146 }
4147
4148 if ((m_format_mask & date_format_my) == date_format_my) {
4149 if (month->match(text, start, end, flags)) {
4150 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4151 if (m_separator->match(text, this->interval.end, end, flags)) {
4152 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4153 if (year->match(text, this->interval.end, end, flags) &&
4154 is_valid(SIZE_MAX, month->value))
4155 {
4156 if (day) day->invalidate();
4157 this->interval.start = start;
4158 this->interval.end = year->interval.end;
4159 format = date_format_my;
4160 return true;
4161 }
4162 }
4163 }
4164 }
4165
4166 if ((m_format_mask & date_format_dm) == date_format_dm) {
4167 if (day->match(text, start, end, flags)) {
4168 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4169 if (m_separator->match(text, this->interval.end, end, flags)) {
4170 size_t hit_offset = m_separator->hit_offset;
4171 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4172 if (month->match(text, this->interval.end, end, flags) &&
4173 is_valid(day->value, month->value))
4174 {
4175 if (year) year->invalidate();
4176 this->interval.start = start;
4177 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4178 if (m_separator->match(text, this->interval.end, end, flags) &&
4179 m_separator->hit_offset == hit_offset) // Both separators must match.
4180 this->interval.end = m_separator->interval.end;
4181 else
4182 this->interval.end = month->interval.end;
4183 format = date_format_dm;
4184 return true;
4185 }
4186 }
4187 }
4188 }
4189
4190 if ((m_format_mask & date_format_md) == date_format_md) {
4191 if (month->match(text, start, end, flags)) {
4192 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4193 if (m_separator->match(text, this->interval.end, end, flags)) {
4194 size_t hit_offset = m_separator->hit_offset;
4195 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4196 if (day->match(text, this->interval.end, end, flags) &&
4197 is_valid(day->value, month->value))
4198 {
4199 if (year) year->invalidate();
4200 this->interval.start = start;
4201 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4202 if (m_separator->match(text, this->interval.end, end, flags) &&
4203 m_separator->hit_offset == hit_offset) // Both separators must match.
4204 this->interval.end = m_separator->interval.end;
4205 else
4206 this->interval.end = day->interval.end;
4207 format = date_format_md;
4208 return true;
4209 }
4210 }
4211 }
4212 }
4213
4214 if (day) day->invalidate();
4215 if (month) month->invalidate();
4216 if (year) year->invalidate();
4217 format = date_format_none;
4218 this->interval.invalidate();
4219 return false;
4220 }
4221
4222 static bool is_valid(size_t day, size_t month)
4223 {
4224 if (month == SIZE_MAX) {
4225 // Default to January. This allows validating day only, as January has all 31 days.
4226 month = 1;
4227 }
4228 if (day == SIZE_MAX) {
4229 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4230 day = 1;
4231 }
4232
4233 switch (month) {
4234 case 1:
4235 case 3:
4236 case 5:
4237 case 7:
4238 case 8:
4239 case 10:
4240 case 12:
4241 return 1 <= day && day <= 31;
4242 case 2:
4243 return 1 <= day && day <= 29;
4244 case 4:
4245 case 6:
4246 case 9:
4247 case 11:
4248 return 1 <= day && day <= 30;
4249 default:
4250 return false;
4251 }
4252 }
4253
4254 int m_format_mask;
4255 std::shared_ptr<basic_set<T>> m_separator;
4256 std::shared_ptr<basic_parser<T>> m_space;
4257 };
4258
4259 using date = basic_date<char>;
4260 using wdate = basic_date<wchar_t>;
4261#ifdef _UNICODE
4262 using tdate = wdate;
4263#else
4264 using tdate = date;
4265#endif
4267
4271 template <class T>
4272 class basic_time : public basic_parser<T>
4273 {
4274 public:
4275 basic_time(
4276 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4277 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4278 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4279 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4280 _In_ const std::shared_ptr<basic_set<T>>& separator,
4281 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4282 _In_ const std::locale& locale = std::locale()) :
4284 hour(_hour),
4285 minute(_minute),
4286 second(_second),
4287 millisecond(_millisecond),
4288 m_separator(separator),
4289 m_millisecond_separator(millisecond_separator)
4290 {}
4291
4292 virtual void invalidate()
4293 {
4294 hour->invalidate();
4295 minute->invalidate();
4296 if (second) second->invalidate();
4297 if (millisecond) millisecond->invalidate();
4299 }
4300
4301 std::shared_ptr<basic_integer10<T>> hour;
4302 std::shared_ptr<basic_integer10<T>> minute;
4303 std::shared_ptr<basic_integer10<T>> second;
4304 std::shared_ptr<basic_integer10<T>> millisecond;
4305
4306 protected:
4307 virtual bool do_match(
4308 _In_reads_or_z_opt_(end) const T* text,
4309 _In_ size_t start = 0,
4310 _In_ size_t end = SIZE_MAX,
4311 _In_ int flags = match_default)
4312 {
4313 _Assume_(text || start >= end);
4314
4315 if (hour->match(text, start, end, flags) &&
4316 m_separator->match(text, hour->interval.end, end, flags) &&
4317 minute->match(text, m_separator->interval.end, end, flags) &&
4318 minute->value < 60)
4319 {
4320 // hh::mm
4321 size_t hit_offset = m_separator->hit_offset;
4322 if (m_separator->match(text, minute->interval.end, end, flags) &&
4323 m_separator->hit_offset == hit_offset && // Both separators must match.
4324 second && second->match(text, m_separator->interval.end, end, flags) &&
4325 second->value < 60)
4326 {
4327 // hh::mm:ss
4328 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4329 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4330 millisecond->value < 1000)
4331 {
4332 // hh::mm:ss.mmmm
4333 this->interval.end = millisecond->interval.end;
4334 }
4335 else {
4336 if (millisecond) millisecond->invalidate();
4337 this->interval.end = second->interval.end;
4338 }
4339 }
4340 else {
4341 if (second) second->invalidate();
4342 if (millisecond) millisecond->invalidate();
4343 this->interval.end = minute->interval.end;
4344 }
4345 this->interval.start = start;
4346 return true;
4347 }
4348
4349 hour->invalidate();
4350 minute->invalidate();
4351 if (second) second->invalidate();
4352 if (millisecond) millisecond->invalidate();
4353 this->interval.invalidate();
4354 return false;
4355 }
4356
4357 std::shared_ptr<basic_set<T>> m_separator;
4358 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4359 };
4360
4361 using time = basic_time<char>;
4362 using wtime = basic_time<wchar_t>;
4363#ifdef _UNICODE
4364 using ttime = wtime;
4365#else
4366 using ttime = time;
4367#endif
4369
4373 template <class T>
4374 class basic_angle : public basic_parser<T>
4375 {
4376 public:
4378 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4379 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4380 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4381 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4382 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4383 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4384 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4385 _In_ const std::locale& locale = std::locale()) :
4387 degree(_degree),
4388 degree_separator(_degree_separator),
4389 minute(_minute),
4390 minute_separator(_minute_separator),
4391 second(_second),
4392 second_separator(_second_separator),
4393 decimal(_decimal)
4394 {}
4395
4396 virtual void invalidate()
4397 {
4398 degree->invalidate();
4399 degree_separator->invalidate();
4400 minute->invalidate();
4401 minute_separator->invalidate();
4402 if (second) second->invalidate();
4403 if (second_separator) second_separator->invalidate();
4404 if (decimal) decimal->invalidate();
4406 }
4407
4408 std::shared_ptr<basic_integer10<T>> degree;
4409 std::shared_ptr<basic_parser<T>> degree_separator;
4410 std::shared_ptr<basic_integer10<T>> minute;
4411 std::shared_ptr<basic_parser<T>> minute_separator;
4412 std::shared_ptr<basic_integer10<T>> second;
4413 std::shared_ptr<basic_parser<T>> second_separator;
4414 std::shared_ptr<basic_parser<T>> decimal;
4415
4416 protected:
4417 virtual bool do_match(
4418 _In_reads_or_z_opt_(end) const T* text,
4419 _In_ size_t start = 0,
4420 _In_ size_t end = SIZE_MAX,
4421 _In_ int flags = match_default)
4422 {
4423 _Assume_(text || start >= end);
4424
4425 this->interval.end = start;
4426
4427 if (degree->match(text, this->interval.end, end, flags) &&
4428 degree_separator->match(text, degree->interval.end, end, flags))
4429 {
4430 // Degrees
4431 this->interval.end = degree_separator->interval.end;
4432 }
4433 else {
4434 degree->invalidate();
4435 degree_separator->invalidate();
4436 }
4437
4438 if (minute->match(text, this->interval.end, end, flags) &&
4439 minute->value < 60 &&
4440 minute_separator->match(text, minute->interval.end, end, flags))
4441 {
4442 // Minutes
4443 this->interval.end = minute_separator->interval.end;
4444 }
4445 else {
4446 minute->invalidate();
4447 minute_separator->invalidate();
4448 }
4449
4450 if (second && second->match(text, this->interval.end, end, flags) &&
4451 second->value < 60)
4452 {
4453 // Seconds
4454 this->interval.end = second->interval.end;
4455 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4456 this->interval.end = second_separator->interval.end;
4457 else
4458 if (second_separator) second_separator->invalidate();
4459 }
4460 else {
4461 if (second) second->invalidate();
4462 if (second_separator) second_separator->invalidate();
4463 }
4464
4465 if (degree->interval.start < degree->interval.end ||
4466 minute->interval.start < minute->interval.end ||
4467 (second && second->interval.start < second->interval.end))
4468 {
4469 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4470 // Decimals
4471 this->interval.end = decimal->interval.end;
4472 }
4473 else if (decimal)
4474 decimal->invalidate();
4475 this->interval.start = start;
4476 return true;
4477 }
4478 if (decimal) decimal->invalidate();
4479 this->interval.invalidate();
4480 return false;
4481 }
4482 };
4483
4484 using angle = basic_angle<char>;
4486#ifdef _UNICODE
4487 using RRegElKot = wangle;
4488#else
4489 using RRegElKot = angle;
4490#endif
4492
4496 template <class T>
4498 {
4499 public:
4501 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4502 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4503 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4504 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4505 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4506 _In_ const std::shared_ptr<basic_parser<T>>& space,
4507 _In_ const std::locale& locale = std::locale()) :
4509 m_digit(digit),
4510 m_plus_sign(plus_sign),
4511 m_lparenthesis(lparenthesis),
4512 m_rparenthesis(rparenthesis),
4513 m_separator(separator),
4514 m_space(space)
4515 {}
4516
4517 virtual void invalidate()
4518 {
4519 value.clear();
4521 }
4522
4523 std::basic_string<T> value;
4524
4525 protected:
4526 virtual bool do_match(
4527 _In_reads_or_z_opt_(end) const T* text,
4528 _In_ size_t start = 0,
4529 _In_ size_t end = SIZE_MAX,
4530 _In_ int flags = match_default)
4531 {
4532 _Assume_(text || start >= end);
4533
4534 size_t safe_digit_end = start, safe_value_size = 0;
4535 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4536 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4537
4538 this->interval.end = start;
4539 value.clear();
4540 m_lparenthesis->invalidate();
4541 m_rparenthesis->invalidate();
4542
4543 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4544 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4545 safe_value_size = value.size();
4546 this->interval.end = m_plus_sign->interval.end;
4547 }
4548
4549 for (;;) {
4550 _Assume_(text || this->interval.end >= end);
4551 if (this->interval.end >= end || !text[this->interval.end])
4552 break;
4553 if (m_digit->match(text, this->interval.end, end, flags)) {
4554 // Digit
4555 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4556 this->interval.end = m_digit->interval.end;
4557 if (!in_parentheses) {
4558 safe_digit_end = this->interval.end;
4559 safe_value_size = value.size();
4560 has_digits = true;
4561 }
4562 after_digit = true;
4563 after_parentheses = false;
4564 }
4565 else if (
4566 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4567 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4568 m_lparenthesis->match(text, this->interval.end, end, flags))
4569 {
4570 // Left parenthesis
4571 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4572 this->interval.end = m_lparenthesis->interval.end;
4573 in_parentheses = true;
4574 after_digit = false;
4575 after_parentheses = false;
4576 }
4577 else if (
4578 in_parentheses && // After left parenthesis
4579 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4580 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4581 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4582 {
4583 // Right parenthesis
4584 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4585 this->interval.end = m_rparenthesis->interval.end;
4586 safe_digit_end = this->interval.end;
4587 safe_value_size = value.size();
4588 in_parentheses = false;
4589 after_digit = false;
4590 after_parentheses = true;
4591 }
4592 else if (
4593 after_digit &&
4594 !in_parentheses && // No separators inside parentheses
4595 !after_parentheses && // No separators following right parenthesis
4596 m_separator && m_separator->match(text, this->interval.end, end, flags))
4597 {
4598 // Separator
4599 this->interval.end = m_separator->interval.end;
4600 after_digit = false;
4601 after_parentheses = false;
4602 }
4603 else if (
4605 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4606 {
4607 // Space
4608 this->interval.end = m_space->interval.end;
4609 after_digit = false;
4610 after_parentheses = false;
4611 }
4612 else
4613 break;
4614 }
4615 if (has_digits) {
4616 value.erase(safe_value_size);
4617 this->interval.start = start;
4618 this->interval.end = safe_digit_end;
4619 return true;
4620 }
4621 value.clear();
4622 this->interval.invalidate();
4623 return false;
4624 }
4625
4626 std::shared_ptr<basic_parser<T>> m_digit;
4627 std::shared_ptr<basic_parser<T>> m_plus_sign;
4628 std::shared_ptr<basic_set<T>> m_lparenthesis;
4629 std::shared_ptr<basic_set<T>> m_rparenthesis;
4630 std::shared_ptr<basic_parser<T>> m_separator;
4631 std::shared_ptr<basic_parser<T>> m_space;
4632 };
4633
// Character-type specific aliases of basic_phone_number.
4634 using phone_number = basic_phone_number<char>; ///< char variant
4635 using wphone_number = basic_phone_number<wchar_t>; ///< wchar_t variant
4636#ifdef _UNICODE
4637 using tphone_number = wphone_number; ///< wchar_t variant, selected because _UNICODE is defined
4638#else
4639 using tphone_number = phone_number; ///< char variant, selected because _UNICODE is not defined
4640#endif
4641 using sgml_phone_number = basic_phone_number<char>; ///< Same instantiation as phone_number
4642
4648 template <class T>
4649 class basic_iban : public basic_parser<T>
4650 {
4651 public:
4652 basic_iban(
4653 _In_ const std::shared_ptr<basic_parser<T>>& space,
4654 _In_ const std::locale& locale = std::locale()) :
4656 m_space(space)
4657 {
4658 this->country[0] = 0;
4659 this->check_digits[0] = 0;
4660 this->bban[0] = 0;
4661 this->is_valid = false;
4662 }
4663
4664 virtual void invalidate()
4665 {
4666 this->country[0] = 0;
4667 this->check_digits[0] = 0;
4668 this->bban[0] = 0;
4669 this->is_valid = false;
4671 }
4672
4673 T country[3];
4675 T bban[31];
4677
4678 protected:
4679 virtual bool do_match(
4680 _In_reads_or_z_opt_(end) const T* text,
4681 _In_ size_t start = 0,
4682 _In_ size_t end = SIZE_MAX,
4683 _In_ int flags = match_default)
4684 {
4685 _Assume_(text || start >= end);
4686 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4687 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4688 struct country_t {
4689 T country[2];
4690 T check_digits[2];
4691 size_t length;
4692 };
4693 static const country_t s_countries[] = {
4694 { { 'A', 'D' }, {}, 24 }, // Andorra
4695 { { 'A', 'E' }, {}, 23 }, // United Arab Emirates
4696 { { 'A', 'L' }, {}, 28 }, // Albania
4697 { { 'A', 'O' }, {}, 25 }, // Angola
4698 { { 'A', 'T' }, {}, 20 }, // Austria
4699 { { 'A', 'Z' }, {}, 28 }, // Azerbaijan
4700 { { 'B', 'A' }, { '3', '9' }, 20}, // Bosnia and Herzegovina
4701 { { 'B', 'E' }, {}, 16 }, // Belgium
4702 { { 'B', 'F' }, {}, 28 }, // Burkina Faso
4703 { { 'B', 'G' }, {}, 22 }, // Bulgaria
4704 { { 'B', 'H' }, {}, 22 }, // Bahrain
4705 { { 'B', 'I' }, {}, 27 }, // Burundi
4706 { { 'B', 'J' }, {}, 28 }, // Benin
4707 { { 'B', 'R' }, {}, 29 }, // Brazil
4708 { { 'B', 'Y' }, {}, 28 }, // Belarus
4709 { { 'C', 'F' }, {}, 27 }, // Central African Republic
4710 { { 'C', 'G' }, {}, 27 }, // Congo, Republic of the
4711 { { 'C', 'H' }, {}, 21 }, // Switzerland
4712 { { 'C', 'I' }, {}, 28 }, // Côte d'Ivoire
4713 { { 'C', 'M' }, {}, 27 }, // Cameroon
4714 { { 'C', 'R' }, {}, 22 }, // Costa Rica
4715 { { 'C', 'V' }, {}, 25 }, // Cabo Verde
4716 { { 'C', 'Y' }, {}, 28 }, // Cyprus
4717 { { 'C', 'Z' }, {}, 24 }, // Czech Republic
4718 { { 'D', 'E' }, {}, 22 }, // Germany
4719 { { 'D', 'J' }, {}, 27 }, // Djibouti
4720 { { 'D', 'K' }, {}, 18 }, // Denmark
4721 { { 'D', 'O' }, {}, 28 }, // Dominican Republic
4722 { { 'D', 'Z' }, {}, 26 }, // Algeria
4723 { { 'E', 'E' }, {}, 20 }, // Estonia
4724 { { 'E', 'G' }, {}, 29 }, // Egypt
4725 { { 'E', 'S' }, {}, 24 }, // Spain
4726 { { 'F', 'I' }, {}, 18 }, // Finland
4727 { { 'F', 'O' }, {}, 18 }, // Faroe Islands
4728 { { 'F', 'R' }, {}, 27 }, // France
4729 { { 'G', 'A' }, {}, 27 }, // Gabon
4730 { { 'G', 'B' }, {}, 22 }, // United Kingdom
4731 { { 'G', 'E' }, {}, 22 }, // Georgia
4732 { { 'G', 'I' }, {}, 23 }, // Gibraltar
4733 { { 'G', 'L' }, {}, 18 }, // Greenland
4734 { { 'G', 'Q' }, {}, 27 }, // Equatorial Guinea
4735 { { 'G', 'R' }, {}, 27 }, // Greece
4736 { { 'G', 'T' }, {}, 28 }, // Guatemala
4737 { { 'G', 'W' }, {}, 25 }, // Guinea-Bissau
4738 { { 'H', 'N' }, {}, 28 }, // Honduras
4739 { { 'H', 'R' }, {}, 21 }, // Croatia
4740 { { 'H', 'U' }, {}, 28 }, // Hungary
4741 { { 'I', 'E' }, {}, 22 }, // Ireland
4742 { { 'I', 'L' }, {}, 23 }, // Israel
4743 { { 'I', 'Q' }, {}, 23 }, // Iraq
4744 { { 'I', 'R' }, {}, 26 }, // Iran
4745 { { 'I', 'S' }, {}, 26 }, // Iceland
4746 { { 'I', 'T' }, {}, 27 }, // Italy
4747 { { 'J', 'O' }, {}, 30 }, // Jordan
4748 { { 'K', 'M' }, {}, 27 }, // Comoros
4749 { { 'K', 'W' }, {}, 30 }, // Kuwait
4750 { { 'K', 'Z' }, {}, 20 }, // Kazakhstan
4751 { { 'L', 'B' }, {}, 28 }, // Lebanon
4752 { { 'L', 'C' }, {}, 32 }, // Saint Lucia
4753 { { 'L', 'I' }, {}, 21 }, // Liechtenstein
4754 { { 'L', 'T' }, {}, 20 }, // Lithuania
4755 { { 'L', 'U' }, {}, 20 }, // Luxembourg
4756 { { 'L', 'V' }, {}, 21 }, // Latvia
4757 { { 'L', 'Y' }, {}, 25 }, // Libya
4758 { { 'M', 'A' }, {}, 28 }, // Morocco
4759 { { 'M', 'C' }, {}, 27 }, // Monaco
4760 { { 'M', 'D' }, {}, 24 }, // Moldova
4761 { { 'M', 'E' }, { '2', '5' }, 22 }, // Montenegro
4762 { { 'M', 'G' }, {}, 27 }, // Madagascar
4763 { { 'M', 'K' }, { '0', '7' }, 19 }, // North Macedonia
4764 { { 'M', 'L' }, {}, 28 }, // Mali
4765 { { 'M', 'R' }, { '1', '3' }, 27}, // Mauritania
4766 { { 'M', 'T' }, {}, 31 }, // Malta
4767 { { 'M', 'U' }, {}, 30 }, // Mauritius
4768 { { 'M', 'Z' }, {}, 25 }, // Mozambique
4769 { { 'N', 'E' }, {}, 28 }, // Niger
4770 { { 'N', 'I' }, {}, 32 }, // Nicaragua
4771 { { 'N', 'L' }, {}, 18 }, // Netherlands
4772 { { 'N', 'O' }, {}, 15 }, // Norway
4773 { { 'P', 'K' }, {}, 24 }, // Pakistan
4774 { { 'P', 'L' }, {}, 28 }, // Poland
4775 { { 'P', 'S' }, {}, 29 }, // Palestinian territories
4776 { { 'P', 'T' }, { '5', '0' }, 25 }, // Portugal
4777 { { 'Q', 'A' }, {}, 29 }, // Qatar
4778 { { 'R', 'O' }, {}, 24 }, // Romania
4779 { { 'R', 'S' }, { '3', '5' }, 22 }, // Serbia
4780 { { 'R', 'U' }, {}, 33 }, // Russia
4781 { { 'S', 'A' }, {}, 24 }, // Saudi Arabia
4782 { { 'S', 'C' }, {}, 31 }, // Seychelles
4783 { { 'S', 'D' }, {}, 18 }, // Sudan
4784 { { 'S', 'E' }, {}, 24 }, // Sweden
4785 { { 'S', 'I' }, { '5', '6' }, 19 }, // Slovenia
4786 { { 'S', 'K' }, {}, 24 }, // Slovakia
4787 { { 'S', 'M' }, {}, 27 }, // San Marino
4788 { { 'S', 'N' }, {}, 28 }, // Senegal
4789 { { 'S', 'T' }, {}, 25 }, // São Tomé and Príncipe
4790 { { 'S', 'V' }, {}, 28 }, // El Salvador
4791 { { 'T', 'D' }, {}, 27 }, // Chad
4792 { { 'T', 'G' }, {}, 28 }, // Togo
4793 { { 'T', 'L' }, { '3', '8' }, 23}, // East Timor
4794 { { 'T', 'N' }, { '5', '9' }, 24 }, // Tunisia
4795 { { 'T', 'R' }, {}, 26 }, // Turkey
4796 { { 'U', 'A' }, {}, 29 }, // Ukraine
4797 { { 'V', 'A' }, {}, 22 }, // Vatican City
4798 { { 'V', 'G' }, {}, 24 }, // Virgin Islands, British
4799 { { 'X', 'K' }, {}, 20 }, // Kosovo
4800 };
4801 const country_t* country_desc = nullptr;
4802 size_t n, available, next, bban_length;
4804
4805 this->interval.end = start;
4806 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4807 if (this->interval.end >= end || !text[this->interval.end])
4808 goto error; // incomplete country code
4809 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4810 if (chr < 'A' || 'Z' < chr)
4811 goto error; // invalid country code
4812 this->country[i] = chr;
4813 }
4814 for (size_t l = 0, r = _countof(s_countries);;) {
4815 if (l >= r)
4816 goto error; // unknown country
4817 size_t m = (l + r) / 2;
4818 const country_t& c = s_countries[m];
4819 if (c.country[0] < this->country[0] || (c.country[0] == this->country[0] && c.country[1] < this->country[1]))
4820 l = m + 1;
4821 else if (this->country[0] < c.country[0] || (this->country[0] == c.country[0] && this->country[1] < c.country[1]))
4822 r = m;
4823 else {
4824 country_desc = &c;
4825 break;
4826 }
4827 }
4828 this->country[2] = 0;
4829
4830 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4831 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4832 goto error; // incomplete or invalid check digits
4833 this->check_digits[i] = text[this->interval.end];
4834 }
4835 this->check_digits[2] = 0;
4836
4837 if ((country_desc->check_digits[0] && this->check_digits[0] != country_desc->check_digits[0]) ||
4838 (country_desc->check_digits[1] && this->check_digits[1] != country_desc->check_digits[1]))
4839 goto error; // unexpected check digits
4840
4841 bban_length = country_desc->length - 4;
4842 for (n = 0; n < bban_length;) {
4843 if (this->interval.end >= end || !text[this->interval.end])
4844 goto error; // bban too short
4845 if (m_space && m_space->match(text, this->interval.end, end, flags)) {
4846 this->interval.end = m_space->interval.end;
4847 continue;
4848 }
4849 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4850 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4851 this->bban[n++] = chr;
4852 this->interval.end++;
4853 }
4854 else
4855 goto error; // invalid bban
4856 }
4857 this->bban[n] = 0;
4858
4859 // Normalize IBAN.
4860 T normalized[69];
4861 available = 0;
4862 for (size_t i = 0; ; ++i) {
4863 if (!this->bban[i]) {
4864 for (i = 0; i < 2; ++i) {
4865 if ('A' <= this->country[i] && this->country[i] <= 'J') {
4866 normalized[available++] = '1';
4867 normalized[available++] = '0' + this->country[i] - 'A';
4868 }
4869 else if ('K' <= this->country[i] && this->country[i] <= 'T') {
4870 normalized[available++] = '2';
4871 normalized[available++] = '0' + this->country[i] - 'K';
4872 }
4873 else if ('U' <= this->country[i] && this->country[i] <= 'Z') {
4874 normalized[available++] = '3';
4875 normalized[available++] = '0' + this->country[i] - 'U';
4876 }
4877 }
4878 normalized[available++] = this->check_digits[0];
4879 normalized[available++] = this->check_digits[1];
4880 normalized[available] = 0;
4881 break;
4882 }
4883 if ('0' <= this->bban[i] && this->bban[i] <= '9')
4884 normalized[available++] = this->bban[i];
4885 else if ('A' <= this->bban[i] && this->bban[i] <= 'J') {
4886 normalized[available++] = '1';
4887 normalized[available++] = '0' + this->bban[i] - 'A';
4888 }
4889 else if ('K' <= this->bban[i] && this->bban[i] <= 'T') {
4890 normalized[available++] = '2';
4891 normalized[available++] = '0' + this->bban[i] - 'K';
4892 }
4893 else if ('U' <= this->bban[i] && this->bban[i] <= 'Z') {
4894 normalized[available++] = '3';
4895 normalized[available++] = '0' + this->bban[i] - 'U';
4896 }
4897 }
4898
4899 // Calculate modulo 97.
4900 nominator = stdex::strtou32(normalized, 9, &next, 10);
4901 for (;;) {
4902 nominator %= 97;
4903 if (!normalized[next]) {
4904 this->is_valid = nominator == 1;
4905 break;
4906 }
4907 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
4908 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
4909 nominator = nominator * 10 + (normalized[next] - '0');
4910 }
4911
4912 this->interval.start = start;
4913 return true;
4914
4915 error:
4916 invalidate();
4917 return false;
4918 }
4919
4920 std::shared_ptr<basic_parser<T>> m_space;
4921 };
4922
// Character-type specific aliases of basic_iban.
4923 using iban = basic_iban<char>; ///< char variant
4924 using wiban = basic_iban<wchar_t>; ///< wchar_t variant
4925#ifdef _UNICODE
4926 using tiban = wiban; ///< wchar_t variant, selected because _UNICODE is defined
4927#else
4928 using tiban = iban; ///< char variant, selected because _UNICODE is not defined
4929#endif
4930 using sgml_iban = basic_iban<char>; ///< Same instantiation as iban
4931
4937 template <class T>
4939 {
4940 public:
4942 _In_ const std::shared_ptr<basic_parser<T>>& space,
4943 _In_ const std::locale& locale = std::locale()) :
4945 m_space(space)
4946 {
4947 this->check_digits[0] = 0;
4948 this->reference[0] = 0;
4949 this->is_valid = false;
4950 }
4951
4952 virtual void invalidate()
4953 {
4954 this->check_digits[0] = 0;
4955 this->reference[0] = 0;
4956 this->is_valid = false;
4958 }
4959
4963
4964 protected:
4965 virtual bool do_match(
4966 _In_reads_or_z_opt_(end) const T* text,
4967 _In_ size_t start = 0,
4968 _In_ size_t end = SIZE_MAX,
4969 _In_ int flags = match_default)
4970 {
4971 _Assume_(text || start >= end);
4972 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4973 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4974 size_t n, available, next;
4976
4977 this->interval.end = start;
4978 if (this->interval.end + 1 >= end ||
4979 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'R' ||
4980 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'F')
4981 goto error; // incomplete or wrong reference ID
4982 this->interval.end += 2;
4983
4984 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4985 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4986 goto error; // incomplete or invalid check digits
4987 this->check_digits[i] = text[this->interval.end];
4988 }
4989 this->check_digits[2] = 0;
4990
4991 for (n = 0;;) {
4992 if (m_space && m_space->match(text, this->interval.end, end, flags))
4993 this->interval.end = m_space->interval.end;
4994 for (size_t j = 0; j < 4; ++j) {
4995 if (this->interval.end >= end || !text[this->interval.end])
4996 goto out;
4997 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4998 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4999 if (n >= _countof(reference) - 1)
5000 goto error; // reference overflow
5001 this->reference[n++] = chr;
5002 this->interval.end++;
5003 }
5004 else
5005 goto out;
5006 }
5007 }
5008 out:
5009 if (!n)
5010 goto error; // reference too short
5011 this->reference[_countof(this->reference) - 1] = 0;
5012 for (size_t i = n, j = _countof(this->reference) - 1; i;)
5013 this->reference[--j] = this->reference[--i];
5014 for (size_t j = _countof(this->reference) - 1 - n; j;)
5015 this->reference[--j] = '0';
5016
5017 // Normalize creditor reference.
5018 T normalized[47];
5019 available = 0;
5020 for (size_t i = 0; ; ++i) {
5021 if (!this->reference[i]) {
5022 normalized[available++] = '2'; // R
5023 normalized[available++] = '7';
5024 normalized[available++] = '1'; // F
5025 normalized[available++] = '5';
5026 normalized[available++] = this->check_digits[0];
5027 normalized[available++] = this->check_digits[1];
5028 normalized[available] = 0;
5029 break;
5030 }
5031 if ('0' <= this->reference[i] && this->reference[i] <= '9')
5032 normalized[available++] = this->reference[i];
5033 else if ('A' <= this->reference[i] && this->reference[i] <= 'J') {
5034 normalized[available++] = '1';
5035 normalized[available++] = '0' + this->reference[i] - 'A';
5036 }
5037 else if ('K' <= this->reference[i] && this->reference[i] <= 'T') {
5038 normalized[available++] = '2';
5039 normalized[available++] = '0' + this->reference[i] - 'K';
5040 }
5041 else if ('U' <= this->reference[i] && this->reference[i] <= 'Z') {
5042 normalized[available++] = '3';
5043 normalized[available++] = '0' + this->reference[i] - 'U';
5044 }
5045 }
5046
5047 // Calculate modulo 97.
5048 nominator = stdex::strtou32(normalized, 9, &next, 10);
5049 for (;;) {
5050 nominator %= 97;
5051 if (!normalized[next]) {
5052 this->is_valid = nominator == 1;
5053 break;
5054 }
5055 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
5056 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
5057 nominator = nominator * 10 + (normalized[next] - '0');
5058 }
5059
5060 this->interval.start = start;
5061 return true;
5062
5063 error:
5064 invalidate();
5065 return false;
5066 }
5067
5068 std::shared_ptr<basic_parser<T>> m_space;
5069 };
5070
// Character-type specific aliases of basic_creditor_reference.
5071 using creditor_reference = basic_creditor_reference<char>; ///< char variant
5072 using wcreditor_reference = basic_creditor_reference<wchar_t>; ///< wchar_t variant
5073#ifdef _UNICODE
5074 using tcreditor_reference = wcreditor_reference; ///< wchar_t variant, selected because _UNICODE is defined
5075#else
5076 using tcreditor_reference = creditor_reference; ///< char variant, selected because _UNICODE is not defined
5077#endif
5078 using sgml_creditor_reference = basic_creditor_reference<char>; ///< Same instantiation as creditor_reference
5079
5085 template <class T>
5087 {
5088 public:
5089 basic_si_reference_part(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5090
5091 protected:
5092 virtual bool do_match(
5093 _In_reads_or_z_opt_(end) const T* text,
5094 _In_ size_t start = 0,
5095 _In_ size_t end = SIZE_MAX,
5096 _In_ int flags = match_default)
5097 {
5098 _Assume_(text || start >= end);
5099 this->interval.end = start;
5100 for (;;) {
5101 if (this->interval.end >= end || !text[this->interval.end])
5102 break;
5103 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9')
5104 this->interval.end++;
5105 else
5106 break;
5107 }
5109 this->interval.start = start;
5110 return true;
5111 }
5112 this->interval.invalidate();
5113 return false;
5114 }
5115 };
5116
5119#ifdef _UNICODE
5121#else
5123#endif
5125
5131 template <class T>
5133 {
5134 public:
5135 basic_si_reference_delimiter(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5136
5137 protected:
5138 virtual bool do_match(
5139 _In_reads_or_z_opt_(end) const T* text,
5140 _In_ size_t start = 0,
5141 _In_ size_t end = SIZE_MAX,
5142 _In_ int flags = match_default)
5143 {
5144 _Assume_(text || start >= end);
5145 if (start < end && text[start] == '-') {
5146 this->interval.end = (this->interval.start = start) + 1;
5147 return true;
5148 }
5149 this->interval.invalidate();
5150 return false;
5151 }
5152 };
5153
5156#ifdef _UNICODE
5158#else
5160#endif
5162
5170 template <class T>
5172 {
5173 public:
5175 _In_ const std::shared_ptr<basic_parser<T>>& space,
5176 _In_ const std::locale& locale = std::locale()) :
5178 part1(locale),
5179 part2(locale),
5180 part3(locale),
5181 is_valid(false),
5182 m_space(space),
5183 m_delimiter(locale)
5184 {
5185 this->model[0] = 0;
5186 }
5187
5188 virtual void invalidate()
5189 {
5190 this->model[0] = 0;
5191 this->part1.invalidate();
5192 this->part2.invalidate();
5193 this->part3.invalidate();
5194 this->is_valid = false;
5196 }
5197
5198 T model[3];
5203
5204 protected:
5205 virtual bool do_match(
5206 _In_reads_or_z_opt_(end) const T* text,
5207 _In_ size_t start = 0,
5208 _In_ size_t end = SIZE_MAX,
5209 _In_ int flags = match_default)
5210 {
5211 _Assume_(text || start >= end);
5212 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
5213 const bool case_insensitive = flags & match_case_insensitive ? true : false;
5214
5215 this->interval.end = start;
5216 if (this->interval.end + 1 >= end ||
5217 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'S' ||
5218 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'I')
5219 goto error; // incomplete or wrong reference ID
5220 this->interval.end += 2;
5221
5222 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5223 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5224 goto error; // incomplete or invalid model
5225 this->model[i] = text[this->interval.end];
5226 }
5227 this->model[2] = 0;
5228
5229 this->part1.invalidate();
5230 this->part2.invalidate();
5231 this->part3.invalidate();
5232 if (this->model[0] == '9' && this->model[1] == '9') {
5233 is_valid = true;
5234 this->interval.start = start;
5235 return true;
5236 }
5237
5238 if (m_space && m_space->match(text, this->interval.end, end, flags))
5239 this->interval.end = m_space->interval.end;
5240
5241 this->part1.match(text, this->interval.end, end, flags) &&
5242 this->m_delimiter.match(text, this->part1.interval.end, end, flags) &&
5243 this->part2.match(text, this->m_delimiter.interval.end, end, flags) &&
5244 this->m_delimiter.match(text, this->part2.interval.end, end, flags) &&
5245 this->part3.match(text, this->m_delimiter.interval.end, end, flags);
5246
5247 this->interval.start = start;
5248 if (this->part3.interval)
5249 this->interval.end = this->part3.interval.end;
5250 else if (this->part2.interval)
5251 this->interval.end = this->part2.interval.end;
5252 else if (this->part1.interval)
5253 this->interval.end = this->part1.interval.end;
5254 else
5255 this->interval.end = start + 4;
5256
5257 if (this->model[0] == '0' && this->model[1] == '0')
5258 is_valid =
5259 this->part3.interval ?
5260 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5261 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 :
5262 this->part2.interval ?
5263 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5264 this->part1.interval.size() + this->part2.interval.size() <= 20 :
5265 this->part1.interval ?
5266 this->part1.interval.size() <= 12 :
5267 false;
5268 else if (this->model[0] == '0' && this->model[1] == '1')
5269 is_valid =
5270 this->part3.interval ?
5271 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5272 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5273 check11(
5274 text + this->part1.interval.start, this->part1.interval.size(),
5275 text + this->part2.interval.start, this->part2.interval.size(),
5276 text + this->part3.interval.start, this->part3.interval.size()) :
5277 this->part2.interval ?
5278 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5279 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5280 check11(
5281 text + this->part1.interval.start, this->part1.interval.size(),
5282 text + this->part2.interval.start, this->part2.interval.size()) :
5283 this->part1.interval ?
5284 this->part1.interval.size() <= 12 &&
5285 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5286 false;
5287 else if (this->model[0] == '0' && this->model[1] == '2')
5288 is_valid =
5289 this->part3.interval ?
5290 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5291 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5292 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5293 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5294 false;
5295 else if (this->model[0] == '0' && this->model[1] == '3')
5296 is_valid =
5297 this->part3.interval ?
5298 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5299 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5300 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5301 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5302 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5303 false;
5304 else if (this->model[0] == '0' && this->model[1] == '4')
5305 is_valid =
5306 this->part3.interval ?
5307 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5308 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5309 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5310 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5311 false;
5312 else if ((this->model[0] == '0' || this->model[0] == '5') && this->model[1] == '5')
5313 is_valid =
5314 this->part3.interval ?
5315 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5316 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5317 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5318 this->part2.interval ?
5319 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5320 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5321 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5322 this->part1.interval ?
5323 this->part1.interval.size() <= 12 &&
5324 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5325 false;
5326 else if (this->model[0] == '0' && this->model[1] == '6')
5327 is_valid =
5328 this->part3.interval ?
5329 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5330 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5331 check11(
5332 text + this->part2.interval.start, this->part2.interval.size(),
5333 text + this->part3.interval.start, this->part3.interval.size()) :
5334 this->part2.interval ?
5335 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5336 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5337 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5338 false;
5339 else if (this->model[0] == '0' && this->model[1] == '7')
5340 is_valid =
5341 this->part3.interval ?
5342 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5343 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5344 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5345 this->part2.interval ?
5346 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5347 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5348 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5349 false;
5350 else if (this->model[0] == '0' && this->model[1] == '8')
5351 is_valid =
5352 this->part3.interval ?
5353 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5354 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5355 check11(
5356 text + this->part1.interval.start, this->part1.interval.size(),
5357 text + this->part2.interval.start, this->part2.interval.size()) &&
5358 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5359 false;
5360 else if (this->model[0] == '0' && this->model[1] == '9')
5361 is_valid =
5362 this->part3.interval ?
5363 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5364 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5365 check11(
5366 text + this->part1.interval.start, this->part1.interval.size(),
5367 text + this->part2.interval.start, this->part2.interval.size()) :
5368 this->part2.interval ?
5369 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5370 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5371 check11(
5372 text + this->part1.interval.start, this->part1.interval.size(),
5373 text + this->part2.interval.start, this->part2.interval.size()) :
5374 this->part1.interval ?
5375 this->part1.interval.size() <= 12 &&
5376 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5377 false;
5378 else if (this->model[0] == '1' && this->model[1] == '0')
5379 is_valid =
5380 this->part3.interval ?
5381 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5382 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5383 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5384 check11(
5385 text + this->part2.interval.start, this->part2.interval.size(),
5386 text + this->part3.interval.start, this->part3.interval.size()) :
5387 this->part2.interval ?
5388 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5389 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5390 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5391 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5392 false;
5393 else if (
5394 (this->model[0] == '1' && (this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5395 ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '8') ||
5396 (this->model[0] == '4' && (this->model[1] == '0' || this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5397 (this->model[0] == '5' && (this->model[1] == '1' || this->model[1] == '8')))
5398 is_valid =
5399 this->part3.interval ?
5400 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5401 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5402 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5403 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5404 this->part2.interval ?
5405 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5406 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5407 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5408 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5409 false;
5410 else if (this->model[0] == '1' && this->model[1] == '2')
5411 is_valid =
5412 this->part3.interval ? false :
5413 this->part2.interval ? false :
5414 this->part1.interval ?
5415 this->part1.interval.size() <= 13 &&
5416 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5417 false;
5418 else if ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '1')
5419 is_valid =
5420 this->part3.interval ? false :
5421 this->part2.interval ?
5422 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5423 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5424 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5425 false;
5426 else
5427 is_valid = true; // Assume models we don't handle as valid
5428 return true;
5429
5430 error:
5431 invalidate();
5432 return false;
5433 }
5434
5435 static bool check11(
5436 _In_count_(num_part1) const T* part1, _In_ size_t num_part1)
5437 {
5438 _Assume_(part1 && num_part1 >= 1);
5439 uint32_t nominator = 0, ponder = 2;
5440 for (size_t i = num_part1 - 1; i--; ++ponder)
5441 nominator += (part1[i] - '0') * ponder;
5442 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5443 if (control >= 10)
5444 control = 0;
5445 return control == part1[num_part1 - 1] - '0';
5446 }
5447
5448 static bool check11(
5449 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5450 _In_count_(num_part2) const T* part2, _In_ size_t num_part2)
5451 {
5452 _Assume_(part1 || !num_part1);
5453 _Assume_(part2 && num_part2 >= 1);
5454 uint32_t nominator = 0, ponder = 2;
5455 for (size_t i = num_part2 - 1; i--; ++ponder)
5456 nominator += (part2[i] - '0') * ponder;
5457 for (size_t i = num_part1; i--; ++ponder)
5458 nominator += (part1[i] - '0') * ponder;
5459 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5460 if (control == 10)
5461 control = 0;
5462 return control == part2[num_part2 - 1] - '0';
5463 }
5464
5465 static bool check11(
5466 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5467 _In_count_(num_part2) const T* part2, _In_ size_t num_part2,
5468 _In_count_(num_part3) const T* part3, _In_ size_t num_part3)
5469 {
5470 _Assume_(part1 || !num_part1);
5471 _Assume_(part2 || !num_part2);
5472 _Assume_(part3 && num_part3 >= 1);
5473 uint32_t nominator = 0, ponder = 2;
5474 for (size_t i = num_part3 - 1; i--; ++ponder)
5475 nominator += (part3[i] - '0') * ponder;
5476 for (size_t i = num_part2; i--; ++ponder)
5477 nominator += (part2[i] - '0') * ponder;
5478 for (size_t i = num_part1; i--; ++ponder)
5479 nominator += (part1[i] - '0') * ponder;
5480 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5481 if (control == 10)
5482 control = 0;
5483 return control == part2[num_part3 - 1] - '0';
5484 }
5485
5486 std::shared_ptr<basic_parser<T>> m_space;
5487 basic_si_reference_delimiter<T> m_delimiter;
5488 };
5489
5490 using si_reference = basic_si_reference<char>;
5491 using wsi_reference = basic_si_reference<wchar_t>;
5492#ifdef _UNICODE
5493 using tsi_reference = wsi_reference;
5494#else
5495 using tsi_reference = si_reference;
5496#endif
5497 using sgml_si_reference = basic_si_reference<char>;
5498
5502 template <class T>
5504 {
5505 public:
// Constructor parameter list and member initializers: stores the three sub-parsers
// (element, digit, sign) and clears the has_digits / has_charge result flags.
// NOTE(review): listing lines 5506 and 5511 are absent here - presumably the constructor
// name and the base-class initializer taking `locale`; confirm against the original header.
5507 _In_ const std::shared_ptr<basic_parser<T>>& element,
5508 _In_ const std::shared_ptr<basic_parser<T>>& digit,
5509 _In_ const std::shared_ptr<basic_parser<T>>& sign,
5510 _In_ const std::locale& locale = std::locale()) :
5512 m_element(element),
5513 m_digit(digit),
5514 m_sign(sign),
5515 has_digits(false),
5516 has_charge(false)
5517 {}
5518
// Resets the result flags of the last match attempt.
// NOTE(review): listing line 5523 is absent here - presumably the call to the base-class
// invalidate(), as in the other parsers of this file; confirm against the original header.
5519 virtual void invalidate()
5520 {
5521 has_digits = false;
5522 has_charge = false;
5524 }
5525
5526 bool has_digits;
5527 bool has_charge;
5528
5529 protected:
5530 virtual bool do_match(
5531 _In_reads_or_z_opt_(end) const T* text,
5532 _In_ size_t start = 0,
5533 _In_ size_t end = SIZE_MAX,
5534 _In_ int flags = match_default)
5535 {
5536 _Assume_(text || start >= end);
5537
5538 has_digits = false;
5539 has_charge = false;
5540 this->interval.end = start;
5541
5542 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
5543 for (;;) {
5544 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
5545 this->interval.end = m_element->interval.end;
5546 while (m_digit->match(text, this->interval.end, end, flags)) {
5547 this->interval.end = m_digit->interval.end;
5548 has_digits = true;
5549 }
5550 }
5551 else if (start < this->interval.end) {
5552 if (m_sign->match(text, this->interval.end, end, flags)) {
5553 this->interval.end = m_sign->interval.end;
5554 has_charge = true;
5555 }
5556 this->interval.start = start;
5557 return true;
5558 }
5559 else {
5560 this->interval.invalidate();
5561 return false;
5562 }
5563 }
5564 }
5565
5566 std::shared_ptr<basic_parser<T>> m_element;
5567 std::shared_ptr<basic_parser<T>> m_digit;
5568 std::shared_ptr<basic_parser<T>> m_sign;
5569 };
5570
5573#ifdef _UNICODE
5575#else
5577#endif
5579
5584 {
5585 protected:
5586 virtual bool do_match(
5587 _In_reads_or_z_(end) const char* text,
5588 _In_ size_t start = 0,
5589 _In_ size_t end = SIZE_MAX,
5590 _In_ int flags = match_default)
5591 {
5592 _Assume_(text || start >= end);
5593 this->interval.end = start;
5594
5595 _Assume_(text || this->interval.end >= end);
5596 if (this->interval.end < end && text[this->interval.end]) {
5597 if (text[this->interval.end] == '\r') {
5598 this->interval.end++;
5599 if (this->interval.end < end && text[this->interval.end] == '\n') {
5600 this->interval.start = start;
5601 this->interval.end++;
5602 return true;
5603 }
5604 }
5605 else if (text[this->interval.end] == '\n') {
5606 this->interval.start = start;
5607 this->interval.end++;
5608 return true;
5609 }
5610 }
5611 this->interval.invalidate();
5612 return false;
5613 }
5614 };
5615
5619 class http_space : public parser
5620 {
5621 protected:
5622 virtual bool do_match(
5623 _In_reads_or_z_(end) const char* text,
5624 _In_ size_t start = 0,
5625 _In_ size_t end = SIZE_MAX,
5626 _In_ int flags = match_default)
5627 {
5628 _Assume_(text || start >= end);
5629 this->interval.end = start;
5630 if (m_line_break.match(text, this->interval.end, end, flags)) {
5631 this->interval.end = m_line_break.interval.end;
5632 if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5633 this->interval.start = start;
5634 this->interval.end++;
5635 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5636 return true;
5637 }
5638 }
5639 else if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5640 this->interval.start = start;
5641 this->interval.end++;
5642 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5643 return true;
5644 }
5645 this->interval.invalidate();
5646 return false;
5647 }
5648
5649 http_line_break m_line_break;
5650 };
5651
5655 class http_text_char : public parser
5656 {
5657 protected:
5658 virtual bool do_match(
5659 _In_reads_or_z_(end) const char* text,
5660 _In_ size_t start = 0,
5661 _In_ size_t end = SIZE_MAX,
5662 _In_ int flags = match_default)
5663 {
5664 _Assume_(text || start >= end);
5665 this->interval.end = start;
5666
5667 _Assume_(text || this->interval.end >= end);
5668 if (m_space.match(text, this->interval.end, end, flags)) {
5669 this->interval.start = start;
5670 this->interval.end = m_space.interval.end;
5671 return true;
5672 }
5673 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
5674 this->interval.start = start;
5675 this->interval.end++;
5676 return true;
5677 }
5678 this->interval.invalidate();
5679 return false;
5680 }
5681
5682 http_space m_space;
5683 };
5684
5688 class http_token : public parser
5689 {
5690 protected:
5691 virtual bool do_match(
5692 _In_reads_or_z_(end) const char* text,
5693 _In_ size_t start = 0,
5694 _In_ size_t end = SIZE_MAX,
5695 _In_ int flags = match_default)
5696 {
5697 _Assume_(text || start >= end);
5698 this->interval.end = start;
5699 for (;;) {
5700 if (this->interval.end < end && text[this->interval.end]) {
5701 if ((unsigned int)text[this->interval.end] < 0x20 ||
5702 (unsigned int)text[this->interval.end] == 0x7f ||
5703 text[this->interval.end] == '(' ||
5704 text[this->interval.end] == ')' ||
5705 text[this->interval.end] == '<' ||
5706 text[this->interval.end] == '>' ||
5707 text[this->interval.end] == '@' ||
5708 text[this->interval.end] == ',' ||
5709 text[this->interval.end] == ';' ||
5710 text[this->interval.end] == ':' ||
5711 text[this->interval.end] == '\\' ||
5712 text[this->interval.end] == '\"' ||
5713 text[this->interval.end] == '/' ||
5714 text[this->interval.end] == '[' ||
5715 text[this->interval.end] == ']' ||
5716 text[this->interval.end] == '?' ||
5717 text[this->interval.end] == '=' ||
5718 text[this->interval.end] == '{' ||
5719 text[this->interval.end] == '}' ||
5720 stdex::isspace(text[this->interval.end]))
5721 break;
5722 else
5723 this->interval.end++;
5724 }
5725 else
5726 break;
5727 }
5729 this->interval.start = start;
5730 return true;
5731 }
5732 else {
5733 this->interval.invalidate();
5734 return false;
5735 }
5736 }
5737 };
5738
5743 {
5744 public:
5745 virtual void invalidate()
5746 {
5747 content.start = 1;
5748 content.end = 0;
5749 parser::invalidate();
5750 }
5751
5753
5754 protected:
5755 virtual bool do_match(
5756 _In_reads_or_z_(end) const char* text,
5757 _In_ size_t start = 0,
5758 _In_ size_t end = SIZE_MAX,
5759 _In_ int flags = match_default)
5760 {
5761 _Assume_(text || start >= end);
5762 this->interval.end = start;
5763 if (this->interval.end < end && text[this->interval.end] != '"')
5764 goto error;
5765 this->interval.end++;
5766 content.start = this->interval.end;
5767 for (;;) {
5768 _Assume_(text || this->interval.end >= end);
5769 if (this->interval.end < end && text[this->interval.end]) {
5770 if (text[this->interval.end] == '"') {
5771 content.end = this->interval.end;
5772 this->interval.end++;
5773 break;
5774 }
5775 else if (text[this->interval.end] == '\\') {
5776 this->interval.end++;
5777 if (this->interval.end < end && text[this->interval.end]) {
5778 this->interval.end++;
5779 }
5780 else
5781 goto error;
5782 }
5783 else if (m_chr.match(text, this->interval.end, end, flags))
5784 this->interval.end++;
5785 else
5786 goto error;
5787 }
5788 else
5789 goto error;
5790 }
5791 this->interval.start = start;
5792 return true;
5793
5794 error:
5795 invalidate();
5796 return false;
5797 }
5798
5799 http_text_char m_chr;
5800 };
5801
5805 class http_value : public parser
5806 {
5807 public:
5808 virtual void invalidate()
5809 {
5810 string.invalidate();
5811 token.invalidate();
5812 parser::invalidate();
5813 }
5814
5817
5818 protected:
5819 virtual bool do_match(
5820 _In_reads_or_z_(end) const char* text,
5821 _In_ size_t start = 0,
5822 _In_ size_t end = SIZE_MAX,
5823 _In_ int flags = match_default)
5824 {
5825 _Assume_(text || start >= end);
5826 this->interval.end = start;
5827 if (string.match(text, this->interval.end, end, flags)) {
5828 token.invalidate();
5829 this->interval.end = string.interval.end;
5830 this->interval.start = start;
5831 return true;
5832 }
5833 else if (token.match(text, this->interval.end, end, flags)) {
5834 string.invalidate();
5835 this->interval.end = token.interval.end;
5836 this->interval.start = start;
5837 return true;
5838 }
5839 else {
5840 this->interval.invalidate();
5841 return false;
5842 }
5843 }
5844 };
5845
5849 class http_parameter : public parser
5850 {
5851 public:
5852 virtual void invalidate()
5853 {
5854 name.invalidate();
5855 value.invalidate();
5856 parser::invalidate();
5857 }
5858
5861
5862 protected:
5863 virtual bool do_match(
5864 _In_reads_or_z_(end) const char* text,
5865 _In_ size_t start = 0,
5866 _In_ size_t end = SIZE_MAX,
5867 _In_ int flags = match_default)
5868 {
5869 _Assume_(text || start >= end);
5870 this->interval.end = start;
5871 if (name.match(text, this->interval.end, end, flags))
5872 this->interval.end = name.interval.end;
5873 else
5874 goto error;
5875 while (m_space.match(text, this->interval.end, end, flags))
5876 this->interval.end = m_space.interval.end;
5877 _Assume_(text || this->interval.end >= end);
5878 if (this->interval.end < end && text[this->interval.end] == '=')
5879 this->interval.end++;
5880 else
5881 while (m_space.match(text, this->interval.end, end, flags))
5882 this->interval.end = m_space.interval.end;
5883 if (value.match(text, this->interval.end, end, flags))
5884 this->interval.end = value.interval.end;
5885 else
5886 goto error;
5887 this->interval.start = start;
5888 return true;
5889
5890 error:
5891 invalidate();
5892 return false;
5893 }
5894
5895 http_space m_space;
5896 };
5897
5901 class http_any_type : public parser
5902 {
5903 protected:
5904 virtual bool do_match(
5905 _In_reads_or_z_(end) const char* text,
5906 _In_ size_t start = 0,
5907 _In_ size_t end = SIZE_MAX,
5908 _In_ int flags = match_default)
5909 {
5910 _Assume_(text || start >= end);
5911 if (start + 2 < end &&
5912 text[start] == '*' &&
5913 text[start + 1] == '/' &&
5914 text[start + 2] == '*')
5915 {
5916 this->interval.end = (this->interval.start = start) + 3;
5917 return true;
5918 }
5919 else if (start < end && text[start] == '*') {
5920 this->interval.end = (this->interval.start = start) + 1;
5921 return true;
5922 }
5923 else {
5924 this->interval.invalidate();
5925 return false;
5926 }
5927 }
5928 };
5929
5934 {
5935 public:
5936 virtual void invalidate()
5937 {
5938 type.invalidate();
5939 subtype.invalidate();
5940 parser::invalidate();
5941 }
5942
5943 http_token type;
5944 http_token subtype;
5945
5946 protected:
5947 virtual bool do_match(
5948 _In_reads_or_z_(end) const char* text,
5949 _In_ size_t start = 0,
5950 _In_ size_t end = SIZE_MAX,
5951 _In_ int flags = match_default)
5952 {
5953 _Assume_(text || start >= end);
5954 this->interval.end = start;
5955 if (type.match(text, this->interval.end, end, flags))
5956 this->interval.end = type.interval.end;
5957 else
5958 goto error;
5959 while (m_space.match(text, this->interval.end, end, flags))
5960 this->interval.end = m_space.interval.end;
5961 if (this->interval.end < end && text[this->interval.end] == '/')
5962 this->interval.end++;
5963 else
5964 goto error;
5965 while (m_space.match(text, this->interval.end, end, flags))
5966 this->interval.end = m_space.interval.end;
5967 if (subtype.match(text, this->interval.end, end, flags))
5968 this->interval.end = subtype.interval.end;
5969 else
5970 goto error;
5971 this->interval.start = start;
5972 return true;
5973
5974 error:
5975 invalidate();
5976 return false;
5977 }
5978
5979 http_space m_space;
5980 };
5981
5986 {
5987 public:
5988 virtual void invalidate()
5989 {
5990 params.clear();
5991 http_media_range::invalidate();
5992 }
5993
5994 std::list<http_parameter> params;
5995
5996 protected:
5997 virtual bool do_match(
5998 _In_reads_or_z_(end) const char* text,
5999 _In_ size_t start = 0,
6000 _In_ size_t end = SIZE_MAX,
6001 _In_ int flags = match_default)
6002 {
6003 _Assume_(text || start >= end);
6004 if (!http_media_range::do_match(text, start, end, flags))
6005 goto error;
6006 params.clear();
6007 for (;;) {
6008 if (this->interval.end < end && text[this->interval.end]) {
6009 if (m_space.match(text, this->interval.end, end, flags))
6010 this->interval.end = m_space.interval.end;
6011 else if (text[this->interval.end] == ';') {
6012 this->interval.end++;
6013 while (m_space.match(text, this->interval.end, end, flags))
6014 this->interval.end = m_space.interval.end;
6015 http_parameter param;
6016 if (param.match(text, this->interval.end, end, flags)) {
6017 this->interval.end = param.interval.end;
6018 params.push_back(std::move(param));
6019 }
6020 else
6021 break;
6022 }
6023 else
6024 break;
6025 }
6026 else
6027 break;
6028 }
6029 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
6030 return true;
6031
6032 error:
6033 invalidate();
6034 return false;
6035 }
6036 };
6037
6042 {
6043 protected:
6044 virtual bool do_match(
6045 _In_reads_or_z_(end) const char* text,
6046 _In_ size_t start = 0,
6047 _In_ size_t end = SIZE_MAX,
6048 _In_ int flags = match_default)
6049 {
6050 _Assume_(text || start >= end);
6051 this->interval.end = start;
6052 for (;;) {
6053 if (this->interval.end < end && text[this->interval.end]) {
6054 if ((unsigned int)text[this->interval.end] < 0x20 ||
6055 (unsigned int)text[this->interval.end] == 0x7f ||
6056 text[this->interval.end] == ':' ||
6057 text[this->interval.end] == '/' ||
6058 stdex::isspace(text[this->interval.end]))
6059 break;
6060 else
6061 this->interval.end++;
6062 }
6063 else
6064 break;
6065 }
6067 this->interval.start = start;
6068 return true;
6069 }
6070 this->interval.invalidate();
6071 return false;
6072 }
6073 };
6074
6078 class http_url_port : public parser
6079 {
6080 public:
6081 http_url_port(_In_ const std::locale& locale = std::locale()) :
6082 parser(locale),
6083 value(0)
6084 {}
6085
6086 virtual void invalidate()
6087 {
6088 value = 0;
6089 parser::invalidate();
6090 }
6091
6092 uint16_t value;
6093
6094 protected:
6095 virtual bool do_match(
6096 _In_reads_or_z_(end) const char* text,
6097 _In_ size_t start = 0,
6098 _In_ size_t end = SIZE_MAX,
6099 _In_ int flags = match_default)
6100 {
6101 _Assume_(text || start >= end);
6102 value = 0;
6103 this->interval.end = start;
6104 for (;;) {
6105 if (this->interval.end < end && text[this->interval.end]) {
6106 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6107 size_t _value = (size_t)value * 10 + text[this->interval.end] - '0';
6108 if (_value > (uint16_t)-1) {
6109 value = 0;
6110 this->interval.invalidate();
6111 return false;
6112 }
6113 value = (uint16_t)_value;
6114 this->interval.end++;
6115 }
6116 else
6117 break;
6118 }
6119 else
6120 break;
6121 }
6123 this->interval.start = start;
6124 return true;
6125 }
6126 this->interval.invalidate();
6127 return false;
6128 }
6129 };
6130
6135 {
6136 protected:
6137 virtual bool do_match(
6138 _In_reads_or_z_(end) const char* text,
6139 _In_ size_t start = 0,
6140 _In_ size_t end = SIZE_MAX,
6141 _In_ int flags = match_default)
6142 {
6143 _Assume_(text || start >= end);
6144 this->interval.end = start;
6145 for (;;) {
6146 if (this->interval.end < end && text[this->interval.end]) {
6147 if ((unsigned int)text[this->interval.end] < 0x20 ||
6148 (unsigned int)text[this->interval.end] == 0x7f ||
6149 text[this->interval.end] == '?' ||
6150 text[this->interval.end] == '/' ||
6151 stdex::isspace(text[this->interval.end]))
6152 break;
6153 else
6154 this->interval.end++;
6155 }
6156 else
6157 break;
6158 }
6159 this->interval.start = start;
6160 return true;
6161 }
6162 };
6163
6167 class http_url_path : public parser
6168 {
6169 public:
6170 virtual void invalidate()
6171 {
6172 segments.clear();
6173 parser::invalidate();
6174 }
6175
6176 std::vector<http_url_path_segment> segments;
6177
6178 protected:
6179 virtual bool do_match(
6180 _In_reads_or_z_(end) const char* text,
6181 _In_ size_t start = 0,
6182 _In_ size_t end = SIZE_MAX,
6183 _In_ int flags = match_default)
6184 {
6185 _Assume_(text || start >= end);
6187 this->interval.end = start;
6188 segments.clear();
6189 _Assume_(text || this->interval.end >= end);
6190 if (this->interval.end < end && text[this->interval.end] != '/')
6191 goto error;
6192 this->interval.end++;
6193 s.match(text, this->interval.end, end, flags);
6194 segments.push_back(s);
6195 this->interval.end = s.interval.end;
6196 for (;;) {
6197 if (this->interval.end < end && text[this->interval.end]) {
6198 if (text[this->interval.end] == '/') {
6199 this->interval.end++;
6200 s.match(text, this->interval.end, end, flags);
6201 segments.push_back(s);
6202 this->interval.end = s.interval.end;
6203 }
6204 else
6205 break;
6206 }
6207 else
6208 break;
6209 }
6210 this->interval.start = start;
6211 return true;
6212
6213 error:
6214 invalidate();
6215 return false;
6216 }
6217 };
6218
6223 {
6224 public:
6225 virtual void invalidate()
6226 {
6227 name.start = 1;
6228 name.end = 0;
6229 value.start = 1;
6230 value.end = 0;
6231 parser::invalidate();
6232 }
6233
6236
6237 protected:
6238 virtual bool do_match(
6239 _In_reads_or_z_(end) const char* text,
6240 _In_ size_t start = 0,
6241 _In_ size_t end = SIZE_MAX,
6242 _In_ int flags = match_default)
6243 {
6244 _Assume_(text || start >= end);
6245 this->interval.end = start;
6246 name.start = this->interval.end;
6247 for (;;) {
6248 if (this->interval.end < end && text[this->interval.end]) {
6249 if ((unsigned int)text[this->interval.end] < 0x20 ||
6250 (unsigned int)text[this->interval.end] == 0x7f ||
6251 text[this->interval.end] == '&' ||
6252 text[this->interval.end] == '=' ||
6253 stdex::isspace(text[this->interval.end]))
6254 break;
6255 else
6256 this->interval.end++;
6257 }
6258 else
6259 break;
6260 }
6262 name.end = this->interval.end;
6263 else
6264 goto error;
6265 if (text[this->interval.end] == '=') {
6266 this->interval.end++;
6267 value.start = this->interval.end;
6268 for (;;) {
6269 if (this->interval.end < end && text[this->interval.end]) {
6270 if ((unsigned int)text[this->interval.end] < 0x20 ||
6271 (unsigned int)text[this->interval.end] == 0x7f ||
6272 text[this->interval.end] == '&' ||
6273 stdex::isspace(text[this->interval.end]))
6274 break;
6275 else
6276 this->interval.end++;
6277 }
6278 else
6279 break;
6280 }
6281 value.end = this->interval.end;
6282 }
6283 else {
6284 value.start = 1;
6285 value.end = 0;
6286 }
6287 this->interval.start = start;
6288 return true;
6289
6290 error:
6291 invalidate();
6292 return false;
6293 }
6294 };
6295
6299 class http_url : public parser
6300 {
6301 public:
6302 http_url(_In_ const std::locale& locale = std::locale()) :
6303 parser(locale),
6304 port(locale)
6305 {}
6306
6307 virtual void invalidate()
6308 {
6309 server.invalidate();
6310 port.invalidate();
6311 path.invalidate();
6312 params.clear();
6313 parser::invalidate();
6314 }
6315
6316 http_url_server server;
6317 http_url_port port;
6318 http_url_path path;
6319 std::list<http_url_parameter> params;
6320
6321 protected:
6322 virtual bool do_match(
6323 _In_reads_or_z_(end) const char* text,
6324 _In_ size_t start = 0,
6325 _In_ size_t end = SIZE_MAX,
6326 _In_ int flags = match_default)
6327 {
6328 _Assume_(text || start >= end);
6329 this->interval.end = start;
6330
6331 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", SIZE_MAX, m_locale) == 0) {
6332 this->interval.end += 7;
6333 if (server.match(text, this->interval.end, end, flags))
6334 this->interval.end = server.interval.end;
6335 else
6336 goto error;
6337 if (this->interval.end < end && text[this->interval.end] == ':') {
6338 this->interval.end++;
6339 if (port.match(text, this->interval.end, end, flags))
6340 this->interval.end = port.interval.end;
6341 }
6342 else {
6343 port.invalidate();
6344 port.value = 80;
6345 }
6346 }
6347 else {
6348 server.invalidate();
6349 port.invalidate();
6350 port.value = 80;
6351 }
6352
6353 if (path.match(text, this->interval.end, end, flags))
6354 this->interval.end = path.interval.end;
6355 else
6356 goto error;
6357
6358 params.clear();
6359
6360 if (this->interval.end < end && text[this->interval.end] == '?') {
6361 this->interval.end++;
6362 for (;;) {
6363 if (this->interval.end < end && text[this->interval.end]) {
6364 if ((unsigned int)text[this->interval.end] < 0x20 ||
6365 (unsigned int)text[this->interval.end] == 0x7f ||
6366 stdex::isspace(text[this->interval.end]))
6367 break;
6368 else if (text[this->interval.end] == '&')
6369 this->interval.end++;
6370 else {
6371 http_url_parameter param;
6372 if (param.match(text, this->interval.end, end, flags)) {
6373 this->interval.end = param.interval.end;
6374 params.push_back(std::move(param));
6375 }
6376 else
6377 break;
6378 }
6379 }
6380 else
6381 break;
6382 }
6383 }
6384
6385 this->interval.start = start;
6386 return true;
6387
6388 error:
6389 invalidate();
6390 return false;
6391 }
6392 };
6393
6397 class http_language : public parser
6398 {
6399 public:
6400 virtual void invalidate()
6401 {
6402 components.clear();
6403 parser::invalidate();
6404 }
6405
6406 std::vector<stdex::interval<size_t>> components;
6407
6408 protected:
6409 virtual bool do_match(
6410 _In_reads_or_z_(end) const char* text,
6411 _In_ size_t start = 0,
6412 _In_ size_t end = SIZE_MAX,
6413 _In_ int flags = match_default)
6414 {
6415 _Assume_(text || start >= end);
6416 this->interval.end = start;
6417 components.clear();
6418 for (;;) {
6419 if (this->interval.end < end && text[this->interval.end]) {
6421 k.end = this->interval.end;
6422 for (;;) {
6423 if (k.end < end && text[k.end]) {
6424 if (stdex::isalpha(text[k.end]))
6425 k.end++;
6426 else
6427 break;
6428 }
6429 else
6430 break;
6431 }
6432 if (this->interval.end < k.end) {
6433 k.start = this->interval.end;
6434 this->interval.end = k.end;
6435 components.push_back(k);
6436 }
6437 else
6438 break;
6439 if (this->interval.end < end && text[this->interval.end] == '-')
6440 this->interval.end++;
6441 else
6442 break;
6443 }
6444 else
6445 break;
6446 }
6447 if (!components.empty()) {
6448 this->interval.start = start;
6449 this->interval.end = components.back().end;
6450 return true;
6451 }
6452 this->interval.invalidate();
6453 return false;
6454 }
6455 };
6456
6460 class http_weight : public parser
6461 {
6462 public:
6463 http_weight(_In_ const std::locale& locale = std::locale()) :
6464 parser(locale),
6465 value(1.0f)
6466 {}
6467
6468 virtual void invalidate()
6469 {
6470 value = 1.0f;
6471 parser::invalidate();
6472 }
6473
6474 float value;
6475
6476 protected:
6477 virtual bool do_match(
6478 _In_reads_or_z_(end) const char* text,
6479 _In_ size_t start = 0,
6480 _In_ size_t end = SIZE_MAX,
6481 _In_ int flags = match_default)
6482 {
6483 _Assume_(text || start >= end);
6484 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
6485 this->interval.end = start;
6486 for (;;) {
6487 if (this->interval.end < end && text[this->interval.end]) {
6488 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6489 celi_del = celi_del * 10 + text[this->interval.end] - '0';
6490 this->interval.end++;
6491 }
6492 else if (text[this->interval.end] == '.') {
6493 this->interval.end++;
6494 for (;;) {
6495 if (this->interval.end < end && text[this->interval.end]) {
6496 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6497 decimalni_del = decimalni_del * 10 + text[this->interval.end] - '0';
6498 decimalni_del_n *= 10;
6499 this->interval.end++;
6500 }
6501 else
6502 break;
6503 }
6504 else
6505 break;
6506 }
6507 break;
6508 }
6509 else
6510 break;
6511 }
6512 else
6513 break;
6514 }
6517 this->interval.start = start;
6518 return true;
6519 }
6520 value = 1.0f;
6521 this->interval.invalidate();
6522 return false;
6523 }
6524 };
6525
6529 class http_asterisk : public parser
6530 {
6531 protected:
6532 virtual bool do_match(
6533 _In_reads_or_z_(end) const char* text,
6534 _In_ size_t start = 0,
6535 _In_ size_t end = SIZE_MAX,
6536 _In_ int flags = match_default)
6537 {
6538 _Assume_(text || end <= start);
6539 if (start < end && text[start] == '*') {
6540 this->interval.end = (this->interval.start = start) + 1;
6541 return true;
6542 }
6543 this->interval.invalidate();
6544 return false;
6545 }
6546 };
6547
6551 template <class T, class T_asterisk = http_asterisk>
6553 {
6554 public:
6555 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
6556 parser(locale),
6557 factor(locale)
6558 {}
6559
6560 virtual void invalidate()
6561 {
6562 asterisk.invalidate();
6563 value.invalidate();
6564 factor.invalidate();
6565 parser::invalidate();
6566 }
6567
6568 T_asterisk asterisk;
6569 T value;
6570 http_weight factor;
6571
6572 protected:
6573 virtual bool do_match(
6574 _In_reads_or_z_(end) const char* text,
6575 _In_ size_t start = 0,
6576 _In_ size_t end = SIZE_MAX,
6577 _In_ int flags = match_default)
6578 {
6579 _Assume_(text || start >= end);
6580 size_t konec_vrednosti;
6581 this->interval.end = start;
6582 if (asterisk.match(text, this->interval.end, end, flags)) {
6583 this->interval.end = konec_vrednosti = asterisk.interval.end;
6584 value.invalidate();
6585 }
6586 else if (value.match(text, this->interval.end, end, flags)) {
6587 this->interval.end = konec_vrednosti = value.interval.end;
6588 asterisk.invalidate();
6589 }
6590 else {
6591 asterisk.invalidate();
6592 value.invalidate();
6593 this->interval.invalidate();
6594 return false;
6595 }
6596
6597 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6598 if (this->interval.end < end && text[this->interval.end] == ';') {
6599 this->interval.end++;
6600 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6601 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
6602 this->interval.end++;
6603 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6604 if (this->interval.end < end && text[this->interval.end] == '=') {
6605 this->interval.end++;
6606 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6607 if (factor.match(text, this->interval.end, end, flags))
6608 this->interval.end = factor.interval.end;
6609 }
6610 }
6611 }
6612 if (!factor.interval) {
6613 factor.invalidate();
6614 this->interval.end = konec_vrednosti;
6615 }
6616 this->interval.start = start;
6617 return true;
6618 }
6619 };
6620
6625 {
6626 public:
// Clears the name and value sub-parsers and then the base parser state.
6627 virtual void invalidate()
6628 {
6629 name.invalidate();
6630 value.invalidate();
6631 parser::invalidate();
6632 }
6633
6634 http_token name;
6635 http_value value;
6636
6637 protected:
6638 virtual bool do_match(
6639 _In_reads_or_z_(end) const char* text,
6640 _In_ size_t start = 0,
6641 _In_ size_t end = SIZE_MAX,
6642 _In_ int flags = match_default)
6643 {
6644 _Assume_(text || start >= end);
6645 this->interval.end = start;
6646 if (this->interval.end < end && text[this->interval.end] == '$')
6647 this->interval.end++;
6648 else
6649 goto error;
6650 if (name.match(text, this->interval.end, end, flags))
6651 this->interval.end = name.interval.end;
6652 else
6653 goto error;
6654 while (m_space.match(text, this->interval.end, end, flags))
6655 this->interval.end = m_space.interval.end;
6656 if (this->interval.end < end && text[this->interval.end] == '=')
6657 this->interval.end++;
6658 else
6659 goto error;
6660 while (m_space.match(text, this->interval.end, end, flags))
6661 this->interval.end = m_space.interval.end;
6662 if (value.match(text, this->interval.end, end, flags))
6663 this->interval.end = value.interval.end;
6664 else
6665 goto error;
6666 this->interval.start = start;
6667 return true;
6668
6669 error:
6670 invalidate();
6671 return false;
6672 }
6673
6674 http_space m_space;
6675 };
6676
6680 class http_cookie : public parser
6681 {
6682 public:
// Clears the name and value sub-parsers, drops all collected parameters and
// then clears the base parser state.
6683 virtual void invalidate()
6684 {
6685 name.invalidate();
6686 value.invalidate();
6687 params.clear();
6688 parser::invalidate();
6689 }
6690
6693 std::list<http_cookie_parameter> params;
6694
6695 protected:
// Matches `<name> [space] "=" [space] <value>` followed by any number of
// `";" [space] <parameter>` items, which are collected in `params`.
// On failure of the mandatory part the whole parser is invalidated.
// NOTE(review): `param` is used below but its declaration is not among the
// visible lines (the listing skips from 6728 to 6730) - confirm against the
// original file.
6696 virtual bool do_match(
6697 _In_reads_or_z_(end) const char* text,
6698 _In_ size_t start = 0,
6699 _In_ size_t end = SIZE_MAX,
6700 _In_ int flags = match_default)
6701 {
6702 _Assume_(text || start >= end);
6703 this->interval.end = start;
// Cookie name.
6704 if (name.match(text, this->interval.end, end, flags))
6705 this->interval.end = name.interval.end;
6706 else
6707 goto error;
// "=" surrounded by optional space.
6708 while (m_space.match(text, this->interval.end, end, flags))
6709 this->interval.end = m_space.interval.end;
6710 if (this->interval.end < end && text[this->interval.end] == '=')
6711 this->interval.end++;
6712 else
6713 goto error;
6714 while (m_space.match(text, this->interval.end, end, flags))
6715 this->interval.end = m_space.interval.end;
// Cookie value.
6716 if (value.match(text, this->interval.end, end, flags))
6717 this->interval.end = value.interval.end;
6718 else
6719 goto error;
// Optional parameters: keep going while space or ";"-introduced parameters follow.
6720 params.clear();
6721 for (;;) {
6722 if (this->interval.end < end && text[this->interval.end]) {
6723 if (m_space.match(text, this->interval.end, end, flags))
6724 this->interval.end = m_space.interval.end;
6725 else if (text[this->interval.end] == ';') {
6726 this->interval.end++;
6727 while (m_space.match(text, this->interval.end, end, flags))
6728 this->interval.end = m_space.interval.end;
6730 if (param.match(text, this->interval.end, end, flags)) {
6731 this->interval.end = param.interval.end;
6732 params.push_back(std::move(param));
6733 }
6734 else
6735 break;
6736 }
6737 else
6738 break;
6739 }
6740 else
6741 break;
6742 }
6743 this->interval.start = start;
// Trailing space or a dangling ";" is not part of the match: the end is reset
// to the end of the last parameter, or of the value when there are none.
6744 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
6745 return true;
6746
6747 error:
6748 invalidate();
6749 return false;
6750 }
6751
6752 http_space m_space;
6753 };
6754
6758 class http_agent : public parser
6759 {
6760 public:
// Resets `type` and `version` to an empty range (start 1, end 0) and then
// clears the base parser state.
6761 virtual void invalidate()
6762 {
6763 type.start = 1;
6764 type.end = 0;
6765 version.start = 1;
6766 version.end = 0;
6767 parser::invalidate();
6768 }
6769
6772
6773 protected:
// Splits `<type>[/<version>]` at `start`: `type` runs up to the first "/",
// whitespace or end of text; when a "/" is found, `version` runs from after it
// up to the next whitespace or end of text.
// NOTE(review): the line that opens the success block after the scan loop is
// not among the visible lines (the listing skips from 6816 to 6818), so the
// condition deciding between the success and failure paths cannot be read
// here - confirm against the original file.
// NOTE(review): `version` is only written when a "/" is found or on the
// failure path - confirm whether a stale value can survive a match without "/".
6774 virtual bool do_match(
6775 _In_reads_or_z_(end) const char* text,
6776 _In_ size_t start = 0,
6777 _In_ size_t end = SIZE_MAX,
6778 _In_ int flags = match_default)
6779 {
6780 _Assume_(text || start >= end);
6781 this->interval.end = start;
6782 type.start = this->interval.end;
6783 for (;;) {
6784 if (this->interval.end < end && text[this->interval.end]) {
6785 if (text[this->interval.end] == '/') {
// End of type; the version follows the slash.
6786 type.end = this->interval.end;
6787 this->interval.end++;
6788 version.start = this->interval.end;
6789 for (;;) {
6790 if (this->interval.end < end && text[this->interval.end]) {
6791 if (stdex::isspace(text[this->interval.end])) {
6792 version.end = this->interval.end;
6793 break;
6794 }
6795 else
6796 this->interval.end++;
6797 }
6798 else {
6799 version.end = this->interval.end;
6800 break;
6801 }
6802 }
6803 break;
6804 }
6805 else if (stdex::isspace(text[this->interval.end])) {
// Type without a version.
6806 type.end = this->interval.end;
6807 break;
6808 }
6809 else
6810 this->interval.end++;
6811 }
6812 else {
// End of text terminates the type as well.
6813 type.end = this->interval.end;
6814 break;
6815 }
6816 }
6818 this->interval.start = start;
6819 return true;
6820 }
// Failure: reset both ranges and the match interval.
6821 type.start = 1;
6822 type.end = 0;
6823 version.start = 1;
6824 version.end = 0;
6825 this->interval.invalidate();
6826 return false;
6827 }
6828 };
6829
6833 class http_protocol : public parser
6834 {
6835 public:
// Constructs the parser with the given locale. `version` starts at 0x009,
// the same value invalidate() restores.
6836 http_protocol(_In_ const std::locale& locale = std::locale()) :
6837 parser(locale),
6838 version(0x009)
6839 {}
6840
// Resets the type and both version ranges to an empty range (start 1, end 0),
// restores `version` to its initial 0x009 and clears the base parser state.
6841 virtual void invalidate()
6842 {
6843 type.start = 1;
6844 type.end = 0;
6845 version_maj.start = 1;
6846 version_maj.end = 0;
6847 version_min.start = 1;
6848 version_min.end = 0;
6849 version = 0x009;
6850 parser::invalidate();
6851 }
6852
6854 stdex::interval<size_t> version_maj;
6855 stdex::interval<size_t> version_min;
6857
6858 protected:
6859 virtual bool do_match(
6860 _In_reads_or_z_(end) const char* text,
6861 _In_ size_t start = 0,
6862 _In_ size_t end = SIZE_MAX,
6863 _In_ int flags = match_default)
6864 {
6865 _Assume_(text || start >= end);
6866 this->interval.end = start;
6867 type.start = this->interval.end;
6868 for (;;) {
6869 if (this->interval.end < end && text[this->interval.end]) {
6870 if (text[this->interval.end] == '/') {
6871 type.end = this->interval.end;
6872 this->interval.end++;
6873 break;
6874 }
6875 else if (stdex::isspace(text[this->interval.end]))
6876 goto error;
6877 else
6878 this->interval.end++;
6879 }
6880 else {
6881 type.end = this->interval.end;
6882 goto error;
6883 }
6884 }
6885 version_maj.start = this->interval.end;
6886 for (;;) {
6887 if (this->interval.end < end && text[this->interval.end]) {
6888 if (text[this->interval.end] == '.') {
6889 version_maj.end = this->interval.end;
6890 this->interval.end++;
6891 version_min.start = this->interval.end;
6892 for (;;) {
6893 if (this->interval.end < end && text[this->interval.end]) {
6894 if (stdex::isspace(text[this->interval.end])) {
6895 version_min.end = this->interval.end;
6896 version =
6897 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6898 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6899 break;
6900 }
6901 else
6902 this->interval.end++;
6903 }
6904 else
6905 goto error;
6906 }
6907 break;
6908 }
6909 else if (stdex::isspace(text[this->interval.end])) {
6910 version_maj.end = this->interval.end;
6911 version_min.start = 1;
6912 version_min.end = 0;
6913 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6914 break;
6915 }
6916 else
6917 this->interval.end++;
6918 }
6919 else
6920 goto error;
6921 }
6922 this->interval.start = start;
6923 return true;
6924
6925 error:
6926 invalidate();
6927 return false;
6928 }
6929 };
6930
6934 class http_request : public parser
6935 {
6936 public:
// Constructs the parser; the locale is forwarded to the base parser and to the
// `url` and `protocol` sub-parsers.
6937 http_request(_In_ const std::locale& locale = std::locale()) :
6938 parser(locale),
6939 url(locale),
6940 protocol(locale)
6941 {}
6942
// Resets `verb` to an empty range (start 1, end 0), clears the url and
// protocol sub-parsers and then the base parser state.
6943 virtual void invalidate()
6944 {
6945 verb.start = 1;
6946 verb.end = 0;
6947 url.invalidate();
6948 protocol.invalidate();
6949 parser::invalidate();
6950 }
6951
6953 http_url url;
6954 http_protocol protocol;
6955
6956 protected:
6957 virtual bool do_match(
6958 _In_reads_or_z_(end) const char* text,
6959 _In_ size_t start = 0,
6960 _In_ size_t end = SIZE_MAX,
6961 _In_ int flags = match_default)
6962 {
6963 _Assume_(text || start >= end);
6964 this->interval.end = start;
6965
6966 for (;;) {
6967 if (m_line_break.match(text, this->interval.end, end, flags))
6968 goto error;
6969 else if (this->interval.end < end && text[this->interval.end]) {
6970 if (stdex::isspace(text[this->interval.end]))
6971 this->interval.end++;
6972 else
6973 break;
6974 }
6975 else
6976 goto error;
6977 }
6978 verb.start = this->interval.end;
6979 for (;;) {
6980 if (m_line_break.match(text, this->interval.end, end, flags))
6981 goto error;
6982 else if (this->interval.end < end && text[this->interval.end]) {
6983 if (stdex::isspace(text[this->interval.end])) {
6984 verb.end = this->interval.end;
6985 this->interval.end++;
6986 break;
6987 }
6988 else
6989 this->interval.end++;
6990 }
6991 else
6992 goto error;
6993 }
6994
6995 for (;;) {
6996 if (m_line_break.match(text, this->interval.end, end, flags))
6997 goto error;
6998 else if (this->interval.end < end && text[this->interval.end]) {
6999 if (stdex::isspace(text[this->interval.end]))
7000 this->interval.end++;
7001 else
7002 break;
7003 }
7004 else
7005 goto error;
7006 }
7007 if (url.match(text, this->interval.end, end, flags))
7008 this->interval.end = url.interval.end;
7009 else
7010 goto error;
7011
7012 protocol.invalidate();
7013 for (;;) {
7014 if (m_line_break.match(text, this->interval.end, end, flags)) {
7015 this->interval.end = m_line_break.interval.end;
7016 goto end;
7017 }
7018 else if (this->interval.end < end && text[this->interval.end]) {
7019 if (stdex::isspace(text[this->interval.end]))
7020 this->interval.end++;
7021 else
7022 break;
7023 }
7024 else
7025 goto end;
7026 }
7027 for (;;) {
7028 if (m_line_break.match(text, this->interval.end, end, flags)) {
7029 this->interval.end = m_line_break.interval.end;
7030 goto end;
7031 }
7032 else if (protocol.match(text, this->interval.end, end, flags)) {
7033 this->interval.end = protocol.interval.end;
7034 break;
7035 }
7036 else
7037 goto end;
7038 }
7039
7040 for (;;) {
7041 if (m_line_break.match(text, this->interval.end, end, flags)) {
7042 this->interval.end = m_line_break.interval.end;
7043 break;
7044 }
7045 else if (this->interval.end < end && text[this->interval.end])
7046 this->interval.end++;
7047 else
7048 goto end;
7049 }
7050
7051 end:
7052 this->interval.start = start;
7053 return true;
7054
7055 error:
7056 invalidate();
7057 return false;
7058 }
7059
7060 http_line_break m_line_break;
7061 };
7062
7066 class http_header : public parser
7067 {
7068 public:
// Resets `name` and `value` to an empty range (start 1, end 0) and then clears
// the base parser state.
7069 virtual void invalidate()
7070 {
7071 name.start = 1;
7072 name.end = 0;
7073 value.start = 1;
7074 value.end = 0;
7075 parser::invalidate();
7076 }
7077
7080
7081 protected:
7082 virtual bool do_match(
7083 _In_reads_or_z_(end) const char* text,
7084 _In_ size_t start = 0,
7085 _In_ size_t end = SIZE_MAX,
7086 _In_ int flags = match_default)
7087 {
7088 _Assume_(text || start >= end);
7089 this->interval.end = start;
7090
7091 if (m_line_break.match(text, this->interval.end, end, flags) ||
7092 (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])))
7093 goto error;
7094 name.start = this->interval.end;
7095 for (;;) {
7096 if (m_line_break.match(text, this->interval.end, end, flags))
7097 goto error;
7098 else if (this->interval.end < end && text[this->interval.end]) {
7099 if (stdex::isspace(text[this->interval.end])) {
7100 name.end = this->interval.end;
7101 this->interval.end++;
7102 for (;;) {
7103 if (m_line_break.match(text, this->interval.end, end, flags))
7104 goto error;
7105 else if (this->interval.end < end && text[this->interval.end]) {
7106 if (stdex::isspace(text[this->interval.end]))
7107 this->interval.end++;
7108 else
7109 break;
7110 }
7111 else
7112 goto error;
7113 }
7114 if (this->interval.end < end && text[this->interval.end] == ':') {
7115 this->interval.end++;
7116 break;
7117 }
7118 else
7119 goto error;
7120 break;
7121 }
7122 else if (text[this->interval.end] == ':') {
7123 name.end = this->interval.end;
7124 this->interval.end++;
7125 break;
7126 }
7127 else
7128 this->interval.end++;
7129 }
7130 else
7131 goto error;
7132 }
7133 value.start = SIZE_MAX;
7134 value.end = 0;
7135 for (;;) {
7136 if (m_line_break.match(text, this->interval.end, end, flags)) {
7137 this->interval.end = m_line_break.interval.end;
7138 if (!m_line_break.match(text, this->interval.end, end, flags) &&
7139 this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end]))
7140 this->interval.end++;
7141 else
7142 break;
7143 }
7144 else if (this->interval.end < end && text[this->interval.end]) {
7145 if (stdex::isspace(text[this->interval.end]))
7146 this->interval.end++;
7147 else {
7148 if (value.start == SIZE_MAX) value.start = this->interval.end;
7149 value.end = ++this->interval.end;
7150 }
7151 }
7152 else
7153 break;
7154 }
7155 this->interval.start = start;
7156 return true;
7157
7158 error:
7159 invalidate();
7160 return false;
7161 }
7162
7163 http_line_break m_line_break;
7164 };
7165
7169 template <class KEY, class T>
7170 class http_value_collection : public T
7171 {
7172 public:
7173 void insert(
7174 _In_reads_or_z_(end) const char* text,
7175 _In_ size_t start = 0,
7176 _In_ size_t end = SIZE_MAX,
7177 _In_ int flags = match_default)
7178 {
7179 while (start < end) {
7180 while (start < end && text[start] && stdex::isspace(text[start])) start++;
7181 if (start < end && text[start] == ',') {
7182 start++;
7183 while (start < end&& text[start] && stdex::isspace(text[start])) start++;
7184 }
7185 KEY el;
7186 if (el.match(text, start, end, flags)) {
7187 start = el.interval.end;
7188 T::insert(std::move(el));
7189 }
7190 else
7191 break;
7192 }
7193 }
7194 };
7195
7196 template <class T>
// Ordering predicate: returns true when `a` has the larger factor value, so a
// container sorted with it lists elements by descending factor.
7198 constexpr bool operator()(const T& a, const T& b) const noexcept
7199 {
7200 return a.factor.value > b.factor.value;
7201 }
7202 };
7203
7207 template <class T, class AX = std::allocator<T>>
7209
7213 template <class T>
7215 {
7216 public:
// Constructor parameters: one sub-parser per syntactic element, each stored in
// the like-named m_ member. As used by do_match():
//   quote  - opening and closing string delimiter
//   chr    - ordinary string content, copied to `value` verbatim
//   escape - escape introducer; escape + escape yields '\\'
//   sol, bs, ff, lf, cr, htab - escape selectors yielding '/', '\b', '\f',
//            '\n', '\r' and '\t' respectively
//   uni    - escape selector introducing a four-digit hexadecimal code
//   hex    - parser for that hexadecimal code
// NOTE(review): the declarator line before the first parameter and one line of
// the initializer list (listing lines 7217 and 7230) are not among the visible
// lines - confirm against the original file how `locale` is consumed.
7218 _In_ const std::shared_ptr<basic_parser<T>>& quote,
7219 _In_ const std::shared_ptr<basic_parser<T>>& chr,
7220 _In_ const std::shared_ptr<basic_parser<T>>& escape,
7221 _In_ const std::shared_ptr<basic_parser<T>>& sol,
7222 _In_ const std::shared_ptr<basic_parser<T>>& bs,
7223 _In_ const std::shared_ptr<basic_parser<T>>& ff,
7224 _In_ const std::shared_ptr<basic_parser<T>>& lf,
7225 _In_ const std::shared_ptr<basic_parser<T>>& cr,
7226 _In_ const std::shared_ptr<basic_parser<T>>& htab,
7227 _In_ const std::shared_ptr<basic_parser<T>>& uni,
7228 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
7229 _In_ const std::locale& locale = std::locale()) :
7231 m_quote(quote),
7232 m_chr(chr),
7233 m_escape(escape),
7234 m_sol(sol),
7235 m_bs(bs),
7236 m_ff(ff),
7237 m_lf(lf),
7238 m_cr(cr),
7239 m_htab(htab),
7240 m_uni(uni),
7241 m_hex(hex)
7242 {}
7243
// Discards the decoded string.
// NOTE(review): one statement (listing line 7247) is not among the visible
// lines; the other invalidate() overrides in this file end by calling the base
// class invalidate() - confirm against the original file.
7244 virtual void invalidate()
7245 {
7246 value.clear();
7248 }
7249
7250 std::basic_string<T> value;
7251
7252 protected:
7253 virtual bool do_match(
7254 _In_reads_or_z_opt_(end) const T* text,
7255 _In_ size_t start = 0,
7256 _In_ size_t end = SIZE_MAX,
7257 _In_ int flags = match_default)
7258 {
7259 _Assume_(text || start >= end);
7260 this->interval.end = start;
7261 if (m_quote->match(text, this->interval.end, end, flags)) {
7262 this->interval.end = m_quote->interval.end;
7263 value.clear();
7264 for (;;) {
7265 if (m_quote->match(text, this->interval.end, end, flags)) {
7266 this->interval.start = start;
7267 this->interval.end = m_quote->interval.end;
7268 return true;
7269 }
7270 if (m_escape->match(text, this->interval.end, end, flags)) {
7271 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
7272 value += '"'; this->interval.end = m_quote->interval.end;
7273 continue;
7274 }
7275 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
7276 value += '/'; this->interval.end = m_sol->interval.end;
7277 continue;
7278 }
7279 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
7280 value += '\b'; this->interval.end = m_bs->interval.end;
7281 continue;
7282 }
7283 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
7284 value += '\f'; this->interval.end = m_ff->interval.end;
7285 continue;
7286 }
7287 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
7288 value += '\n'; this->interval.end = m_lf->interval.end;
7289 continue;
7290 }
7291 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
7292 value += '\r'; this->interval.end = m_cr->interval.end;
7293 continue;
7294 }
7295 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
7296 value += '\t'; this->interval.end = m_htab->interval.end;
7297 continue;
7298 }
7299 if (
7300 m_uni->match(text, m_escape->interval.end, end, flags) &&
7301 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
7302 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
7303 {
7304 _Assume_(m_hex->value <= 0xffff);
7305 if (sizeof(T) == 1) {
7306 if (m_hex->value > 0x7ff) {
7307 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
7308 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
7309 value += (T)(0x80 | (m_hex->value & 0x3f));
7310 }
7311 else if (m_hex->value > 0x7f) {
7312 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
7313 value += (T)(0x80 | (m_hex->value & 0x3f));
7314 }
7315 else
7316 value += (T)(m_hex->value & 0x7f);
7317 }
7318 else
7319 value += (T)m_hex->value;
7320 this->interval.end = m_hex->interval.end;
7321 continue;
7322 }
7323 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
7324 value += '\\'; this->interval.end = m_escape->interval.end;
7325 continue;
7326 }
7327 }
7328 if (m_chr->match(text, this->interval.end, end, flags)) {
7329 value.append(text + m_chr->interval.start, m_chr->interval.size());
7330 this->interval.end = m_chr->interval.end;
7331 continue;
7332 }
7333 break;
7334 }
7335 }
7336 value.clear();
7337 this->interval.invalidate();
7338 return false;
7339 }
7340
7341 std::shared_ptr<basic_parser<T>> m_quote;
7342 std::shared_ptr<basic_parser<T>> m_chr;
7343 std::shared_ptr<basic_parser<T>> m_escape;
7344 std::shared_ptr<basic_parser<T>> m_sol;
7345 std::shared_ptr<basic_parser<T>> m_bs;
7346 std::shared_ptr<basic_parser<T>> m_ff;
7347 std::shared_ptr<basic_parser<T>> m_lf;
7348 std::shared_ptr<basic_parser<T>> m_cr;
7349 std::shared_ptr<basic_parser<T>> m_htab;
7350 std::shared_ptr<basic_parser<T>> m_uni;
7351 std::shared_ptr<basic_integer16<T>> m_hex;
7352 };
7353
// tjson_string names wjson_string when _UNICODE is defined and json_string
// otherwise.
7356#ifdef _UNICODE
7357 using tjson_string = wjson_string;
7358#else
7359 using tjson_string = json_string;
7360#endif
7361
7365 template <class T>
7367 {
7368 public:
7369 virtual void invalidate()
7370 {
7371 this->content.invalidate();
7372 basic_parser::invalidate();
7373 }
7374
7376
7377 protected:
7378 virtual bool do_match(
7379 _In_reads_or_z_opt_(end) const T* text,
7380 _In_ size_t start = 0,
7381 _In_ size_t end = SIZE_MAX,
7382 _In_ int flags = match_multiline)
7383 {
7384 _Unreferenced_(flags);
7385 _Assume_(text || start + 1 >= end);
7386 if (start + 1 < end &&
7387 text[start] == '/' &&
7388 text[start + 1] == '*')
7389 {
7390 // /*
7391 this->content.start = this->interval.end = start + 2;
7392 for (;;) {
7393 if (this->interval.end >= end || !text[this->interval.end])
7394 break;
7395 if (this->interval.end + 1 < end &&
7396 text[this->interval.end] == '*' &&
7397 text[this->interval.end + 1] == '/')
7398 {
7399 // /*...*/
7400 this->content.end = this->interval.end;
7401 this->interval.start = start;
7402 this->interval.end = this->interval.end + 2;
7403 return true;
7404 }
7405 this->interval.end++;
7406 }
7407 }
7408 this->content.invalidate();
7409 this->interval.invalidate();
7410 return false;
7411 }
7412 };
7413
// Narrow and wide character instantiations of basic_css_comment; tcss_comment
// names the wide one when _UNICODE is defined and the narrow one otherwise.
7414 using css_comment = basic_css_comment<char>;
7415 using wcss_comment = basic_css_comment<wchar_t>;
7416#ifdef _UNICODE
7417 using tcss_comment = wcss_comment;
7418#else
7419 using tcss_comment = css_comment;
7420#endif
7421
7425 template <class T>
7426 class basic_css_cdo : public basic_parser<T>
7427 {
7428 protected:
7429 virtual bool do_match(
7430 _In_reads_or_z_opt_(end) const T* text,
7431 _In_ size_t start = 0,
7432 _In_ size_t end = SIZE_MAX,
7433 _In_ int flags = match_multiline)
7434 {
7435 _Unreferenced_(flags);
7436 _Assume_(text || start + 3 >= end);
7437 if (start + 3 < end &&
7438 text[start] == '<' &&
7439 text[start + 1] == '!' &&
7440 text[start + 2] == '-' &&
7441 text[start + 3] == '-')
7442 {
7443 this->interval.start = start;
7444 this->interval.end = start + 4;
7445 return true;
7446 }
7447 this->interval.invalidate();
7448 return false;
7449 }
7450 };
7451
// tcss_cdo names wcss_cdo when _UNICODE is defined and css_cdo otherwise.
7454#ifdef _UNICODE
7455 using tcss_cdo = wcss_cdo;
7456#else
7457 using tcss_cdo = css_cdo;
7458#endif
7459
7463 template <class T>
7464 class basic_css_cdc : public basic_parser<T>
7465 {
7466 protected:
7467 virtual bool do_match(
7468 _In_reads_or_z_opt_(end) const T* text,
7469 _In_ size_t start = 0,
7470 _In_ size_t end = SIZE_MAX,
7471 _In_ int flags = match_multiline)
7472 {
7473 _Unreferenced_(flags);
7474 _Assume_(text || start + 2 >= end);
7475 if (start + 2 < end &&
7476 text[start] == '-' &&
7477 text[start + 1] == '-' &&
7478 text[start + 2] == '>')
7479 {
7480 this->interval.start = start;
7481 this->interval.end = start + 3;
7482 return true;
7483 }
7484 this->interval.invalidate();
7485 return false;
7486 }
7487 };
7488
// tcss_cdc names wcss_cdc when _UNICODE is defined and css_cdc otherwise.
7491#ifdef _UNICODE
7492 using tcss_cdc = wcss_cdc;
7493#else
7494 using tcss_cdc = css_cdc;
7495#endif
7496
7500 template <class T>
7502 {
7503 public:
7504 virtual void invalidate()
7505 {
7506 this->content.invalidate();
7507 basic_parser::invalidate();
7508 }
7509
7511
7512 protected:
7513 virtual bool do_match(
7514 _In_reads_or_z_opt_(end) const T* text,
7515 _In_ size_t start = 0,
7516 _In_ size_t end = SIZE_MAX,
7517 _In_ int flags = match_multiline)
7518 {
7519 _Unreferenced_(flags);
7520 this->interval.end = start;
7521 _Assume_(text || this->interval.end >= end);
7522 if (this->interval.end < end &&
7523 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7524 {
7525 // "Quoted...
7526 T quote = text[this->interval.end];
7527 this->content.start = ++this->interval.end;
7528 for (;;) {
7529 if (this->interval.end >= end || !text[this->interval.end])
7530 break;
7531 if (text[this->interval.end] == quote) {
7532 // End quote"
7533 this->content.end = this->interval.end;
7534 this->interval.start = start;
7535 this->interval.end++;
7536 return true;
7537 }
7538 if (this->interval.end + 1 < end &&
7539 text[this->interval.end] == '\\' &&
7540 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7541 {
7542 // Escaped quote
7543 this->interval.end = this->interval.end + 2;
7544 }
7545 else
7546 this->interval.end++;
7547 }
7548 }
7549
7550 this->content.invalidate();
7551 this->interval.invalidate();
7552 return false;
7553 }
7554 };
7555
// Narrow and wide character instantiations of basic_css_string; tcss_string
// names the wide one when _UNICODE is defined and the narrow one otherwise.
7556 using css_string = basic_css_string<char>;
7557 using wcss_string = basic_css_string<wchar_t>;
7558#ifdef _UNICODE
7559 using tcss_string = wcss_string;
7560#else
7561 using tcss_string = css_string;
7562#endif
7563
7567 template <class T>
7568 class basic_css_uri : public basic_parser<T>
7569 {
7570 public:
7571 virtual void invalidate()
7572 {
7573 this->content.invalidate();
7574 basic_parser::invalidate();
7575 }
7576
7578
7579 protected:
7580 virtual bool do_match(
7581 _In_reads_or_z_opt_(end) const T* text,
7582 _In_ size_t start = 0,
7583 _In_ size_t end = SIZE_MAX,
7584 _In_ int flags = match_multiline)
7585 {
7586 _Unreferenced_(flags);
7587 this->interval.end = start;
7588 _Assume_(text || this->interval.end + 3 >= end);
7589 if (this->interval.end + 3 < end &&
7590 (text[this->interval.end] == 'u' || text[this->interval.end] == 'U') &&
7591 (text[this->interval.end + 1] == 'r' || text[this->interval.end + 1] == 'R') &&
7592 (text[this->interval.end + 2] == 'l' || text[this->interval.end + 2] == 'L') &&
7593 text[this->interval.end + 3] == '(')
7594 {
7595 // url(
7596 this->interval.end = this->interval.end + 4;
7597
7598 // Skip whitespace.
7599 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7600 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7601
7602 if (this->interval.end < end &&
7603 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7604 {
7605 // url("Quoted...
7606 T quote = text[this->interval.end];
7607 this->content.start = ++this->interval.end;
7608 for (;;) {
7609 if (this->interval.end >= end || !text[this->interval.end])
7610 goto error;
7611 if (text[this->interval.end] == quote) {
7612 // End quote"
7613 this->content.end = this->interval.end;
7614 this->interval.end++;
7615 break;
7616 }
7617 if (this->interval.end + 1 < end &&
7618 text[this->interval.end] == '\\' &&
7619 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7620 {
7621 // Escaped quote
7622 this->interval.end = this->interval.end + 2;
7623 }
7624 else
7625 this->interval.end++;
7626 }
7627
7628 // Skip whitespace.
7629 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7630
7631 if (this->interval.end < end &&
7632 text[this->interval.end] == ')')
7633 {
7634 // url("...")
7635 this->interval.start = start;
7636 this->interval.end++;
7637 return true;
7638 }
7639 }
7640 else {
7641 // url(...
7642 this->content.start = content.end = this->interval.end;
7643 for (;;) {
7644 if (this->interval.end >= end || !text[this->interval.end])
7645 goto error;
7646 if (text[this->interval.end] == ')') {
7647 // url(...)
7648 this->interval.start = start;
7649 this->interval.end++;
7650 return true;
7651 }
7652 if (ctype.is(ctype.space, text[this->interval.end]))
7653 this->interval.end++;
7654 else
7655 this->content.end = ++this->interval.end;
7656 }
7657 }
7658 }
7659
7660 error:
7661 invalidate();
7662 return false;
7663 }
7664 };
7665
// Narrow and wide character instantiations of basic_css_uri; tcss_uri names
// the wide one when _UNICODE is defined and the narrow one otherwise.
7666 using css_uri = basic_css_uri<char>;
7667 using wcss_uri = basic_css_uri<wchar_t>;
7668#ifdef _UNICODE
7669 using tcss_uri = wcss_uri;
7670#else
7671 using tcss_uri = css_uri;
7672#endif
7673
7677 template <class T>
7679 {
7680 public:
7681 virtual void invalidate()
7682 {
7683 this->content.invalidate();
7684 basic_parser::invalidate();
7685 }
7686
7688
7689 protected:
7690 virtual bool do_match(
7691 _In_reads_or_z_opt_(end) const T* text,
7692 _In_ size_t start = 0,
7693 _In_ size_t end = SIZE_MAX,
7694 _In_ int flags = match_multiline)
7695 {
7696 _Unreferenced_(flags);
7697 this->interval.end = start;
7698 _Assume_(text || this->interval.end + 6 >= end);
7699 if (this->interval.end + 6 < end &&
7700 text[this->interval.end] == '@' &&
7701 (text[this->interval.end + 1] == 'i' || text[this->interval.end + 1] == 'I') &&
7702 (text[this->interval.end + 2] == 'm' || text[this->interval.end + 2] == 'M') &&
7703 (text[this->interval.end + 3] == 'p' || text[this->interval.end + 3] == 'P') &&
7704 (text[this->interval.end + 4] == 'o' || text[this->interval.end + 4] == 'O') &&
7705 (text[this->interval.end + 5] == 'r' || text[this->interval.end + 5] == 'R') &&
7706 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T'))
7707 {
7708 // @import...
7709 this->interval.end = this->interval.end + 7;
7710
7711 // Skip whitespace.
7712 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7713 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7714
7715 if (this->interval.end < end &&
7716 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7717 {
7718 // @import "Quoted
7719 T quote = text[this->interval.end];
7720 this->content.start = ++this->interval.end;
7721 for (;;) {
7722 if (this->interval.end >= end || !text[this->interval.end])
7723 goto error;
7724 if (text[this->interval.end] == quote) {
7725 // End quote"
7726 this->content.end = this->interval.end;
7727 this->interval.start = start;
7728 this->interval.end++;
7729 return true;
7730 }
7731 if (this->interval.end + 1 < end &&
7732 text[this->interval.end] == '\\' &&
7733 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7734 {
7735 // Escaped quote
7736 this->interval.end = this->interval.end + 2;
7737 }
7738 else
7739 this->interval.end++;
7740 }
7741 }
7742 }
7743
7744 error:
7745 invalidate();
7746 return false;
7747 }
7748 };
7749
// Narrow and wide character instantiations of basic_css_import; tcss_import
// names the wide one when _UNICODE is defined and the narrow one otherwise.
7750 using css_import = basic_css_import<char>;
7751 using wcss_import = basic_css_import<wchar_t>;
7752#ifdef _UNICODE
7753 using tcss_import = wcss_import;
7754#else
7755 using tcss_import = css_import;
7756#endif
7757
7761 template <class T>
7763 {
7764 public:
7765 virtual void invalidate()
7766 {
7767 this->base_type.invalidate();
7768 this->sub_type.invalidate();
7769 this->charset.invalidate();
7770 basic_parser::invalidate();
7771 }
7772
7776
7777 protected:
7778 virtual bool do_match(
7779 _In_reads_or_z_opt_(end) const T* text,
7780 _In_ size_t start = 0,
7781 _In_ size_t end = SIZE_MAX,
7782 _In_ int flags = match_multiline)
7783 {
7784 _Unreferenced_(flags);
7785 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7786
7787 this->interval.end = start;
7788 this->base_type.start = this->interval.end;
7789 for (;;) {
7790 _Assume_(text || this->interval.end >= end);
7791 if (this->interval.end >= end || !text[this->interval.end])
7792 break;
7793 if (text[this->interval.end] == '/' ||
7794 text[this->interval.end] == ';' ||
7795 ctype.is(ctype.space, text[this->interval.end]))
7796 break;
7797 this->interval.end++;
7798 }
7799 if (this->interval.end <= this->base_type.start)
7800 goto error;
7801 this->base_type.end = this->interval.end;
7802
7803 if (end <= this->interval.end || text[this->interval.end] != '/')
7804 goto error;
7805
7806 this->interval.end++;
7807 this->sub_type.start = this->interval.end;
7808 for (;;) {
7809 if (this->interval.end >= end || !text[this->interval.end])
7810 break;
7811 if (text[this->interval.end] == '/' ||
7812 text[this->interval.end] == ';' ||
7813 ctype.is(ctype.space, text[this->interval.end]))
7814 break;
7815 this->interval.end++;
7816 }
7817 if (this->interval.end <= this->sub_type.start)
7818 goto error;
7819
7820 this->sub_type.end = this->interval.end;
7821 this->charset.invalidate();
7822 if (this->interval.end < end && text[this->interval.end] == ';') {
7823 this->interval.end++;
7824
7825 // Skip whitespace.
7826 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7827
7828 if (this->interval.end + 7 < end &&
7829 (text[this->interval.end] == 'c' || text[this->interval.end] == 'C') &&
7830 (text[this->interval.end + 1] == 'h' || text[this->interval.end + 1] == 'H') &&
7831 (text[this->interval.end + 2] == 'a' || text[this->interval.end + 2] == 'A') &&
7832 (text[this->interval.end + 3] == 'r' || text[this->interval.end + 3] == 'R') &&
7833 (text[this->interval.end + 4] == 's' || text[this->interval.end + 4] == 'S') &&
7834 (text[this->interval.end + 5] == 'e' || text[this->interval.end + 5] == 'E') &&
7835 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T') &&
7836 text[this->interval.end + 7] == '=')
7837 {
7838 this->interval.end = this->interval.end + 8;
7839 if (this->interval.end < end &&
7840 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7841 {
7842 // "Quoted...
7843 T quote = text[this->interval.end];
7844 this->charset.start = ++this->interval.end;
7845 for (;;) {
7846 if (this->interval.end >= end || !text[this->interval.end]) {
7847 // No end quote!
7848 this->charset.invalidate();
7849 break;
7850 }
7851 if (text[this->interval.end] == quote) {
7852 // End quote"
7853 this->charset.end = this->interval.end;
7854 this->interval.end++;
7855 break;
7856 }
7857 this->interval.end++;
7858 }
7859 }
7860 else {
7861 // Nonquoted
7862 this->charset.start = this->interval.end;
7863 for (;;) {
7864 if (this->interval.end >= end || !text[this->interval.end] ||
7865 ctype.is(ctype.space, text[this->interval.end])) {
7866 this->charset.end = this->interval.end;
7867 break;
7868 }
7869 this->interval.end++;
7870 }
7871 }
7872 }
7873 }
7874 this->interval.start = start;
7875 return true;
7876
7877 error:
7878 invalidate();
7879 return false;
7880 }
7881 };
7882
// Ready-made instantiations of basic_mime_type for char and wchar_t text.
// tmime_type resolves to the wide variant when _UNICODE is defined, to the narrow one otherwise.
7883 using mime_type = basic_mime_type<char>;
7884 using wmime_type = basic_mime_type<wchar_t>;
7885#ifdef _UNICODE
7886 using tmime_type = wmime_type;
7887#else
7888 using tmime_type = mime_type;
7889#endif
7890
7894 template <class T>
7896 {
7897 protected:
7898 virtual bool do_match(
7899 _In_reads_or_z_opt_(end) const T* text,
7900 _In_ size_t start = 0,
7901 _In_ size_t end = SIZE_MAX,
7902 _In_ int flags = match_default)
7903 {
7904 _Unreferenced_(flags);
7905 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7906 this->interval.end = start;
7907 for (;;) {
7908 _Assume_(text || this->interval.end >= end);
7909 if (this->interval.end >= end || !text[this->interval.end]) {
7911 this->interval.start = start;
7912 return true;
7913 }
7914 this->interval.invalidate();
7915 return false;
7916 }
7917 if (text[this->interval.end] == '>' ||
7918 text[this->interval.end] == '=' ||
7919 text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>' ||
7920 ctype.is(ctype.space, text[this->interval.end]))
7921 {
7922 this->interval.start = start;
7923 return true;
7924 }
7925 this->interval.end++;
7926 }
7927 }
7928 };
7929
7932#ifdef _UNICODE
7933 using thtml_ident = whtml_ident;
7934#else
7935 using thtml_ident = html_ident;
7936#endif
7937
7941 template <class T>
7943 {
7944 public:
7945 virtual void invalidate()
7946 {
7947 this->content.invalidate();
7948 basic_parser::invalidate();
7949 }
7950
7952
7953 protected:
7954 virtual bool do_match(
7955 _In_reads_or_z_opt_(end) const T* text,
7956 _In_ size_t start = 0,
7957 _In_ size_t end = SIZE_MAX,
7958 _In_ int flags = match_default)
7959 {
7960 _Unreferenced_(flags);
7961 this->interval.end = start;
7962 _Assume_(text || this->interval.end >= end);
7963 if (this->interval.end < end &&
7964 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7965 {
7966 // "Quoted...
7967 T quote = text[this->interval.end];
7968 this->content.start = ++this->interval.end;
7969 for (;;) {
7970 if (this->interval.end >= end || !text[this->interval.end]) {
7971 // No end quote!
7972 this->content.invalidate();
7973 this->interval.invalidate();
7974 return false;
7975 }
7976 if (text[this->interval.end] == quote) {
7977 // End quote"
7978 this->content.end = this->interval.end;
7979 this->interval.start = start;
7980 this->interval.end++;
7981 return true;
7982 }
7983 this->interval.end++;
7984 }
7985 }
7986
7987 // Nonquoted
7988 this->content.start = this->interval.end;
7989 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7990 for (;;) {
7991 _Assume_(text || this->interval.end >= end);
7992 if (this->interval.end >= end || !text[this->interval.end]) {
7993 this->content.end = this->interval.end;
7994 this->interval.start = start;
7995 return true;
7996 }
7997 if (text[this->interval.end] == '>' ||
7998 text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>' ||
7999 ctype.is(ctype.space, text[this->interval.end]))
8000 {
8001 this->content.end = this->interval.end;
8002 this->interval.start = start;
8003 return true;
8004 }
8005 this->interval.end++;
8006 }
8007 }
8008 };
8009
// Ready-made instantiations of basic_html_value for char and wchar_t text.
// thtml_value resolves to the wide variant when _UNICODE is defined, to the narrow one otherwise.
8010 using html_value = basic_html_value<char>;
8011 using whtml_value = basic_html_value<wchar_t>;
8012#ifdef _UNICODE
8013 using thtml_value = whtml_value;
8014#else
8015 using thtml_value = html_value;
8016#endif
8017
/// Kind of sequence found in an HTML document.
///
/// `unknown` is the "nothing matched" state; the remaining values are numbered from zero.
enum class html_sequence_t {
    unknown = -1,

    text = 0,
    element = 1,
    element_start = 2,
    element_end = 3,
    declaration = 4,
    comment = 5,
    instruction = 6,
    PCDATA = 7,
    CDATA = 8,
};
8034
8042
8046 template <class T>
8048 {
8049 public:
8050 basic_html_tag(_In_ const std::locale& locale = std::locale()) :
8052 type(html_sequence_t::unknown)
8053 {}
8054
8055 virtual void invalidate()
8056 {
8057 this->type = html_sequence_t::unknown;
8058 this->name.invalidate();
8059 this->attributes.clear();
8060 basic_parser::invalidate();
8061 }
8062
8063 html_sequence_t type;
8065 std::vector<html_attribute> attributes;
8066
8067 protected:
8068 virtual bool do_match(
8069 _In_reads_or_z_opt_(end) const T* text,
8070 _In_ size_t start = 0,
8071 _In_ size_t end = SIZE_MAX,
8072 _In_ int flags = match_multiline)
8073 {
8074 _Assume_(text || start >= end);
8075 if (start >= end || text[start] != '<')
8076 goto error;
8077 this->interval.end = start + 1;
8078 if (this->interval.end >= end || !text[this->interval.end])
8079 goto error;
8080 if (text[this->interval.end] == '/' &&
8081 this->m_ident.match(text, this->interval.end + 1, end, flags))
8082 {
8083 // </...
8084 this->type = html_sequence_t::element_end;
8085 this->name = this->m_ident.interval;
8086 this->interval.end = this->m_ident.interval.end;
8087 }
8088 else if (text[this->interval.end] == '!') {
8089 // <!...
8090 this->interval.end++;
8091 if (this->interval.end + 1 < end &&
8092 text[this->interval.end] == '-' &&
8093 text[this->interval.end + 1] == '-')
8094 {
8095 // <!--...
8096 this->name.start = this->interval.end = this->interval.end + 2;
8097 for (;;) {
8098 if (this->interval.end >= end || !text[this->interval.end])
8099 goto error;
8100 if (this->interval.end + 2 < end &&
8101 text[this->interval.end] == '-' &&
8102 text[this->interval.end + 1] == '-' &&
8103 text[this->interval.end + 2] == '>')
8104 {
8105 // <!--...-->
8106 this->type = html_sequence_t::comment;
8107 this->name.end = this->interval.end;
8108 this->attributes.clear();
8109 this->interval.start = start;
8110 this->interval.end = this->interval.end + 3;
8111 return true;
8112 }
8113 this->interval.end++;
8114 }
8115 }
8116 this->type = html_sequence_t::declaration;
8117 this->name.start = this->name.end = this->interval.end;
8118 }
8119 else if (text[this->interval.end] == '?') {
8120 // <?...
8121 this->name.start = ++this->interval.end;
8122 for (;;) {
8123 if (this->interval.end >= end || !text[this->interval.end])
8124 goto error;
8125 if (text[this->interval.end] == '>') {
8126 // <?...>
8127 this->type = html_sequence_t::instruction;
8128 this->name.end = this->interval.end;
8129 this->attributes.clear();
8130 this->interval.start = start;
8131 this->interval.end++;
8132 return true;
8133 }
8134 if (this->interval.end + 1 < end &&
8135 text[this->interval.end] == '?' &&
8136 text[this->interval.end + 1] == '>')
8137 {
8138 // <?...?>
8139 this->type = html_sequence_t::instruction;
8140 this->name.end = this->interval.end;
8141 this->attributes.clear();
8142 this->interval.start = start;
8143 this->interval.end = this->interval.end + 2;
8144 return true;
8145 }
8146 this->interval.end++;
8147 }
8148 }
8149 else if (this->m_ident.match(text, this->interval.end, end, flags)) {
8150 // <tag...
8151 this->type = html_sequence_t::element_start;
8152 this->name = this->m_ident.interval;
8153 this->interval.end = this->m_ident.interval.end;
8154 }
8155 else
8156 goto error;
8157
8158 // Skip whitespace.
8159 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8160 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8161
8162 this->attributes.clear();
8163 for (;;) {
8164 if (this->type == html_sequence_t::element_start &&
8165 this->interval.end + 1 < end &&
8166 text[this->interval.end] == '/' &&
8167 text[this->interval.end + 1] == '>')
8168 {
8169 // <tag .../>
8170 this->type = html_sequence_t::element;
8171 this->interval.end = this->interval.end + 2;
8172 break;
8173 }
8174 if (this->interval.end < end &&
8175 text[this->interval.end] == '>')
8176 {
8177 // <tag ...>
8178 this->interval.end++;
8179 break;
8180 }
8181 if (this->type == html_sequence_t::declaration &&
8182 this->interval.end + 1 < end &&
8183 text[this->interval.end] == '!' &&
8184 text[this->interval.end + 1] == '>')
8185 {
8186 // "<!...!>".
8187 this->interval.end = this->interval.end + 2;
8188 break;
8189 }
8190 if (this->type == html_sequence_t::declaration &&
8191 this->interval.end + 1 < end &&
8192 text[this->interval.end] == '-' &&
8193 text[this->interval.end + 1] == '-')
8194 {
8195 // "<! ... --...".
8196 this->interval.end = this->interval.end + 2;
8197 for (;;) {
8198 if (this->interval.end >= end || !text[this->interval.end])
8199 goto error;
8200 if (this->interval.end + 1 < end &&
8201 text[this->interval.end] == '-' &&
8202 text[this->interval.end + 1] == '-')
8203 {
8204 // "<! ... --...--".
8205 this->interval.end = this->interval.end + 2;
8206 break;
8207 }
8208 this->interval.end++;
8209 }
8210
8211 // Skip whitespace.
8212 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8213 continue;
8214 }
8215
8216 if (this->interval.end >= end || !text[this->interval.end])
8217 goto error;
8218
8219 // Attributes follow...
8220 html_attribute* a = nullptr;
8221 if (this->m_ident.match(text, this->interval.end, end, flags)) {
8222 this->attributes.push_back(std::move(html_attribute{ this->m_ident.interval }));
8223 a = &this->attributes.back();
8224 _Assume_(a);
8225 this->interval.end = this->m_ident.interval.end;
8226 }
8227 else {
8228 // What was that?! Skip.
8229 this->interval.end++;
8230 continue;
8231 }
8232
8233 // Skip whitespace.
8234 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8235
8236 if (this->interval.end < end && text[this->interval.end] == '=') {
8237 this->interval.end++;
8238
8239 // Skip whitespace.
8240 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8241
8242 if (this->m_value.match(text, this->interval.end, end, flags)) {
8243 // This attribute has value.
8244 a->value = this->m_value.content;
8245 this->interval.end = this->m_value.interval.end;
8246
8247 // Skip whitespace.
8248 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8249 }
8250 }
8251 else {
8252 // This attribute has no value.
8253 a->value.invalidate();
8254 }
8255 }
8256
8257 this->interval.start = start;
8258 return true;
8259
8260 error:
8261 invalidate();
8262 return false;
8263 }
8264
/// Sub-parser do_match() uses for element and attribute names.
8265 basic_html_ident<T> m_ident;
/// Sub-parser do_match() uses for attribute values.
8266 basic_html_value<T> m_value;
8267 };
8268
// Ready-made instantiations of basic_html_tag for char and wchar_t text.
// thtml_tag resolves to the wide variant when _UNICODE is defined, to the narrow one otherwise.
8269 using html_tag = basic_html_tag<char>;
8270 using whtml_tag = basic_html_tag<wchar_t>;
8271#ifdef _UNICODE
8272 using thtml_tag = whtml_tag;
8273#else
8274 using thtml_tag = html_tag;
8275#endif
8276
8280 template <class T>
8282 {
8283 public:
8284 virtual void invalidate()
8285 {
8286 this->condition.invalidate();
8287 basic_parser::invalidate();
8288 }
8289
/// Condition position in source: the text between "<![" and the following '[',
/// without leading and trailing whitespace.
8290 stdex::interval<size_t> condition;
8291
8292 protected:
8293 virtual bool do_match(
8294 _In_reads_or_z_opt_(end) const T* text,
8295 _In_ size_t start = 0,
8296 _In_ size_t end = SIZE_MAX,
8297 _In_ int flags = match_multiline)
8298 {
8299 _Unreferenced_(flags);
8300 _Assume_(text || start + 2 >= end);
8301 if (start + 2 < end &&
8302 text[start] == '<' &&
8303 text[start + 1] == '!' &&
8304 text[start + 2] == '[')
8305 {
8306 this->interval.end = start + 3;
8307
8308 // Skip whitespace.
8309 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8310 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8311
8312 this->condition.start = this->condition.end = this->interval.end;
8313
8314 for (;;) {
8315 if (this->interval.end >= end || !text[this->interval.end])
8316 break;
8317 if (text[this->interval.end] == '[') {
8318 this->interval.start = start;
8319 this->interval.end++;
8320 return true;
8321 }
8322 if (ctype.is(ctype.space, text[this->interval.end]))
8323 this->interval.end++;
8324 else
8325 this->condition.end = ++this->interval.end;
8326 }
8327 }
8328
8329 this->condition.invalidate();
8330 this->interval.invalidate();
8331 return false;
8332 }
8333 };
8334
// Ready-made instantiations of basic_html_declaration_condition_start for char and wchar_t text.
// The t-prefixed alias resolves to the wide variant when _UNICODE is defined, to the narrow one otherwise.
8335 using html_declaration_condition_start = basic_html_declaration_condition_start<char>;
8336 using whtml_declaration_condition_start = basic_html_declaration_condition_start<wchar_t>;
8337#ifdef _UNICODE
8338 using thtml_declaration_condition_start = whtml_declaration_condition_start;
8339#else
8340 using thtml_declaration_condition_start = html_declaration_condition_start;
8341#endif
8342
8346 template <class T>
8348 {
8349 protected:
8350 virtual bool do_match(
8351 _In_reads_or_z_opt_(end) const T* text,
8352 _In_ size_t start = 0,
8353 _In_ size_t end = SIZE_MAX,
8354 _In_ int flags = match_multiline)
8355 {
8356 _Unreferenced_(flags);
8357 _Assume_(text || start + 2 >= end);
8358 if (start + 2 < end &&
8359 text[start] == ']' &&
8360 text[start + 1] == ']' &&
8361 text[start + 2] == '>')
8362 {
8363 this->interval.start = start;
8364 this->interval.end = start + 3;
8365 return true;
8366 }
8367 this->interval.invalidate();
8368 return false;
8369 }
8370 };
8371
8374#ifdef _UNICODE
8376#else
8378#endif
8379 }
8380}
8381
8382#undef ENUM_FLAG_OPERATOR
8383#undef ENUM_FLAGS
8384
8385#ifdef _MSC_VER
8386#pragma warning(pop)
8387#endif
locale_t helper class to free_locale when going out of scope.
Definition locale.hpp:69
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4375
Test for any code unit.
Definition parser.hpp:231
Test for beginning of line.
Definition parser.hpp:630
Test for any.
Definition parser.hpp:1073
Test for chemical formula.
Definition parser.hpp:5504
Test for Creditor Reference.
Definition parser.hpp:4939
T reference[22]
Normalized national reference number.
Definition parser.hpp:4961
T check_digits[3]
Two check digits.
Definition parser.hpp:4960
bool is_valid
Is reference valid per ISO 7064.
Definition parser.hpp:4962
Legacy CSS comment end -->
Definition parser.hpp:7465
Legacy CSS comment start <!--
Definition parser.hpp:7427
CSS comment.
Definition parser.hpp:7367
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7375
CSS import directive.
Definition parser.hpp:7679
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7687
CSS string.
Definition parser.hpp:7502
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7510
URI in CSS.
Definition parser.hpp:7569
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7577
Test for any code unit from a given string of code units.
Definition parser.hpp:735
Test for specific code unit.
Definition parser.hpp:303
Test for date.
Definition parser.hpp:4008
Test for valid DNS domain character.
Definition parser.hpp:2790
bool allow_on_edge
Is the character allowed at the beginning or the end of a DNS domain?
Definition parser.hpp:2800
Test for DNS domain/hostname.
Definition parser.hpp:2890
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2954
Test for e-mail address.
Definition parser.hpp:3782
Test for emoticon.
Definition parser.hpp:3885
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3913
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3914
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3916
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3915
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3912
Test for end of line.
Definition parser.hpp:669
Test for fraction.
Definition parser.hpp:1701
End of condition ...]]>
Definition parser.hpp:8348
Start of condition <![condition[...
Definition parser.hpp:8282
virtual bool do_match(_In_reads_or_z_opt_(end) const T *text, size_t start=0, size_t end=SIZE_MAX, int flags=match_multiline)
condition position in source
Definition parser.hpp:8293
Contiguous sequence of characters representing name of element, attribute etc.
Definition parser.hpp:7896
Tag.
Definition parser.hpp:8048
std::vector< html_attribute > attributes
tag attributes
Definition parser.hpp:8065
html_sequence_t type
tag type
Definition parser.hpp:8063
stdex::interval< size_t > name
tag name position in source
Definition parser.hpp:8064
Optionally-quoted string representing value of an attribute.
Definition parser.hpp:7943
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7951
Test for International Bank Account Number.
Definition parser.hpp:4650
T bban[31]
Normalized Basic Bank Account Number.
Definition parser.hpp:4675
T country[3]
ISO 3166-1 alpha-2 country code.
Definition parser.hpp:4673
T check_digits[3]
Two check digits.
Definition parser.hpp:4674
bool is_valid
Is IBAN valid per ISO 7064.
Definition parser.hpp:4676
Test for decimal integer.
Definition parser.hpp:1311
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1396
bool has_separators
Did integer have any separators?
Definition parser.hpp:1417
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1416
Test for hexadecimal integer.
Definition parser.hpp:1476
Base class for integer testing.
Definition parser.hpp:1289
size_t value
Calculated value of the numeral.
Definition parser.hpp:1303
Test for IPv4 address.
Definition parser.hpp:2359
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2404
struct in_addr value
IPv4 address value.
Definition parser.hpp:2405
Test for IPv6 address.
Definition parser.hpp:2570
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2642
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2640
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2641
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2496
Test for repeating.
Definition parser.hpp:925
bool m_greedy
try to match as long a sequence as possible
Definition parser.hpp:964
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:961
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:962
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:963
Test for JSON string.
Definition parser.hpp:7215
MIME content type.
Definition parser.hpp:7763
stdex::interval< size_t > base_type
basic type position in source
Definition parser.hpp:7773
stdex::interval< size_t > sub_type
sub-type position in source
Definition parser.hpp:7774
stdex::interval< size_t > charset
charset position in source
Definition parser.hpp:7775
Test for mixed numeral.
Definition parser.hpp:1936
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:1969
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1967
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1966
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1965
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:1968
Test for monetary numeral.
Definition parser.hpp:2230
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2263
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2268
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2266
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2269
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2267
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2264
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2265
"No-op" match
Definition parser.hpp:199
Base template for all parsers.
Definition parser.hpp:75
stdex::interval< size_t > interval
Region of the last match.
Definition parser.hpp:115
Test for permutation.
Definition parser.hpp:1213
Test for phone number.
Definition parser.hpp:4498
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4523
Test for any punctuation code unit.
Definition parser.hpp:476
Test for Roman numeral.
Definition parser.hpp:1585
Test for scientific numeral.
Definition parser.hpp:2061
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2107
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2111
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2105
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2106
double value
Calculated value of the numeral.
Definition parser.hpp:2115
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2113
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2110
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2112
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2114
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2109
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2108
Test for match score.
Definition parser.hpp:1764
Test for sequence.
Definition parser.hpp:1021
Definition parser.hpp:704
Test for SI Reference delimiter.
Definition parser.hpp:5133
Test for SI Reference part.
Definition parser.hpp:5087
Test for SI Reference.
Definition parser.hpp:5172
basic_si_reference_part< T > part3
Reference data part 3 (P3)
Definition parser.hpp:5201
basic_si_reference_part< T > part1
Reference data part 1 (P1)
Definition parser.hpp:5199
bool is_valid
Is reference valid.
Definition parser.hpp:5202
T model[3]
Reference model.
Definition parser.hpp:5198
basic_si_reference_part< T > part2
Reference data part 2 (P2)
Definition parser.hpp:5200
Test for signed numeral.
Definition parser.hpp:1850
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1876
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1875
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1874
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1877
Test for any space code unit.
Definition parser.hpp:396
Test for any space or punctuation code unit.
Definition parser.hpp:551
Test for any string.
Definition parser.hpp:1141
Test for given string.
Definition parser.hpp:830
Test for time.
Definition parser.hpp:4273
Test for valid URL password character.
Definition parser.hpp:3074
Test for valid URL path character.
Definition parser.hpp:3176
Test for URL path.
Definition parser.hpp:3286
Test for valid URL username character.
Definition parser.hpp:2973
Test for URL.
Definition parser.hpp:3426
Test for HTTP agent.
Definition parser.hpp:6759
Test for HTTP any type.
Definition parser.hpp:5902
Test for HTTP asterisk.
Definition parser.hpp:6530
Test for HTTP header.
Definition parser.hpp:7067
Test for HTTP language (RFC1766)
Definition parser.hpp:6398
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:5584
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5934
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5986
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5850
http_token name
Parameter name.
Definition parser.hpp:5859
http_value value
Parameter value.
Definition parser.hpp:5860
Test for HTTP protocol.
Definition parser.hpp:6834
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6856
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:5743
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:5752
Test for HTTP request.
Definition parser.hpp:6935
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:5620
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:5656
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:5689
Test for HTTP URL parameter.
Definition parser.hpp:6223
Test for HTTP URL path segment.
Definition parser.hpp:6135
Test for HTTP URL path.
Definition parser.hpp:6168
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:6176
Test for HTTP URL port.
Definition parser.hpp:6079
Test for HTTP URL server.
Definition parser.hpp:6042
Test for HTTP URL.
Definition parser.hpp:6300
Collection of HTTP values.
Definition parser.hpp:7171
Test for HTTP value (RFC2616: value)
Definition parser.hpp:5806
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5815
http_token token
Value when matched as token.
Definition parser.hpp:5816
Test for HTTP weight factor.
Definition parser.hpp:6461
float value
Calculated value of the weight factor.
Definition parser.hpp:6474
Test for HTTP weighted value.
Definition parser.hpp:6553
Base template for collection-holding parsers.
Definition parser.hpp:981
Test for any SGML code point.
Definition parser.hpp:264
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:787
Test for specific SGML code point.
Definition parser.hpp:352
Test for valid DNS domain SGML character.
Definition parser.hpp:2845
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2535
Test for any SGML punctuation code point.
Definition parser.hpp:517
Test for any SGML space code point.
Definition parser.hpp:439
Test for any SGML space or punctuation code point.
Definition parser.hpp:594
Test for SGML given string.
Definition parser.hpp:877
Test for valid URL password SGML character.
Definition parser.hpp:3127
Test for valid URL path SGML character.
Definition parser.hpp:3233
Test for valid URL username SGML character.
Definition parser.hpp:3025
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
void invalidate()
Invalidates interval.
Definition interval.hpp:59
T start
interval start
Definition interval.hpp:19
Tag attribute.
Definition parser.hpp:8038
stdex::interval< size_t > name
attribute name position in source
Definition parser.hpp:8039
stdex::interval< size_t > value
attribute value position in source
Definition parser.hpp:8040
Definition parser.hpp:7197