stdex
Additional custom algorithms, or algorithms not covered by Standard C++
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023-2024 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "endian.hpp"
10#include "interval.hpp"
11#include "memory.hpp"
12#include "sgml.hpp"
13#include "string.hpp"
14#include <stdarg.h>
15#include <stdint.h>
16#include <math.h>
17#if defined(_WIN32)
18#include <winsock2.h>
19#if _MSC_VER >= 1300
20#include <ws2ipdef.h>
21#endif
22#include <ws2tcpip.h>
23#else
24#include <netinet/in.h>
25#endif
26#include <limits>
27#include <list>
28#include <locale>
29#include <memory>
30#include <set>
31#include <string_view>
32#include <string>
33
34#if defined(_MSC_VER)
35#pragma warning(push)
36#pragma warning(disable: 4100)
37#elif defined(__GNUC__)
38#pragma GCC diagnostic push
39#pragma GCC diagnostic ignored "-Wunknown-pragmas"
40#endif
41
///
/// Defines binary operator X and compound operator X= for scoped enum T.
///
/// Overloads are generated for (T, T), (T, underlying) and (underlying, T)
/// operand pairs; every result is converted back to T.
///
#define ENUM_FLAG_OPERATOR(T,X) \
inline T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
inline T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T& operator X= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
inline T& operator X= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }

///
/// Opens the definition of scoped enum T with the given underlying type and
/// supplies the ~, |, ^ and & operators (with |=, ^=, &=) for it.
///
/// The macro forward-declares the enum, defines the operators, and ends with
/// the enum head, so the caller continues with `{ enumerators... };`.
///
#define ENUM_FLAGS(T, type) \
enum class T : type; \
inline T operator ~ (T t) { return static_cast<T>(~static_cast<std::underlying_type_t<T>>(t)); } \
ENUM_FLAG_OPERATOR(T,|) \
ENUM_FLAG_OPERATOR(T,^) \
ENUM_FLAG_OPERATOR(T,&) \
enum class T : type
55
56#if defined(_WIN32)
57#elif defined(__APPLE__)
58#define s6_words __u6_addr.__u6_addr16
59#else
60#define s6_words s6_addr16
61#endif
62
63namespace stdex
64{
65 namespace parser
66 {
/// No matching flags set.
constexpr int match_default = 0;

/// Compare characters case-insensitively (parsers lowercase both sides via the locale's ctype facet).
constexpr int match_case_insensitive = 1 << 0;

/// Allow whitespace parsers to accept line-break characters.
constexpr int match_multiline = 1 << 1;
73
77 template <class T>
79 {
80 public:
81 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
82 virtual ~basic_parser() {}
83
84 bool search(
85 _In_reads_or_z_opt_(end) const T* text,
86 _In_ size_t start = 0,
87 _In_ size_t end = SIZE_MAX,
88 _In_ int flags = match_default)
89 {
90 for (size_t i = start; i < end && text[i]; i++)
91 if (match(text, i, end, flags))
92 return true;
93 return false;
94 }
95
96 bool match(
97 _In_reads_or_z_opt_(end) const T* text,
98 _In_ size_t start = 0,
99 _In_ size_t end = SIZE_MAX,
100 _In_ int flags = match_default)
101 {
102 return do_match(text, start, end, flags);
103 }
104
105 bool match(
106 _In_ const std::basic_string_view<T, std::char_traits<T>> text,
107 _In_ size_t start = 0,
108 _In_ size_t end = SIZE_MAX,
109 _In_ int flags = match_default)
110 {
111 return match(text.data(), start, std::min<size_t>(end, text.size()), flags);
112 }
113
114 virtual void invalidate()
115 {
116 this->interval.invalidate();
117 }
118
120
121 protected:
122 virtual bool do_match(
123 _In_reads_or_z_opt_(end) const T* text,
124 _In_ size_t start = 0,
125 _In_ size_t end = SIZE_MAX,
126 _In_ int flags = match_default) = 0;
127
129 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
130 {
131 if (text[start] == '&') {
132 // Potential entity start
133 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
134 for (chr_end = start + 1;; chr_end++) {
135 if (chr_end >= end || text[chr_end] == 0) {
136 // Unterminated entity
137 break;
138 }
139 if (text[chr_end] == ';') {
140 // Entity end
141 size_t n = chr_end - start - 1;
142 if (n >= 2 && text[start + 1] == '#') {
143 // Numerical entity
144 utf32_t unicode;
145 if (text[start + 2] == 'x' || text[start + 2] == 'X')
146 unicode = static_cast<utf32_t>(strtou32(text + start + 3, n - 2, nullptr, 16));
147 else
148 unicode = static_cast<utf32_t>(strtou32(text + start + 2, n - 1, nullptr, 10));
149#ifdef _WIN32
150 if (unicode < 0x10000) {
151 buf[0] = (wchar_t)unicode;
152 buf[1] = 0;
153 }
154 else {
155 ucs4_to_surrogate_pair(buf, unicode);
156 buf[2] = 0;
157 }
158#else
159 buf[0] = (wchar_t)unicode;
160 buf[1] = 0;
161#endif
162 chr_end++;
163 return buf;
164 }
165 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
166 if (entity_w) {
167 chr_end++;
168 return entity_w;
169 }
170 // Unknown entity.
171 break;
172 }
173 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
174 // This char cannot possibly be a part of entity.
175 break;
176 }
177 }
178 }
179 buf[0] = text[start];
180 buf[1] = 0;
181 chr_end = start + 1;
182 return buf;
183 }
185
186 std::locale m_locale;
187 };
188
189 using parser = basic_parser<char>;
190 using wparser = basic_parser<wchar_t>;
191#ifdef _UNICODE
192 using tparser = wparser;
193#else
194 using tparser = parser;
195#endif
196 using sgml_parser = basic_parser<char>;
197
201 template <class T>
202 class basic_noop : public basic_parser<T>
203 {
204 protected:
205 virtual bool do_match(
206 _In_reads_or_z_opt_(end) const T* text,
207 _In_ size_t start = 0,
208 _In_ size_t end = SIZE_MAX,
209 _In_ int flags = match_default)
210 {
211 _Assume_(text || start >= end);
212 if (start < end && text[start]) {
213 this->interval.start = this->interval.end = start;
214 return true;
215 }
216 this->interval.invalidate();
217 return false;
218 }
219 };
220
221 using noop = basic_noop<char>;
223#ifdef _UNICODE
224 using tnoop = wnoop;
225#else
226 using tnoop = noop;
227#endif
229
233 template <class T>
234 class basic_any_cu : public basic_parser<T>
235 {
236 public:
237 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
238
239 protected:
240 virtual bool do_match(
241 _In_reads_or_z_opt_(end) const T* text,
242 _In_ size_t start = 0,
243 _In_ size_t end = SIZE_MAX,
244 _In_ int flags = match_default)
245 {
246 _Assume_(text || start >= end);
247 if (start < end && text[start]) {
248 this->interval.end = (this->interval.start = start) + 1;
249 return true;
250 }
251 this->interval.invalidate();
252 return false;
253 }
254 };
255
258#ifdef _UNICODE
259 using tany_cu = wany_cu;
260#else
261 using tany_cu = any_cu;
262#endif
263
267 class sgml_any_cp : public basic_any_cu<char>
268 {
269 public:
270 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
271
272 protected:
273 virtual bool do_match(
274 _In_reads_or_z_(end) const char* text,
275 _In_ size_t start = 0,
276 _In_ size_t end = SIZE_MAX,
277 _In_ int flags = match_default)
278 {
279 _Assume_(text || start >= end);
280 if (start < end && text[start]) {
281 if (text[start] == '&') {
282 // SGML entity
283 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
284 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
285 if (text[this->interval.end] == ';') {
286 this->interval.end++;
287 this->interval.start = start;
288 return true;
289 }
290 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
291 break;
292 // Unterminated entity
293 }
294 this->interval.end = (this->interval.start = start) + 1;
295 return true;
296 }
297 this->interval.invalidate();
298 return false;
299 }
300 };
301
305 template <class T>
306 class basic_cu : public basic_parser<T>
307 {
308 public:
309 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
311 m_chr(chr),
312 m_invert(invert)
313 {}
314
315 protected:
316 virtual bool do_match(
317 _In_reads_or_z_opt_(end) const T* text,
318 _In_ size_t start = 0,
319 _In_ size_t end = SIZE_MAX,
320 _In_ int flags = match_default)
321 {
322 _Assume_(text || start >= end);
323 if (start < end && text[start]) {
324 bool r;
325 if (flags & match_case_insensitive) {
326 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
327 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
328 }
329 else
330 r = text[start] == m_chr;
331 if ((r && !m_invert) || (!r && m_invert)) {
332 this->interval.end = (this->interval.start = start) + 1;
333 return true;
334 }
335 }
336 this->interval.invalidate();
337 return false;
338 }
339
340 T m_chr;
341 bool m_invert;
342 };
343
344 using cu = basic_cu<char>;
345 using wcu = basic_cu<wchar_t>;
346#ifdef _UNICODE
347 using tcu = wcu;
348#else
349 using tcu = cu;
350#endif
351
355 class sgml_cp : public sgml_parser
356 {
357 public:
358 sgml_cp(const char* chr, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
360 m_invert(invert)
361 {
362 _Assume_(chr || !count);
363 wchar_t buf[3];
364 size_t chr_end;
365 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
366 }
367
368 protected:
369 virtual bool do_match(
370 _In_reads_or_z_(end) const char* text,
371 _In_ size_t start = 0,
372 _In_ size_t end = SIZE_MAX,
373 _In_ int flags = match_default)
374 {
375 _Assume_(text || start >= end);
376 if (start < end && text[start]) {
377 wchar_t buf[3];
378 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
379 bool r = ((flags & match_case_insensitive) ?
380 stdex::strnicmp(chr, SIZE_MAX, m_chr.data(), m_chr.size(), m_locale) :
381 stdex::strncmp(chr, SIZE_MAX, m_chr.data(), m_chr.size())) == 0;
382 if ((r && !m_invert) || (!r && m_invert)) {
383 this->interval.start = start;
384 return true;
385 }
386 }
387 this->interval.invalidate();
388 return false;
389 }
390
391 std::wstring m_chr;
392 bool m_invert;
393 };
394
398 template <class T>
399 class basic_space_cu : public basic_parser<T>
400 {
401 public:
402 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
404 m_invert(invert)
405 {}
406
407 protected:
408 virtual bool do_match(
409 _In_reads_or_z_opt_(end) const T* text,
410 _In_ size_t start = 0,
411 _In_ size_t end = SIZE_MAX,
412 _In_ int flags = match_default)
413 {
414 _Assume_(text || start >= end);
415 if (start < end && text[start]) {
416 bool r =
417 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
418 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
419 if ((r && !m_invert) || (!r && m_invert)) {
420 this->interval.end = (this->interval.start = start) + 1;
421 return true;
422 }
423 }
424 this->interval.invalidate();
425 return false;
426 }
427
428 bool m_invert;
429 };
430
433#ifdef _UNICODE
434 using tspace_cu = wspace_cu;
435#else
436 using tspace_cu = space_cu;
437#endif
438
442 class sgml_space_cp : public basic_space_cu<char>
443 {
444 public:
445 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
447 {}
448
449 protected:
450 virtual bool do_match(
451 _In_reads_or_z_(end) const char* text,
452 _In_ size_t start = 0,
453 _In_ size_t end = SIZE_MAX,
454 _In_ int flags = match_default)
455 {
456 _Assume_(text || start >= end);
457 if (start < end && text[start]) {
458 wchar_t buf[3];
459 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
460 const wchar_t* chr_end = chr + stdex::strlen(chr);
461 bool r =
462 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
463 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
464 if ((r && !m_invert) || (!r && m_invert)) {
465 this->interval.start = start;
466 return true;
467 }
468 }
469
470 this->interval.invalidate();
471 return false;
472 }
473 };
474
478 template <class T>
479 class basic_punct_cu : public basic_parser<T>
480 {
481 public:
482 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
484 m_invert(invert)
485 {}
486
487 protected:
488 virtual bool do_match(
489 _In_reads_or_z_opt_(end) const T* text,
490 _In_ size_t start = 0,
491 _In_ size_t end = SIZE_MAX,
492 _In_ int flags = match_default)
493 {
494 _Assume_(text || start >= end);
495 if (start < end && text[start]) {
496 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
497 if ((r && !m_invert) || (!r && m_invert)) {
498 this->interval.end = (this->interval.start = start) + 1;
499 return true;
500 }
501 }
502 this->interval.invalidate();
503 return false;
504 }
505
506 bool m_invert;
507 };
508
511#ifdef _UNICODE
512 using tpunct_cu = wpunct_cu;
513#else
514 using tpunct_cu = punct_cu;
515#endif
516
520 class sgml_punct_cp : public basic_punct_cu<char>
521 {
522 public:
523 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
525 {}
526
527 protected:
528 virtual bool do_match(
529 _In_reads_or_z_(end) const char* text,
530 _In_ size_t start = 0,
531 _In_ size_t end = SIZE_MAX,
532 _In_ int flags = match_default)
533 {
534 _Assume_(text || start >= end);
535 if (start < end && text[start]) {
536 wchar_t buf[3];
537 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
538 const wchar_t* chr_end = chr + stdex::strlen(chr);
539 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
540 if ((r && !m_invert) || (!r && m_invert)) {
541 this->interval.start = start;
542 return true;
543 }
544 }
545 this->interval.invalidate();
546 return false;
547 }
548 };
549
553 template <class T>
555 {
556 public:
557 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
559 m_invert(invert)
560 {}
561
562 protected:
563 virtual bool do_match(
564 _In_reads_or_z_opt_(end) const T* text,
565 _In_ size_t start = 0,
566 _In_ size_t end = SIZE_MAX,
567 _In_ int flags = match_default)
568 {
569 _Assume_(text || start >= end);
570 if (start < end && text[start]) {
571 bool r =
572 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
573 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
574 if ((r && !m_invert) || (!r && m_invert)) {
575 this->interval.end = (this->interval.start = start) + 1;
576 return true;
577 }
578 }
579 this->interval.invalidate();
580 return false;
581 }
582
583 bool m_invert;
584 };
585
588#ifdef _UNICODE
590#else
592#endif
593
598 {
599 public:
600 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
602 {}
603
604 protected:
605 virtual bool do_match(
606 _In_reads_or_z_(end) const char* text,
607 _In_ size_t start = 0,
608 _In_ size_t end = SIZE_MAX,
609 _In_ int flags = match_default)
610 {
611 _Assume_(text || start >= end);
612 if (start < end && text[start]) {
613 wchar_t buf[3];
614 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
615 const wchar_t* chr_end = chr + stdex::strlen(chr);
616 bool r =
617 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
618 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
619 if ((r && !m_invert) || (!r && m_invert)) {
620 this->interval.start = start;
621 return true;
622 }
623 }
624 this->interval.invalidate();
625 return false;
626 }
627 };
628
632 template <class T>
633 class basic_bol : public basic_parser<T>
634 {
635 public:
636 basic_bol(bool invert = false) : m_invert(invert) {}
637
638 protected:
639 virtual bool do_match(
640 _In_reads_or_z_opt_(end) const T* text,
641 _In_ size_t start = 0,
642 _In_ size_t end = SIZE_MAX,
643 _In_ int flags = match_default)
644 {
645 _Assume_(text || !end);
646 _Assume_(text || start >= end);
647 bool r = start == 0 || (start <= end && stdex::islbreak(text[start - 1]));
648 if ((r && !m_invert) || (!r && m_invert)) {
649 this->interval.end = this->interval.start = start;
650 return true;
651 }
652 this->interval.invalidate();
653 return false;
654 }
655
656 bool m_invert;
657 };
658
659 using bol = basic_bol<char>;
660 using wbol = basic_bol<wchar_t>;
661#ifdef _UNICODE
662 using tbol = wbol;
663#else
664 using tbol = bol;
665#endif
667
671 template <class T>
672 class basic_eol : public basic_parser<T>
673 {
674 public:
675 basic_eol(bool invert = false) : m_invert(invert) {}
676
677 protected:
678 virtual bool do_match(
679 _In_reads_or_z_opt_(end) const T* text,
680 _In_ size_t start = 0,
681 _In_ size_t end = SIZE_MAX,
682 _In_ int flags = match_default)
683 {
684 _Assume_(text || start >= end);
685 bool r = start >= end || !text[start] || stdex::islbreak(text[start]);
686 if ((r && !m_invert) || (!r && m_invert)) {
687 this->interval.end = this->interval.start = start;
688 return true;
689 }
690 this->interval.invalidate();
691 return false;
692 }
693
694 bool m_invert;
695 };
696
697 using eol = basic_eol<char>;
698 using weol = basic_eol<wchar_t>;
699#ifdef _UNICODE
700 using teol = weol;
701#else
702 using teol = eol;
703#endif
705
706 template <class T>
707 class basic_set : public basic_parser<T>
708 {
709 public:
710 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
712 hit_offset(SIZE_MAX),
713 m_invert(invert)
714 {}
715
716 virtual void invalidate()
717 {
718 hit_offset = SIZE_MAX;
720 }
721
722 size_t hit_offset;
723
724 protected:
725 virtual bool do_match(
726 _In_reads_or_z_opt_(end) const T* text,
727 _In_ size_t start = 0,
728 _In_ size_t end = SIZE_MAX,
729 _In_ int flags = match_default) = 0;
730
731 bool m_invert;
732 };
733
737 template <class T>
738 class basic_cu_set : public basic_set<T>
739 {
740 public:
742 _In_reads_or_z_(count) const T* set,
743 _In_ size_t count = SIZE_MAX,
744 _In_ bool invert = false,
745 _In_ const std::locale& locale = std::locale()) :
747 {
748 if (set)
749 m_set.assign(set, set + stdex::strnlen(set, count));
750 }
751
752 protected:
753 virtual bool do_match(
754 _In_reads_or_z_opt_(end) const T* text,
755 _In_ size_t start = 0,
756 _In_ size_t end = SIZE_MAX,
757 _In_ int flags = match_default)
758 {
759 _Assume_(text || start >= end);
760 if (start < end && text[start]) {
761 const T* set = m_set.data();
762 size_t r = (flags & match_case_insensitive) ?
763 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
764 stdex::strnchr(set, m_set.size(), text[start]);
765 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
766 this->hit_offset = r;
767 this->interval.end = (this->interval.start = start) + 1;
768 return true;
769 }
770 }
771 this->hit_offset = SIZE_MAX;
772 this->interval.invalidate();
773 return false;
774 }
775
776 std::basic_string<T> m_set;
777 };
778
781#ifdef _UNICODE
782 using tcu_set = wcu_set;
783#else
784 using tcu_set = cu_set;
785#endif
786
790 class sgml_cp_set : public basic_set<char>
791 {
792 public:
793 sgml_cp_set(const char* set, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
795 {
796 if (set)
797 m_set = sgml2str(set, count);
798 }
799
800 protected:
801 virtual bool do_match(
802 _In_reads_or_z_(end) const char* text,
803 _In_ size_t start = 0,
804 _In_ size_t end = SIZE_MAX,
805 _In_ int flags = match_default)
806 {
807 _Assume_(text || start >= end);
808 if (start < end && text[start]) {
809 wchar_t buf[3];
810 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
811 const wchar_t* set = m_set.data();
812 size_t r = (flags & match_case_insensitive) ?
813 stdex::strnistr(set, m_set.size(), chr, m_locale) :
814 stdex::strnstr(set, m_set.size(), chr);
815 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
816 hit_offset = r;
817 this->interval.start = start;
818 return true;
819 }
820 }
821 hit_offset = SIZE_MAX;
822 this->interval.invalidate();
823 return false;
824 }
825
826 std::wstring m_set;
827 };
828
832 template <class T>
833 class basic_string : public basic_parser<T>
834 {
835 public:
837 _In_reads_or_z_(count) const T* str,
838 _In_ size_t count = SIZE_MAX,
839 _In_ const std::locale& locale = std::locale()) :
841 m_str(str, str + stdex::strnlen(str, count))
842 {}
843
844 protected:
845 virtual bool do_match(
846 _In_reads_or_z_opt_(end) const T* text,
847 _In_ size_t start = 0,
848 _In_ size_t end = SIZE_MAX,
849 _In_ int flags = match_default)
850 {
851 _Assume_(text || start >= end);
852 size_t
853 m = m_str.size(),
854 n = std::min<size_t>(end - start, m);
855 bool r = ((flags & match_case_insensitive) ?
856 stdex::strnicmp(text + start, n, m_str.data(), m, this->m_locale) :
857 stdex::strncmp(text + start, n, m_str.data(), m)) == 0;
858 if (r) {
859 this->interval.end = (this->interval.start = start) + n;
860 return true;
861 }
862 this->interval.invalidate();
863 return false;
864 }
865
866 std::basic_string<T> m_str;
867 };
868
871#ifdef _UNICODE
872 using tstring = wstring;
873#else
874 using tstring = string;
875#endif
876
881 {
882 public:
883 sgml_string(const char* str, size_t count = SIZE_MAX, _In_ const std::locale& locale = std::locale()) :
885 m_str(sgml2str(str, count))
886 {}
887
888 protected:
889 virtual bool do_match(
890 _In_reads_or_z_(end) const char* text,
891 _In_ size_t start = 0,
892 _In_ size_t end = SIZE_MAX,
893 _In_ int flags = match_default)
894 {
895 _Assume_(text || start >= end);
896 const wchar_t* str = m_str.data();
897 const bool case_insensitive = flags & match_case_insensitive ? true : false;
898 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
899 for (this->interval.end = start;;) {
900 if (!*str) {
901 this->interval.start = start;
902 return true;
903 }
904 if (this->interval.end >= end || !text[this->interval.end]) {
905 this->interval.invalidate();
906 return false;
907 }
908 wchar_t buf[3];
909 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
910 for (; *chr; ++str, ++chr) {
911 if (!*str ||
912 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
913 {
914 this->interval.invalidate();
915 return false;
916 }
917 }
918 }
919 }
920
921 std::wstring m_str;
922 };
923
927 template <class T>
929 {
930 public:
931 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = SIZE_MAX, bool greedy = true) :
932 m_el(el),
936 {}
937
938 protected:
939 virtual bool do_match(
940 _In_reads_or_z_opt_(end) const T* text,
941 _In_ size_t start = 0,
942 _In_ size_t end = SIZE_MAX,
943 _In_ int flags = match_default)
944 {
945 _Assume_(text || start >= end);
946 this->interval.start = this->interval.end = start;
947 for (size_t i = 0; ; i++) {
948 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
949 return true;
950 if (!m_el->match(text, this->interval.end, end, flags)) {
951 if (i >= m_min_iterations)
952 return true;
953 break;
954 }
955 if (m_el->interval.end == this->interval.end) {
956 // Element did match, but the matching interval was empty. Quit instead of spinning.
957 return true;
958 }
959 this->interval.end = m_el->interval.end;
960 }
961 this->interval.invalidate();
962 return false;
963 }
964
965 std::shared_ptr<basic_parser<T>> m_el;
968 bool m_greedy;
969 };
970
973#ifdef _UNICODE
974 using titerations = witerations;
975#else
976 using titerations = iterations;
977#endif
979
983 template <class T>
985 {
986 protected:
987 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
988
989 public:
991 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
992 _In_ size_t count,
993 _In_ const std::locale& locale = std::locale()) :
995 {
996 _Assume_(el || !count);
997 m_collection.reserve(count);
998 for (size_t i = 0; i < count; i++)
999 m_collection.push_back(el[i]);
1000 }
1001
1003 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1004 _In_ const std::locale& locale = std::locale()) :
1006 m_collection(std::move(collection))
1007 {}
1008
1009 virtual void invalidate()
1010 {
1011 for (auto& el : m_collection)
1012 el->invalidate();
1014 }
1015
1016 protected:
1017 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
1018 };
1019
1023 template <class T>
1025 {
1026 public:
1028 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1029 _In_ size_t count = 0,
1030 _In_ const std::locale& locale = std::locale()) :
1032 {}
1033
1035 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1036 _In_ const std::locale& locale = std::locale()) :
1038 {}
1039
1040 protected:
1041 virtual bool do_match(
1042 _In_reads_or_z_opt_(end) const T* text,
1043 _In_ size_t start = 0,
1044 _In_ size_t end = SIZE_MAX,
1045 _In_ int flags = match_default)
1046 {
1047 _Assume_(text || start >= end);
1048 this->interval.end = start;
1049 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1050 if (!(*i)->match(text, this->interval.end, end, flags)) {
1051 for (++i; i != this->m_collection.end(); ++i)
1052 (*i)->invalidate();
1053 this->interval.invalidate();
1054 return false;
1055 }
1056 this->interval.end = (*i)->interval.end;
1057 }
1058 this->interval.start = start;
1059 return true;
1060 }
1061 };
1062
1065#ifdef _UNICODE
1066 using tsequence = wsequence;
1067#else
1068 using tsequence = sequence;
1069#endif
1071
1075 template <class T>
1077 {
1078 protected:
1079 basic_branch(_In_ const std::locale& locale) :
1081 hit_offset(SIZE_MAX)
1082 {}
1083
1084 public:
1086 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1087 _In_ size_t count = 0,
1088 _In_ const std::locale& locale = std::locale()) :
1090 hit_offset(SIZE_MAX)
1091 {}
1092
1094 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1095 _In_ const std::locale& locale = std::locale()) :
1097 hit_offset(SIZE_MAX)
1098 {}
1099
1100 virtual void invalidate()
1101 {
1102 hit_offset = SIZE_MAX;
1104 }
1105
1106 size_t hit_offset;
1107
1108 protected:
1109 virtual bool do_match(
1110 _In_reads_or_z_opt_(end) const T* text,
1111 _In_ size_t start = 0,
1112 _In_ size_t end = SIZE_MAX,
1113 _In_ int flags = match_default)
1114 {
1115 _Assume_(text || start >= end);
1116 hit_offset = 0;
1117 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1118 if ((*i)->match(text, start, end, flags)) {
1119 this->interval = (*i)->interval;
1120 for (++i; i != this->m_collection.end(); ++i)
1121 (*i)->invalidate();
1122 return true;
1123 }
1124 }
1125 hit_offset = SIZE_MAX;
1126 this->interval.invalidate();
1127 return false;
1128 }
1129 };
1130
1131 using branch = basic_branch<char>;
1133#ifdef _UNICODE
1134 using tbranch = wbranch;
1135#else
1136 using tbranch = branch;
1137#endif
1139
1143 template <class T, class T_parser = basic_string<T>>
1145 {
1146 public:
1148 _In_reads_(count) const T* str_z = nullptr,
1149 _In_ size_t count = 0,
1150 _In_ const std::locale& locale = std::locale()) :
1152 {
1153 build(str_z, count);
1154 }
1155
1156 basic_string_branch(_In_z_ const T* str, ...) :
1157 basic_branch<T>(std::locale())
1158 {
1159 va_list params;
1160 va_start(params, str);
1161 build(str, params);
1162 va_end(params);
1163 }
1164
1165 basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1167 {
1168 va_list params;
1169 va_start(params, str);
1170 build(str, params);
1171 va_end(params);
1172 }
1173
1174 protected:
1175 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1176 {
1177 _Assume_(str_z || !count);
1178 if (count) {
1179 size_t offset, n;
1180 for (
1181 offset = n = 0;
1182 offset < count && str_z[offset];
1183 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1184 this->m_collection.reserve(n);
1185 for (
1186 offset = 0;
1187 offset < count && str_z[offset];
1188 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1189 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1190 }
1191 }
1192
1193 void build(_In_z_ const T* str, _In_ va_list params)
1194 {
1195 const T* p;
1196 for (
1197 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, SIZE_MAX, this->m_locale)));
1198 (p = va_arg(params, const T*)) != nullptr;
1199 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, SIZE_MAX, this->m_locale))));
1200 }
1201 };
1202
1205#ifdef _UNICODE
1207#else
1209#endif
1211
1215 template <class T>
1217 {
1218 public:
1220 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1221 _In_ size_t count = 0,
1222 _In_ const std::locale& locale = std::locale()) :
1224 {}
1225
1227 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1228 _In_ const std::locale& locale = std::locale()) :
1230 {}
1231
1232 protected:
1233 virtual bool do_match(
1234 _In_reads_or_z_opt_(end) const T* text,
1235 _In_ size_t start = 0,
1236 _In_ size_t end = SIZE_MAX,
1237 _In_ int flags = match_default)
1238 {
1239 _Assume_(text || start >= end);
1240 for (auto& el : this->m_collection)
1241 el->invalidate();
1242 if (match_recursively(text, start, end, flags)) {
1243 this->interval.start = start;
1244 return true;
1245 }
1246 this->interval.invalidate();
1247 return false;
1248 }
1249
1250 bool match_recursively(
1251 _In_reads_or_z_opt_(end) const T* text,
1252 _In_ size_t start = 0,
1253 _In_ size_t end = SIZE_MAX,
1254 _In_ int flags = match_default)
1255 {
1256 bool all_matched = true;
1257 for (auto& el : this->m_collection) {
1258 if (!el->interval) {
1259 // Element was not matched in permutatuion yet.
1260 all_matched = false;
1261 if (el->match(text, start, end, flags)) {
1262 // Element matched for the first time.
1263 if (match_recursively(text, el->interval.end, end, flags)) {
1264 // Rest of the elements matched too.
1265 return true;
1266 }
1267 el->invalidate();
1268 }
1269 }
1270 }
1271 if (all_matched) {
1272 this->interval.end = start;
1273 return true;
1274 }
1275 return false;
1276 }
1277 };
1278
1281#ifdef _UNICODE
1282 using tpermutation = wpermutation;
1283#else
1284 using tpermutation = permutation;
1285#endif
1287
1291 template <class T>
1292 class basic_integer : public basic_parser<T>
1293 {
1294 public:
1295 basic_integer(_In_ const std::locale& locale = std::locale()) :
1297 value(0)
1298 {}
1299
1300 virtual void invalidate()
1301 {
1302 value = 0;
1304 }
1305
1306 public:
1307 size_t value;
1308 };
1309
// Decimal integer parser assembled from ten digit sub-parsers, one per digit value 0..9.
// NOTE(review): the class-head line, the constructor-name line, one initializer-list line and
// the line opening the success branch of do_match() are absent from this listing — confirm
// them against the full source before editing the code.
1313 template <class T>
1315 {
1316 public:
// Constructor parameters: digit_N is the parser whose match contributes the digit value N.
1318 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1319 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1320 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1321 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1322 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1323 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1324 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1325 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1326 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1327 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1328 _In_ const std::locale& locale = std::locale()) :
1330 m_digit_0(digit_0),
1331 m_digit_1(digit_1),
1332 m_digit_2(digit_2),
1333 m_digit_3(digit_3),
1334 m_digit_4(digit_4),
1335 m_digit_5(digit_5),
1336 m_digit_6(digit_6),
1337 m_digit_7(digit_7),
1338 m_digit_8(digit_8),
1339 m_digit_9(digit_9)
1340 {}
1341
1342 protected:
// Scans `text` from `start`, consuming one digit per iteration and accumulating it into
// this->value in base 10. Scanning stops at `end`, at a zero character, or at the first
// position where none of the digit parsers matches.
1343 virtual bool do_match(
1344 _In_reads_or_z_opt_(end) const T* text,
1345 _In_ size_t start = 0,
1346 _In_ size_t end = SIZE_MAX,
1347 _In_ int flags = match_default)
1348 {
1349 _Assume_(text || start >= end);
1350 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1351 size_t dig;
// Digit parsers are tried in the order 0..9; the first one that matches supplies both the
// digit value and the new scan position. None of the pointers is null-checked.
1352 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1353 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1354 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1355 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1356 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1357 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1358 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1359 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1360 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1361 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1362 else break;
// No overflow check is performed on this accumulation.
1363 this->value = this->value * 10 + dig;
1364 }
// NOTE(review): the line that opens this success branch (listing line 1365) is missing here;
// presumably it tests that at least one digit was consumed — confirm.
1366 this->interval.start = start;
1367 return true;
1368 }
// Failure path: invalidate this parser's interval.
1369 this->interval.invalidate();
1370 return false;
1371 }
1372
// Digit sub-parsers, named after the digit value each one represents.
1373 std::shared_ptr<basic_parser<T>>
1374 m_digit_0,
1375 m_digit_1,
1376 m_digit_2,
1377 m_digit_3,
1378 m_digit_4,
1379 m_digit_5,
1380 m_digit_6,
1381 m_digit_7,
1382 m_digit_8,
1383 m_digit_9;
1384 };
1385
// Build-dependent alias: tinteger10 is winteger10 when _UNICODE is defined, integer10 otherwise.
1388#ifdef _UNICODE
1389 using tinteger10 = winteger10;
1390#else
1391 using tinteger10 = integer10;
1392#endif
1394
// Decimal integer parser that also accepts a separator between groups of three digits
// (the grouping logic lives in do_match()).
// NOTE(review): the class-head line and the constructor-name line are absent from this listing.
1398 template <class T>
1400 {
1401 public:
// Constructor parameters: `digits` parses a run of decimal digits, `separator` is the
// basic_set parser tried between digit groups. Counters start at 0 / false.
1403 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1404 _In_ const std::shared_ptr<basic_set<T>>& separator,
1405 _In_ const std::locale& locale = std::locale()) :
1407 digit_count(0),
1408 has_separators(false),
1409 m_digits(digits),
1410 m_separator(separator)
1411 {}
1412
// Resets digit_count and has_separators to their initial values.
// NOTE(review): listing line 1417 is absent; any further reset work is not visible here.
1413 virtual void invalidate()
1414 {
1415 digit_count = 0;
1416 has_separators = false;
1418 }
1419
1422
1423 protected:
1424 virtual bool do_match(
1425 _In_reads_or_z_opt_(end) const T* text,
1426 _In_ size_t start = 0,
1427 _In_ size_t end = SIZE_MAX,
1428 _In_ int flags = match_default)
1429 {
1430 _Assume_(text || start >= end);
1431 if (m_digits->match(text, start, end, flags)) {
1432 // Leading part match.
1433 this->value = m_digits->value;
1434 digit_count = m_digits->interval.size();
1435 has_separators = false;
1436 this->interval.start = start;
1437 this->interval.end = m_digits->interval.end;
1438 if (m_digits->interval.size() <= 3) {
1439 // Maybe separated with thousand separators?
1440 size_t hit_offset = SIZE_MAX;
1441 while (m_separator->match(text, this->interval.end, end, flags) &&
1442 (hit_offset == SIZE_MAX || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1443 m_digits->match(text, m_separator->interval.end, end, flags) &&
1444 m_digits->interval.size() == 3)
1445 {
1446 // Thousand separator and three-digit integer followed.
1447 this->value = this->value * 1000 + m_digits->value;
1448 digit_count += 3;
1449 has_separators = true;
1450 this->interval.end = m_digits->interval.end;
1451 hit_offset = m_separator->hit_offset;
1452 }
1453 }
1454
1455 return true;
1456 }
1457 this->value = 0;
1458 this->interval.invalidate();
1459 return false;
1460 }
1461
// Digit-run parser; do_match() uses it for the leading group and for every three-digit group after a separator.
1462 std::shared_ptr<basic_integer10<T>> m_digits;
// Separator parser; do_match() compares its hit_offset between groups so separators cannot be mixed.
1463 std::shared_ptr<basic_set<T>> m_separator;
1464 };
1465
// Instantiations of basic_integer10ts for char and wchar_t text.
1466 using integer10ts = basic_integer10ts<char>;
1467 using winteger10ts = basic_integer10ts<wchar_t>;
// tinteger10ts follows the _UNICODE build setting.
1468#ifdef _UNICODE
1469 using tinteger10ts = winteger10ts;
1470#else
1471 using tinteger10ts = integer10ts;
1472#endif
// Presumably meant for SGML-encoded text; instantiated with char in this listing.
1473 using sgml_integer10ts = basic_integer10ts<char>;
1474
// Hexadecimal integer parser assembled from sixteen digit sub-parsers, one per digit value 0..15.
// NOTE(review): the class-head line, the constructor-name line, one initializer-list line and
// the line opening the success branch of do_match() are absent from this listing — confirm
// them against the full source before editing the code.
1478 template <class T>
1480 {
1481 public:
// Constructor parameters: digit_N is the parser whose match contributes the digit value N
// (digit_10..digit_15 contribute 10..15).
1483 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1484 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1485 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1486 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1487 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1488 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1489 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1490 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1491 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1492 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1493 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1494 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1495 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1496 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1497 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1498 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1499 _In_ const std::locale& locale = std::locale()) :
1501 m_digit_0(digit_0),
1502 m_digit_1(digit_1),
1503 m_digit_2(digit_2),
1504 m_digit_3(digit_3),
1505 m_digit_4(digit_4),
1506 m_digit_5(digit_5),
1507 m_digit_6(digit_6),
1508 m_digit_7(digit_7),
1509 m_digit_8(digit_8),
1510 m_digit_9(digit_9),
1511 m_digit_10(digit_10),
1512 m_digit_11(digit_11),
1513 m_digit_12(digit_12),
1514 m_digit_13(digit_13),
1515 m_digit_14(digit_14),
1516 m_digit_15(digit_15)
1517 {}
1518
1519 protected:
// Scans `text` from `start`, consuming one digit per iteration and accumulating it into
// this->value in base 16. Scanning stops at `end`, at a zero character, or at the first
// position where none of the digit parsers matches.
1520 virtual bool do_match(
1521 _In_reads_or_z_opt_(end) const T* text,
1522 _In_ size_t start = 0,
1523 _In_ size_t end = SIZE_MAX,
1524 _In_ int flags = match_default)
1525 {
1526 _Assume_(text || start >= end);
1527 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1528 size_t dig;
// Digit parsers are tried in the order 0..15; the first one that matches supplies both the
// digit value and the new scan position. None of the pointers is null-checked.
1529 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1530 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1531 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1532 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1533 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1534 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1535 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1536 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1537 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1538 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1539 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1540 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1541 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1542 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1543 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1544 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
1545 else break;
// No overflow check is performed on this accumulation.
1546 this->value = this->value * 16 + dig;
1547 }
// NOTE(review): the line that opens this success branch (listing line 1548) is missing here;
// presumably it tests that at least one digit was consumed — confirm.
1549 this->interval.start = start;
1550 return true;
1551 }
// Failure path: invalidate this parser's interval.
1552 this->interval.invalidate();
1553 return false;
1554 }
1555
// Digit sub-parsers, named after the digit value each one represents.
1556 std::shared_ptr<basic_parser<T>>
1557 m_digit_0,
1558 m_digit_1,
1559 m_digit_2,
1560 m_digit_3,
1561 m_digit_4,
1562 m_digit_5,
1563 m_digit_6,
1564 m_digit_7,
1565 m_digit_8,
1566 m_digit_9,
1567 m_digit_10,
1568 m_digit_11,
1569 m_digit_12,
1570 m_digit_13,
1571 m_digit_14,
1572 m_digit_15;
1573 };
1574
// Build-dependent alias: tinteger16 is winteger16 when _UNICODE is defined, integer16 otherwise.
1577#ifdef _UNICODE
1578 using tinteger16 = winteger16;
1579#else
1580 using tinteger16 = integer16;
1581#endif
1583
// Roman-numeral parser assembled from one sub-parser per numeral value
// (1, 5, 10, 50, 100, 500, 1000, 5000, 10000).
// NOTE(review): the class-head line, the constructor-name line, one initializer-list line and
// the declaration of the `dig` array in do_match() are absent from this listing — confirm
// them against the full source before editing the code.
1587 template <class T>
1589 {
1590 public:
// Constructor parameters: digit_N is the parser for the numeral worth N. do_match() checks
// each pointer before use, so individual numerals may be left null.
1592 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1593 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1594 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1595 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1596 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1597 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1598 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1599 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1600 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1601 _In_ const std::locale& locale = std::locale()) :
1603 m_digit_1(digit_1),
1604 m_digit_5(digit_5),
1605 m_digit_10(digit_10),
1606 m_digit_50(digit_50),
1607 m_digit_100(digit_100),
1608 m_digit_500(digit_500),
1609 m_digit_1000(digit_1000),
1610 m_digit_5000(digit_5000),
1611 m_digit_10000(digit_10000)
1612 {}
1613
1614 protected:
// Scans numerals from `start` and accumulates their total into this->value, applying the
// additive and subtractive rules commented below. Returns true only when the accumulated
// value is non-zero.
1615 virtual bool do_match(
1616 _In_reads_or_z_opt_(end) const T* text,
1617 _In_ size_t start = 0,
1618 _In_ size_t end = SIZE_MAX,
1619 _In_ int flags = match_default)
1620 {
1621 _Assume_(text || start >= end);
// NOTE(review): the declaration of `dig` (listing line 1623) is missing here. From its use
// below: dig[0] is the current numeral, dig[1]..dig[3] the three preceding ones, and dig[4]
// the first numeral of the match (SIZE_MAX until set). Initial values are not visible.
1622 size_t
1624 end2;
1625
// The loop's step expression shifts the numeral history by one and advances the scan
// position to end2, the end of the numeral just accepted.
1626 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
1627 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1628 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1629 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1630 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1631 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1632 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1633 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1634 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1635 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1636 else break;
1637
1638 // Store first digit.
1639 if (dig[4] == SIZE_MAX) dig[4] = dig[0];
1640
1641 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1642 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1643 break;
1644 }
1645 if (dig[0] <= dig[1]) {
1646 // Digit is less or equal previous one: add.
1647 this->value += dig[0];
1648 }
1649 else if (
1650 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1651 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1652 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1653 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1654 {
1655 // Digit is up to two orders bigger than previous one: subtract. But...
1656 if (dig[2] < dig[0]) {
1657 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1658 break;
1659 }
1660 this->value -= dig[1]; // Cancel addition in the previous step.
1661 dig[0] -= dig[1]; // Combine last two digits.
1662 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1663 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1664 this->value += dig[0]; // Add combined value.
1665 }
1666 else {
1667 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1668 break;
1669 }
1670 }
// A break above leaves this->interval.end at the end of the last accepted numeral, because
// the loop's step expression is skipped.
1671 if (this->value) {
1672 this->interval.start = start;
1673 return true;
1674 }
1675 this->interval.invalidate();
1676 return false;
1677 }
1678
// Numeral sub-parsers, named after the value each one represents; any of them may be null.
1679 std::shared_ptr<basic_parser<T>>
1680 m_digit_1,
1681 m_digit_5,
1682 m_digit_10,
1683 m_digit_50,
1684 m_digit_100,
1685 m_digit_500,
1686 m_digit_1000,
1687 m_digit_5000,
1688 m_digit_10000;
1689 };
1690
1693#ifdef _UNICODE
1695#else
1697#endif
1699
// Fraction parser: numerator, fraction line and denominator matched back to back (see do_match()).
// NOTE(review): the class-head line, the constructor-name line and one initializer-list line
// are absent from this listing.
1703 template <class T>
1705 {
1706 public:
// Constructor parameters: the three sub-parsers, stored in the public members of the same name.
1708 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1709 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1710 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1711 _In_ const std::locale& locale = std::locale()) :
1713 numerator(_numerator),
1714 fraction_line(_fraction_line),
1715 denominator(_denominator)
1716 {}
1717
// Invalidates all three sub-parsers. None of the pointers is null-checked.
// NOTE(review): listing line 1723 is absent; any further reset work is not visible here.
1718 virtual void invalidate()
1719 {
1720 numerator->invalidate();
1721 fraction_line->invalidate();
1722 denominator->invalidate();
1724 }
1725
// Sub-parsers; public, so callers can read each part's interval after a match.
1726 std::shared_ptr<basic_parser<T>> numerator;
1727 std::shared_ptr<basic_parser<T>> fraction_line;
1728 std::shared_ptr<basic_parser<T>> denominator;
1729
1730 protected:
1731 virtual bool do_match(
1732 _In_reads_or_z_opt_(end) const T* text,
1733 _In_ size_t start = 0,
1734 _In_ size_t end = SIZE_MAX,
1735 _In_ int flags = match_default)
1736 {
1737 _Assume_(text || start >= end);
1738 if (numerator->match(text, start, end, flags) &&
1739 fraction_line->match(text, numerator->interval.end, end, flags) &&
1740 denominator->match(text, fraction_line->interval.end, end, flags))
1741 {
1742 this->interval.start = start;
1743 this->interval.end = denominator->interval.end;
1744 return true;
1745 }
1746 numerator->invalidate();
1747 fraction_line->invalidate();
1748 denominator->invalidate();
1749 this->interval.invalidate();
1750 return false;
1751 }
1752 };
1753
// Build-dependent alias: tfraction is wfraction when _UNICODE is defined, fraction otherwise.
1756#ifdef _UNICODE
1757 using tfraction = wfraction;
1758#else
1759 using tfraction = fraction;
1760#endif
1762
// Score parser: home part, separator, guest part, with optional spaces around the separator
// (see do_match()).
// NOTE(review): the constructor-name line and one initializer-list line are absent from this
// listing.
1766 template <class T>
1767 class basic_score : public basic_parser<T>
1768 {
1769 public:
// Constructor parameters: the home, separator and guest sub-parsers go to the public members
// of the same name; `space` is kept in m_space.
1771 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1772 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1773 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1774 _In_ const std::shared_ptr<basic_parser<T>>& space,
1775 _In_ const std::locale& locale = std::locale()) :
1777 home(_home),
1778 separator(_separator),
1779 guest(_guest),
1780 m_space(space)
1781 {}
1782
// Invalidates the home, separator and guest sub-parsers (m_space is not touched here).
// NOTE(review): listing line 1788 is absent; any further reset work is not visible here.
1783 virtual void invalidate()
1784 {
1785 home->invalidate();
1786 separator->invalidate();
1787 guest->invalidate();
1789 }
1790
// Sub-parsers; public, so callers can read each part's interval after a match.
1791 std::shared_ptr<basic_parser<T>> home;
1792 std::shared_ptr<basic_parser<T>> separator;
1793 std::shared_ptr<basic_parser<T>> guest;
1794
1795 protected:
1796 virtual bool do_match(
1797 _In_reads_or_z_opt_(end) const T* text,
1798 _In_ size_t start = 0,
1799 _In_ size_t end = SIZE_MAX,
1800 _In_ int flags = match_default)
1801 {
1802 _Assume_(text || start >= end);
1803 this->interval.end = start;
1804
1805 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1806
1807 if (home->match(text, this->interval.end, end, flags))
1808 this->interval.end = home->interval.end;
1809 else
1810 goto end;
1811
1812 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1813
1814 if (separator->match(text, this->interval.end, end, flags))
1815 this->interval.end = separator->interval.end;
1816 else
1817 goto end;
1818
1819 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1820
1821 if (guest->match(text, this->interval.end, end, flags))
1822 this->interval.end = guest->interval.end;
1823 else
1824 goto end;
1825
1826 this->interval.start = start;
1827 return true;
1828
1829 end:
1830 home->invalidate();
1831 separator->invalidate();
1832 guest->invalidate();
1833 this->interval.invalidate();
1834 return false;
1835 }
1836
// Space parser; do_match() applies it repeatedly on both sides of the separator.
1837 std::shared_ptr<basic_parser<T>> m_space;
1838 };
1839
// Instantiation of basic_score for char text.
1840 using score = basic_score<char>;
// tscore follows the _UNICODE build setting.
// NOTE(review): wscore is defined on a line (listing line 1841) that is absent from this listing.
1842#ifdef _UNICODE
1843 using tscore = wscore;
1844#else
1845 using tscore = score;
1846#endif
1848
// Signed number parser: an optional sign (positive, negative or special) followed by a number
// (see do_match()).
// NOTE(review): the class-head line, the constructor-name line and the whole member
// initializer list are absent from this listing.
1852 template <class T>
1854 {
1855 public:
// Constructor parameters: the three sign parsers and the number parser.
// NOTE(review): presumably each is stored in the public member of the same name; the
// initializer list is not visible — confirm.
1857 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1858 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1859 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1860 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1861 _In_ const std::locale& locale = std::locale()) :
1867 {}
1868
// Invalidates every sub-parser; the sign parsers are optional and skipped when null,
// `number` is always dereferenced.
// NOTE(review): listing line 1875 is absent; any further reset work is not visible here.
1869 virtual void invalidate()
1870 {
1871 if (positive_sign) positive_sign->invalidate();
1872 if (negative_sign) negative_sign->invalidate();
1873 if (special_sign) special_sign->invalidate();
1874 number->invalidate();
1876 }
1877
// Sub-parsers; public, so callers can tell which sign matched and read the number's interval.
1878 std::shared_ptr<basic_parser<T>> positive_sign;
1879 std::shared_ptr<basic_parser<T>> negative_sign;
1880 std::shared_ptr<basic_parser<T>> special_sign;
1881 std::shared_ptr<basic_parser<T>> number;
1882
1883 protected:
1884 virtual bool do_match(
1885 _In_reads_or_z_opt_(end) const T* text,
1886 _In_ size_t start = 0,
1887 _In_ size_t end = SIZE_MAX,
1888 _In_ int flags = match_default)
1889 {
1890 _Assume_(text || start >= end);
1891 this->interval.end = start;
1892 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1893 this->interval.end = positive_sign->interval.end;
1894 if (negative_sign) negative_sign->invalidate();
1895 if (special_sign) special_sign->invalidate();
1896 }
1897 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1898 this->interval.end = negative_sign->interval.end;
1899 if (positive_sign) positive_sign->invalidate();
1900 if (special_sign) special_sign->invalidate();
1901 }
1902 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1903 this->interval.end = special_sign->interval.end;
1904 if (positive_sign) positive_sign->invalidate();
1905 if (negative_sign) negative_sign->invalidate();
1906 }
1907 else {
1908 if (positive_sign) positive_sign->invalidate();
1909 if (negative_sign) negative_sign->invalidate();
1910 if (special_sign) special_sign->invalidate();
1911 }
1912 if (number->match(text, this->interval.end, end, flags)) {
1913 this->interval.start = start;
1914 this->interval.end = number->interval.end;
1915 return true;
1916 }
1917 if (positive_sign) positive_sign->invalidate();
1918 if (negative_sign) negative_sign->invalidate();
1919 if (special_sign) special_sign->invalidate();
1920 number->invalidate();
1921 this->interval.invalidate();
1922 return false;
1923 }
1924 };
1925
// Instantiations of basic_signed_numeral for char and wchar_t text.
1926 using signed_numeral = basic_signed_numeral<char>;
1927 using wsigned_numeral = basic_signed_numeral<wchar_t>;
// tsigned_numeral follows the _UNICODE build setting.
1928#ifdef _UNICODE
1929 using tsigned_numeral = wsigned_numeral;
1930#else
1931 using tsigned_numeral = signed_numeral;
1932#endif
// Presumably meant for SGML-encoded text; instantiated with char in this listing.
1933 using sgml_signed_numeral = basic_signed_numeral<char>;
1934
// Mixed-numeral parser: an optional sign followed by "<integer> <fraction>", a lone
// <fraction>, or a lone <integer>.
// NOTE(review): the class-head line, the constructor-name line and most of the member
// initializer list are absent from this listing.
1938 template <class T>
1940 {
1941 public:
// Constructor parameters: sign parsers, integer parser, space parser and fraction parser.
// Only the m_space initializer is visible; presumably the others are stored in the public
// members of the same name — confirm.
1943 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1944 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1945 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1946 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1947 _In_ const std::shared_ptr<basic_parser<T>>& space,
1948 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1949 _In_ const std::locale& locale = std::locale()) :
1956 m_space(space)
1957 {}
1958
// Invalidates every public sub-parser; the sign parsers are optional and skipped when null.
// NOTE(review): listing line 1966 is absent; any further reset work is not visible here.
1959 virtual void invalidate()
1960 {
1961 if (positive_sign) positive_sign->invalidate();
1962 if (negative_sign) negative_sign->invalidate();
1963 if (special_sign) special_sign->invalidate();
1964 integer->invalidate();
1965 fraction->invalidate();
1967 }
1968
// Sub-parsers; public, so callers can inspect which parts matched.
1969 std::shared_ptr<basic_parser<T>> positive_sign;
1970 std::shared_ptr<basic_parser<T>> negative_sign;
1971 std::shared_ptr<basic_parser<T>> special_sign;
1972 std::shared_ptr<basic_parser<T>> integer;
1973 std::shared_ptr<basic_parser<T>> fraction;
1974
1975 protected:
// Matches [sign] followed by one of three forms, tried in this order:
//   1. <integer> <space>+ <fraction>  (falls back to <integer> alone when no fraction follows)
//   2. <fraction>
//   3. <integer>
// Returns true with interval set on success; on failure invalidates all sub-parsers and
// interval and returns false.
1976 virtual bool do_match(
1977 _In_reads_or_z_opt_(end) const T* text,
1978 _In_ size_t start = 0,
1979 _In_ size_t end = SIZE_MAX,
1980 _In_ int flags = match_default)
1981 {
1982 _Assume_(text || start >= end);
1983 this->interval.end = start;
1984
// Optional sign: the first of positive/negative/special that matches is kept, the other
// sign parsers are invalidated.
1985 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1986 this->interval.end = positive_sign->interval.end;
1987 if (negative_sign) negative_sign->invalidate();
1988 if (special_sign) special_sign->invalidate();
1989 }
1990 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1991 this->interval.end = negative_sign->interval.end;
1992 if (positive_sign) positive_sign->invalidate();
1993 if (special_sign) special_sign->invalidate();
1994 }
1995 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1996 this->interval.end = special_sign->interval.end;
1997 if (positive_sign) positive_sign->invalidate();
1998 if (negative_sign) negative_sign->invalidate();
1999 }
2000 else {
2001 if (positive_sign) positive_sign->invalidate();
2002 if (negative_sign) negative_sign->invalidate();
2003 if (special_sign) special_sign->invalidate();
2004 }
2005
2006 // Check for <integer> <fraction>
2007 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
2008 if (integer->match(text, this->interval.end, end, flags) &&
2009 m_space->match(text, integer->interval.end, end, space_match_flags))
2010 {
// At least one space followed the integer; consume any further spaces before trying the fraction.
2011 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
2012 if (fraction->match(text, this->interval.end, end, flags)) {
2013 this->interval.start = start;
2014 this->interval.end = fraction->interval.end;
2015 return true;
2016 }
// No fraction after the spaces: report the integer alone, ending the interval at the
// integer so the trailing spaces are not part of the match.
2017 fraction->invalidate();
2018 this->interval.start = start;
2019 this->interval.end = integer->interval.end;
2020 return true;
2021 }
2022
// this->interval.end still points just past the sign here, since the block above always returns.
2023 // Check for <fraction>
2024 if (fraction->match(text, this->interval.end, end, flags)) {
2025 integer->invalidate();
2026 this->interval.start = start;
2027 this->interval.end = fraction->interval.end;
2028 return true;
2029 }
2030
2031 // Check for <integer>
2032 if (integer->match(text, this->interval.end, end, flags)) {
2033 fraction->invalidate();
2034 this->interval.start = start;
2035 this->interval.end = integer->interval.end;
2036 return true;
2037 }
2038
// Nothing matched: leave every sub-parser and our own interval invalid.
2039 if (positive_sign) positive_sign->invalidate();
2040 if (negative_sign) negative_sign->invalidate();
2041 if (special_sign) special_sign->invalidate();
2042 integer->invalidate();
2043 fraction->invalidate();
2044 this->interval.invalidate();
2045 return false;
2046 }
2047
// Space parser used between the integer and the fraction.
2048 std::shared_ptr<basic_parser<T>> m_space;
2049 };
2050
// Instantiations of basic_mixed_numeral for char and wchar_t text.
2051 using mixed_numeral = basic_mixed_numeral<char>;
2052 using wmixed_numeral = basic_mixed_numeral<wchar_t>;
// tmixed_numeral follows the _UNICODE build setting.
2053#ifdef _UNICODE
2054 using tmixed_numeral = wmixed_numeral;
2055#else
2056 using tmixed_numeral = mixed_numeral;
2057#endif
// Presumably meant for SGML-encoded text; instantiated with char in this listing.
2058 using sgml_mixed_numeral = basic_mixed_numeral<char>;
2059
// Scientific-notation number parser: [sign] [integer] [separator decimal] [exponent], where at
// least one of the integer and decimal parts must be present. The parsed number is stored in
// `value` as a double.
// NOTE(review): the class-head line, the constructor-name line and most of the member
// initializer list are absent from this listing.
2063 template <class T>
2065 {
2066 public:
// Constructor parameters: one sub-parser per syntactic part. Only the `value` initializer is
// visible; presumably the others are stored in the public members of the same name — confirm.
2068 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2069 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2070 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2071 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2072 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2073 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2074 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2075 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2076 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2077 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2078 _In_ const std::locale& locale = std::locale()) :
2090 value(std::numeric_limits<double>::quiet_NaN())
2091 {}
2092
// Invalidates every sub-parser and resets `value` to NaN. The sign and exponent parsers are
// optional and skipped when null; integer, decimal_separator and decimal are always
// dereferenced.
// NOTE(review): listing line 2106 is absent; any further reset work is not visible here.
2093 virtual void invalidate()
2094 {
2095 if (positive_sign) positive_sign->invalidate();
2096 if (negative_sign) negative_sign->invalidate();
2097 if (special_sign) special_sign->invalidate();
2098 integer->invalidate();
2099 decimal_separator->invalidate();
2100 decimal->invalidate();
2101 if (exponent_symbol) exponent_symbol->invalidate();
2102 if (positive_exp_sign) positive_exp_sign->invalidate();
2103 if (negative_exp_sign) negative_exp_sign->invalidate();
2104 if (exponent) exponent->invalidate();
2105 value = std::numeric_limits<double>::quiet_NaN();
2107 }
2108
// Sub-parsers; public, so callers can inspect which parts matched.
2109 std::shared_ptr<basic_parser<T>> positive_sign;
2110 std::shared_ptr<basic_parser<T>> negative_sign;
2111 std::shared_ptr<basic_parser<T>> special_sign;
2112 std::shared_ptr<basic_integer<T>> integer;
2113 std::shared_ptr<basic_parser<T>> decimal_separator;
2114 std::shared_ptr<basic_integer<T>> decimal;
2115 std::shared_ptr<basic_parser<T>> exponent_symbol;
2116 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2117 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2118 std::shared_ptr<basic_integer<T>> exponent;
// Parsed number; NaN after construction and after invalidate().
2119 double value;
2120
2121 protected:
// Matches the number and computes `value`. Returns false, with all sub-parsers and interval
// invalidated, when neither an integer part nor a decimal part is found; `value` is not
// modified on that path.
2122 virtual bool do_match(
2123 _In_reads_or_z_opt_(end) const T* text,
2124 _In_ size_t start = 0,
2125 _In_ size_t end = SIZE_MAX,
2126 _In_ int flags = match_default)
2127 {
2128 _Assume_(text || start >= end);
2129 this->interval.end = start;
2130
// Optional sign: the first of positive/negative/special that matches is kept, the other
// sign parsers are invalidated.
2131 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2132 this->interval.end = positive_sign->interval.end;
2133 if (negative_sign) negative_sign->invalidate();
2134 if (special_sign) special_sign->invalidate();
2135 }
2136 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2137 this->interval.end = negative_sign->interval.end;
2138 if (positive_sign) positive_sign->invalidate();
2139 if (special_sign) special_sign->invalidate();
2140 }
2141 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2142 this->interval.end = special_sign->interval.end;
2143 if (positive_sign) positive_sign->invalidate();
2144 if (negative_sign) negative_sign->invalidate();
2145 }
2146 else {
2147 if (positive_sign) positive_sign->invalidate();
2148 if (negative_sign) negative_sign->invalidate();
2149 if (special_sign) special_sign->invalidate();
2150 }
2151
// The integer part is optional.
2152 if (integer->match(text, this->interval.end, end, flags))
2153 this->interval.end = integer->interval.end;
2154
// The decimal separator only counts when digits follow it; otherwise both are dropped.
2155 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2156 decimal->match(text, decimal_separator->interval.end, end, flags))
2157 this->interval.end = decimal->interval.end;
2158 else {
2159 decimal_separator->invalidate();
2160 decimal->invalidate();
2161 }
2162
2163 if (integer->interval.empty() &&
2164 decimal->interval.empty())
2165 {
2166 // No integer part, no decimal part.
2167 if (positive_sign) positive_sign->invalidate();
2168 if (negative_sign) negative_sign->invalidate();
2169 if (special_sign) special_sign->invalidate();
2170 integer->invalidate();
2171 decimal_separator->invalidate();
2172 decimal->invalidate();
2173 if (exponent_symbol) exponent_symbol->invalidate();
2174 if (positive_exp_sign) positive_exp_sign->invalidate();
2175 if (negative_exp_sign) negative_exp_sign->invalidate();
2176 if (exponent) exponent->invalidate();
2177 this->interval.invalidate();
2178 return false;
2179 }
2180
// Optional exponent. First form: symbol, then either a positive sign plus digits or digits
// directly. Second form: symbol, negative sign, digits. exponent_symbol is matched again
// when the second form is tried.
2181 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2182 ((positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2183 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags)) ||
2184 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2185 {
2186 this->interval.end = exponent->interval.end;
2187 if (negative_exp_sign) negative_exp_sign->invalidate();
2188 }
2189 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2190 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2191 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2192 {
2193 this->interval.end = exponent->interval.end;
2194 if (positive_exp_sign) positive_exp_sign->invalidate();
2195 }
2196 else {
// No usable exponent: the number ends after the integer/decimal part.
2197 if (exponent_symbol) exponent_symbol->invalidate();
2198 if (positive_exp_sign) positive_exp_sign->invalidate();
2199 if (negative_exp_sign) negative_exp_sign->invalidate();
2200 if (exponent) exponent->invalidate();
2201 }
2202
// Compose the value: integer part, plus the decimal digits scaled by 10^-(length of the
// decimal interval), negated when the negative sign matched, then scaled by 10^exponent.
2203 value = (double)integer->value;
2204 if (decimal->interval)
2205 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2206 if (negative_sign && negative_sign->interval)
2207 value = -value;
2208 if (exponent && exponent->interval) {
2209 double e = (double)exponent->value;
2210 if (negative_exp_sign && negative_exp_sign->interval)
2211 e = -e;
2212 value *= pow(10.0, e);
2213 }
2214
2215 this->interval.start = start;
2216 return true;
2217 }
2218 };
2219
// Instantiations of basic_scientific_numeral for char and wchar_t text.
2220 using scientific_numeral = basic_scientific_numeral<char>;
2221 using wscientific_numeral = basic_scientific_numeral<wchar_t>;
// tscientific_numeral follows the _UNICODE build setting.
2222#ifdef _UNICODE
2223 using tscientific_numeral = wscientific_numeral;
2224#else
2225 using tscientific_numeral = scientific_numeral;
2226#endif
// Presumably meant for SGML-encoded text; instantiated with char in this listing.
2227 using sgml_scientific_numeral = basic_scientific_numeral<char>;
2228
// Monetary amount parser: [sign] <currency> followed by an integer part, a decimal part
// (separator plus digits), or both (see do_match()).
// NOTE(review): the class-head line, the constructor-name line and the whole member
// initializer list are absent from this listing.
2232 template <class T>
2234 {
2235 public:
// Constructor parameters: one sub-parser per syntactic part.
// NOTE(review): presumably each is stored in the public member of the same name; the
// initializer list is not visible — confirm.
2237 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2238 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2239 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2240 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2241 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2242 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2243 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2244 _In_ const std::locale& locale = std::locale()) :
2253 {}
2254
// Invalidates every sub-parser; the sign parsers are treated as optional and skipped when
// null, the remaining parsers are always dereferenced.
// NOTE(review): listing line 2264 is absent; any further reset work is not visible here.
2255 virtual void invalidate()
2256 {
2257 if (positive_sign) positive_sign->invalidate();
2258 if (negative_sign) negative_sign->invalidate();
2259 if (special_sign) special_sign->invalidate();
2260 currency->invalidate();
2261 integer->invalidate();
2262 decimal_separator->invalidate();
2263 decimal->invalidate();
2265 }
2266
// Sub-parsers; public, so callers can inspect which parts matched.
2267 std::shared_ptr<basic_parser<T>> positive_sign;
2268 std::shared_ptr<basic_parser<T>> negative_sign;
2269 std::shared_ptr<basic_parser<T>> special_sign;
2270 std::shared_ptr<basic_parser<T>> currency;
2271 std::shared_ptr<basic_parser<T>> integer;
2272 std::shared_ptr<basic_parser<T>> decimal_separator;
2273 std::shared_ptr<basic_parser<T>> decimal;
2274
2275 protected:
2276 virtual bool do_match(
2277 _In_reads_or_z_opt_(end) const T* text,
2278 _In_ size_t start = 0,
2279 _In_ size_t end = SIZE_MAX,
2280 _In_ int flags = match_default)
2281 {
2282 _Assume_(text || start >= end);
2283 this->interval.end = start;
2284
2285 if (positive_sign->match(text, this->interval.end, end, flags)) {
2286 this->interval.end = positive_sign->interval.end;
2287 if (negative_sign) negative_sign->invalidate();
2288 if (special_sign) special_sign->invalidate();
2289 }
2290 else if (negative_sign->match(text, this->interval.end, end, flags)) {
2291 this->interval.end = negative_sign->interval.end;
2292 if (positive_sign) positive_sign->invalidate();
2293 if (special_sign) special_sign->invalidate();
2294 }
2295 else if (special_sign->match(text, this->interval.end, end, flags)) {
2296 this->interval.end = special_sign->interval.end;
2297 if (positive_sign) positive_sign->invalidate();
2298 if (negative_sign) negative_sign->invalidate();
2299 }
2300 else {
2301 if (positive_sign) positive_sign->invalidate();
2302 if (negative_sign) negative_sign->invalidate();
2303 if (special_sign) special_sign->invalidate();
2304 }
2305
2306 if (currency->match(text, this->interval.end, end, flags))
2307 this->interval.end = currency->interval.end;
2308 else {
2309 if (positive_sign) positive_sign->invalidate();
2310 if (negative_sign) negative_sign->invalidate();
2311 if (special_sign) special_sign->invalidate();
2312 integer->invalidate();
2313 decimal_separator->invalidate();
2314 decimal->invalidate();
2315 this->interval.invalidate();
2316 return false;
2317 }
2318
2319 if (integer->match(text, this->interval.end, end, flags))
2320 this->interval.end = integer->interval.end;
2321 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2322 decimal->match(text, decimal_separator->interval.end, end, flags))
2323 this->interval.end = decimal->interval.end;
2324 else {
2325 decimal_separator->invalidate();
2326 decimal->invalidate();
2327 }
2328
2329 if (integer->interval.empty() &&
2330 decimal->interval.empty())
2331 {
2332 // No integer part, no decimal part.
2333 if (positive_sign) positive_sign->invalidate();
2334 if (negative_sign) negative_sign->invalidate();
2335 if (special_sign) special_sign->invalidate();
2336 currency->invalidate();
2337 integer->invalidate();
2338 decimal_separator->invalidate();
2339 decimal->invalidate();
2340 this->interval.invalidate();
2341 return false;
2342 }
2343
2344 this->interval.start = start;
2345 return true;
2346 }
2347 };
2348
// Character-type specific aliases of basic_monetary_numeral.
2349 using monetary_numeral = basic_monetary_numeral<char>;
2350 using wmonetary_numeral = basic_monetary_numeral<wchar_t>;
// The t-prefixed alias selects the wide or narrow variant depending on _UNICODE.
2351#ifdef _UNICODE
2352 using tmonetary_numeral = wmonetary_numeral;
2353#else
2354 using tmonetary_numeral = monetary_numeral;
2355#endif
// The SGML alias is the plain char instantiation.
2356 using sgml_monetary_numeral = basic_monetary_numeral<char>;
2357
2361 template <class T>
2363 {
2364 public:
// Constructor parameters: one parser per decimal digit 0..9, the parser for the
// separator between address components, and the locale.
// NOTE(review): the listing line naming the constructor (2365) and the first
// member initializer (2378) are absent from this extract.
2366 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2367 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2368 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2369 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2370 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2371 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2372 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2373 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2374 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2375 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2376 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2377 _In_ const std::locale& locale = std::locale()) :
2379 m_digit_0(digit_0),
2380 m_digit_1(digit_1),
2381 m_digit_2(digit_2),
2382 m_digit_3(digit_3),
2383 m_digit_4(digit_4),
2384 m_digit_5(digit_5),
2385 m_digit_6(digit_6),
2386 m_digit_7(digit_7),
2387 m_digit_8(digit_8),
2388 m_digit_9(digit_9),
2389 m_separator(separator)
2390 {
// Start with an all-zero address value.
2391 value.s_addr = 0;
2392 }
2393
/// Resets all four component intervals to the start=1/end=0 state and zeroes
/// the parsed address value.
// NOTE(review): listing line 2405 is absent from this extract; presumably it
// resets this parser's own state - confirm against the original header.
2394 virtual void invalidate()
2395 {
2396 components[0].start = 1;
2397 components[0].end = 0;
2398 components[1].start = 1;
2399 components[1].end = 0;
2400 components[2].start = 1;
2401 components[2].end = 0;
2402 components[3].start = 1;
2403 components[3].end = 0;
2404 value.s_addr = 0;
2406 }
2407
2410
2411 protected:
2412 virtual bool do_match(
2413 _In_reads_or_z_opt_(end) const T* text,
2414 _In_ size_t start = 0,
2415 _In_ size_t end = SIZE_MAX,
2416 _In_ int flags = match_default)
2417 {
2418 _Assume_(text || start >= end);
2419 this->interval.end = start;
2420 value.s_addr = 0;
2421
2422 size_t i;
2423 for (i = 0; i < 4; i++) {
2424 if (i) {
2425 if (m_separator->match(text, this->interval.end, end, flags))
2426 this->interval.end = m_separator->interval.end;
2427 else
2428 goto error;
2429 }
2430
2431 components[i].start = this->interval.end;
2432 bool is_empty = true;
2433 size_t x;
2434 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2435 size_t dig, digit_end;
2436 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2437 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2438 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2439 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2440 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2441 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2442 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2443 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2444 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2445 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2446 else break;
2447 size_t x_n = x * 10 + dig;
2448 if (x_n <= 255) {
2449 x = x_n;
2450 this->interval.end = digit_end;
2451 is_empty = false;
2452 }
2453 else
2454 break;
2455 }
2456 if (is_empty)
2457 goto error;
2458 components[i].end = this->interval.end;
2459 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2460 }
2461 if (i < 4)
2462 goto error;
2463
2464 HE2BE(reinterpret_cast<uint32_t&>(value.s_addr));
2465 this->interval.start = start;
2466 return true;
2467
2468 error:
2469 invalidate();
2470 return false;
2471 }
2472
// Parsers for the decimal digits 0..9, tried in this order by do_match().
2473 std::shared_ptr<basic_parser<T>>
2474 m_digit_0,
2475 m_digit_1,
2476 m_digit_2,
2477 m_digit_3,
2478 m_digit_4,
2479 m_digit_5,
2480 m_digit_6,
2481 m_digit_7,
2482 m_digit_8,
2483 m_digit_9;
// Parser for the separator required between consecutive components.
2484 std::shared_ptr<basic_parser<T>> m_separator;
2485 };
2486
// Character-type specific aliases of basic_ipv4_address.
2487 using ipv4_address = basic_ipv4_address<char>;
2488 using wipv4_address = basic_ipv4_address<wchar_t>;
// The t-prefixed alias selects the wide or narrow variant depending on _UNICODE.
2489#ifdef _UNICODE
2490 using tipv4_address = wipv4_address;
2491#else
2492 using tipv4_address = ipv4_address;
2493#endif
// The SGML alias is the plain char instantiation.
2494 using sgml_ipv4_address = basic_ipv4_address<char>;
2495
2499 template <class T>
2501 {
2502 public:
/// Constructs the parser; \p locale is forwarded to basic_parser<T>.
2503 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2504
2505 protected:
2506 virtual bool do_match(
2507 _In_reads_or_z_opt_(end) const T* text,
2508 _In_ size_t start = 0,
2509 _In_ size_t end = SIZE_MAX,
2510 _In_ int flags = match_default)
2511 {
2512 _Assume_(text || start >= end);
2513 if (start < end && text[start]) {
2514 if (text[start] == '-' ||
2515 text[start] == '_' ||
2516 text[start] == ':' ||
2517 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2518 {
2519 this->interval.end = (this->interval.start = start) + 1;
2520 return true;
2521 }
2522 }
2523 this->interval.invalidate();
2524 return false;
2525 }
2526 };
2527
2530#ifdef _UNICODE
2532#else
2534#endif
2535
2540 {
2541 public:
/// Constructs the parser; \p locale is forwarded to sgml_parser.
2542 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2543
2544 protected:
2545 virtual bool do_match(
2546 _In_reads_or_z_(end) const char* text,
2547 _In_ size_t start = 0,
2548 _In_ size_t end = SIZE_MAX,
2549 _In_ int flags = match_default)
2550 {
2551 _Assume_(text || start >= end);
2552 if (start < end && text[start]) {
2553 wchar_t buf[3];
2554 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2555 const wchar_t* chr_end = chr + stdex::strlen(chr);
2556 if (((chr[0] == L'-' ||
2557 chr[0] == L'_' ||
2558 chr[0] == L':') && chr[1] == 0) ||
2559 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2560 {
2561 this->interval.start = start;
2562 return true;
2563 }
2564 }
2565 this->interval.invalidate();
2566 return false;
2567 }
2568 };
2569
2573 template <class T>
2575 {
2576 public:
// Constructor parameters: one parser per hexadecimal digit value 0..15, the
// parser for the separator between address components, optional parsers for the
// scope ID separator and the scope ID (both default to nullptr), and the locale.
// NOTE(review): the listing lines naming the constructor (2577), the first
// member initializer (2598) and the last one (2617) are absent from this extract.
2578 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2579 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2580 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2581 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2582 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2583 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2584 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2585 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2586 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2587 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2588 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2589 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2590 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2591 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2592 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2593 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2594 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2595 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2596 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2597 _In_ const std::locale& locale = std::locale()) :
2599 m_digit_0(digit_0),
2600 m_digit_1(digit_1),
2601 m_digit_2(digit_2),
2602 m_digit_3(digit_3),
2603 m_digit_4(digit_4),
2604 m_digit_5(digit_5),
2605 m_digit_6(digit_6),
2606 m_digit_7(digit_7),
2607 m_digit_8(digit_8),
2608 m_digit_9(digit_9),
2609 m_digit_10(digit_10),
2610 m_digit_11(digit_11),
2611 m_digit_12(digit_12),
2612 m_digit_13(digit_13),
2613 m_digit_14(digit_14),
2614 m_digit_15(digit_15),
2615 m_separator(separator),
2616 m_scope_id_separator(scope_id_separator),
2618 {
// Start with an all-zero address value.
2619 memset(&value, 0, sizeof(value));
2620 }
2621
/// Resets all eight component intervals to the start=1/end=0 state, zeroes the
/// parsed address value and invalidates the scope ID parser when one is set.
// NOTE(review): listing line 2642 is absent from this extract; presumably it
// resets this parser's own state - confirm against the original header.
2622 virtual void invalidate()
2623 {
2624 components[0].start = 1;
2625 components[0].end = 0;
2626 components[1].start = 1;
2627 components[1].end = 0;
2628 components[2].start = 1;
2629 components[2].end = 0;
2630 components[3].start = 1;
2631 components[3].end = 0;
2632 components[4].start = 1;
2633 components[4].end = 0;
2634 components[5].start = 1;
2635 components[5].end = 0;
2636 components[6].start = 1;
2637 components[6].end = 0;
2638 components[7].start = 1;
2639 components[7].end = 0;
2640 memset(&value, 0, sizeof(value));
// scope_id is optional (the constructor defaults it to nullptr).
2641 if (scope_id) scope_id->invalidate();
2643 }
2644
2647 std::shared_ptr<basic_parser<T>> scope_id;
2648
2649 protected:
/// Matches an IPv6 address: up to eight hexadecimal components (each at most
/// 0xffff) separated by the separator parser, with support for one doubled
/// separator ("zero compaction"), optionally followed by a scope ID.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in \p text to start parsing at
/// \param[in] end    Index in \p text to stop parsing at
/// \param[in] flags  Matching flags forwarded to the sub-parsers
///
/// \returns \c true on match with interval spanning it; \c false after calling
///          invalidate() otherwise
2650 virtual bool do_match(
2651 _In_reads_or_z_opt_(end) const T* text,
2652 _In_ size_t start = 0,
2653 _In_ size_t end = SIZE_MAX,
2654 _In_ int flags = match_default)
2655 {
2656 _Assume_(text || start >= end);
2657 this->interval.end = start;
2658 memset(&value, 0, sizeof(value));
2659
// compaction_i: index of the component at which the doubled separator was seen
// (SIZE_MAX while none has been seen).
2660 size_t i, compaction_i = SIZE_MAX, compaction_start = start;
2661 for (i = 0; i < 8; i++) {
2662 bool is_empty = true;
2663
2664 if (m_separator->match(text, this->interval.end, end, flags)) {
2665 // : found
2666 this->interval.end = m_separator->interval.end;
2667 if (m_separator->match(text, this->interval.end, end, flags)) {
2668 // :: found
2669 if (compaction_i == SIZE_MAX) {
2670 // Zero compaction start
2671 compaction_i = i;
2672 compaction_start = m_separator->interval.start;
2673 this->interval.end = m_separator->interval.end;
2674 }
2675 else {
2676 // More than one zero compaction
2677 break;
2678 }
2679 }
2680 else if (!i) {
2681 // Leading : found
2682 goto error;
2683 }
2684 }
2685 else if (i) {
2686 // : missing
2687 break;
2688 }
2689
2690 components[i].start = this->interval.end;
// Accumulate hexadecimal digits into x; the digit parsers are tried in value order.
2691 size_t x;
2692 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2693 size_t dig, digit_end;
2694 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2695 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2696 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2697 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2698 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2699 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2700 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2701 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2702 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2703 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2704 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2705 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2706 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2707 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2708 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2709 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2710 else break;
// A digit that would push the component past 0xffff is left unconsumed.
2711 size_t x_n = x * 16 + dig;
2712 if (x_n <= 0xffff) {
2713 x = x_n;
2714 this->interval.end = digit_end;
2715 is_empty = false;
2716 }
2717 else
2718 break;
2719 }
2720 if (is_empty) {
2721 if (compaction_i != SIZE_MAX) {
2722 // Zero compaction active: no sweat.
2723 break;
2724 }
2725 goto error;
2726 }
2727 components[i].end = this->interval.end;
// NOTE(review): no visible line stores x into value.s6_words[i] before this
// byte swap, so the word would still hold the zero written by memset above -
// confirm against the original header that the parsed value is assigned.
2728 HE2BE(reinterpret_cast<uint16_t&>(this->value.s6_words[i]));
2729 }
2730
2731 if (compaction_i != SIZE_MAX) {
2732 // Align components right due to zero compaction.
// NOTE(review): listing lines 2736 and 2741 are absent from this extract, so
// the statement that starts on 2740 appears truncated here.
2733 size_t j, k;
2734 for (j = 8, k = i; k > compaction_i;) {
2735 this->value.s6_words[--j] = this->value.s6_words[--k];
2737 }
2738 for (; j > compaction_i;) {
2739 this->value.s6_words[--j] = 0;
2740 components[j].start =
2742 }
2743 }
2744 else if (i < 8)
2745 goto error;
2746
// The scope ID is optional: it is consumed only when both the scope ID
// separator and the scope ID parsers are set and match.
2747 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2748 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2749 this->interval.end = scope_id->interval.end;
2750 else if (scope_id)
2751 scope_id->invalidate();
2752
2753 this->interval.start = start;
2754 return true;
2755
2756 error:
2757 invalidate();
2758 return false;
2759 }
2760
// Parsers for the hexadecimal digit values 0..15, tried in this order by do_match().
2761 std::shared_ptr<basic_parser<T>>
2762 m_digit_0,
2763 m_digit_1,
2764 m_digit_2,
2765 m_digit_3,
2766 m_digit_4,
2767 m_digit_5,
2768 m_digit_6,
2769 m_digit_7,
2770 m_digit_8,
2771 m_digit_9,
2772 m_digit_10,
2773 m_digit_11,
2774 m_digit_12,
2775 m_digit_13,
2776 m_digit_14,
2777 m_digit_15;
// Component separator parser, and the optional (may be null) scope ID separator parser.
2778 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2779 };
2780
// Character-type specific aliases of basic_ipv6_address.
2781 using ipv6_address = basic_ipv6_address<char>;
2782 using wipv6_address = basic_ipv6_address<wchar_t>;
// The t-prefixed alias selects the wide or narrow variant depending on _UNICODE.
2783#ifdef _UNICODE
2784 using tipv6_address = wipv6_address;
2785#else
2786 using tipv6_address = ipv6_address;
2787#endif
// The SGML alias is the plain char instantiation.
2788 using sgml_ipv6_address = basic_ipv6_address<char>;
2789
2793 template <class T>
2795 {
2796 public:
// Constructor parameters: allow_idn enables acceptance of locale-alphanumeric
// characters beyond ASCII letters and digits in do_match().
// NOTE(review): the listing lines naming the constructor (2797) and the first
// member initializer (2800) are absent from this extract.
2798 _In_ bool allow_idn,
2799 _In_ const std::locale& locale = std::locale()) :
2801 m_allow_idn(allow_idn),
2802 allow_on_edge(true)
2803 {}
2804
2806
2807 protected:
2808 virtual bool do_match(
2809 _In_reads_or_z_opt_(end) const T* text,
2810 _In_ size_t start = 0,
2811 _In_ size_t end = SIZE_MAX,
2812 _In_ int flags = match_default)
2813 {
2814 _Assume_(text || start >= end);
2815 if (start < end && text[start]) {
2816 if (('A' <= text[start] && text[start] <= 'Z') ||
2817 ('a' <= text[start] && text[start] <= 'z') ||
2818 ('0' <= text[start] && text[start] <= '9'))
2819 allow_on_edge = true;
2820 else if (text[start] == '-')
2821 allow_on_edge = false;
2822 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2823 allow_on_edge = true;
2824 else {
2825 this->interval.invalidate();
2826 return false;
2827 }
2828 this->interval.end = (this->interval.start = start) + 1;
2829 return true;
2830 }
2831 this->interval.invalidate();
2832 return false;
2833 }
2834
2835 bool m_allow_idn;
2836 };
2837
// Character-type specific aliases of basic_dns_domain_char.
2838 using dns_domain_char = basic_dns_domain_char<char>;
2839 using wdns_domain_char = basic_dns_domain_char<wchar_t>;
// The t-prefixed alias selects the wide or narrow variant depending on _UNICODE.
2840#ifdef _UNICODE
2841 using tdns_domain_char = wdns_domain_char;
2842#else
2843 using tdns_domain_char = dns_domain_char;
2844#endif
2845
2850 {
2851 public:
// Constructor parameters: the IDN switch and the locale.
// NOTE(review): the listing lines naming the constructor (2852) and its
// initializer list (2855) are absent from this extract.
2853 _In_ bool allow_idn,
2854 _In_ const std::locale& locale = std::locale()) :
2856 {}
2857
2858 protected:
2859 virtual bool do_match(
2860 _In_reads_or_z_(end) const char* text,
2861 _In_ size_t start = 0,
2862 _In_ size_t end = SIZE_MAX,
2863 _In_ int flags = match_default)
2864 {
2865 _Assume_(text || start >= end);
2866 if (start < end && text[start]) {
2867 wchar_t buf[3];
2868 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2869 const wchar_t* chr_end = chr + stdex::strlen(chr);
2870 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2871 ('a' <= chr[0] && chr[0] <= 'z') ||
2872 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2873 allow_on_edge = true;
2874 else if (chr[0] == '-' && chr[1] == 0)
2875 allow_on_edge = false;
2876 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2877 allow_on_edge = true;
2878 else {
2879 this->interval.invalidate();
2880 return false;
2881 }
2882 this->interval.start = start;
2883 return true;
2884 }
2885 this->interval.invalidate();
2886 return false;
2887 }
2888 };
2889
2893 template <class T>
2895 {
2896 public:
// Constructor parameters: allow_absolute, the parser for a single domain
// character, the parser for the separator between labels, and the locale.
// NOTE(review): the listing lines naming the constructor (2897) and the first
// member initializers (2902-2903) are absent from this extract.
2898 _In_ bool allow_absolute,
2899 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2900 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2901 _In_ const std::locale& locale = std::locale()) :
2904 m_domain_char(domain_char),
2905 m_separator(separator)
2906 {}
2907
2908 protected:
2909 virtual bool do_match(
2910 _In_reads_or_z_opt_(end) const T* text,
2911 _In_ size_t start = 0,
2912 _In_ size_t end = SIZE_MAX,
2913 _In_ int flags = match_default)
2914 {
2915 _Assume_(text || start >= end);
2916 size_t i = start, count;
2917 for (count = 0; i < end && text[i] && count < 127; count++) {
2918 if (m_domain_char->match(text, i, end, flags) &&
2919 m_domain_char->allow_on_edge)
2920 {
2921 // Domain start
2922 this->interval.end = i = m_domain_char->interval.end;
2923 while (i < end && text[i]) {
2924 if (m_domain_char->allow_on_edge &&
2925 m_separator->match(text, i, end, flags))
2926 {
2927 // Domain end
2928 if (m_allow_absolute)
2929 this->interval.end = i = m_separator->interval.end;
2930 else {
2931 this->interval.end = i;
2932 i = m_separator->interval.end;
2933 }
2934 break;
2935 }
2936 if (m_domain_char->match(text, i, end, flags)) {
2937 if (m_domain_char->allow_on_edge)
2938 this->interval.end = i = m_domain_char->interval.end;
2939 else
2940 i = m_domain_char->interval.end;
2941 }
2942 else {
2943 this->interval.start = start;
2944 return true;
2945 }
2946 }
2947 }
2948 else
2949 break;
2950 }
2951 if (count) {
2952 this->interval.start = start;
2953 return true;
2954 }
2955 this->interval.invalidate();
2956 return false;
2957 }
2958
// Parser for a single domain character; its allow_on_edge flag is read by do_match().
2960 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
// Parser for the separator between labels.
2961 std::shared_ptr<basic_parser<T>> m_separator;
2962 };
2963
// The t-prefixed alias selects the wide or narrow variant depending on _UNICODE.
// NOTE(review): the listing lines declaring dns_name and wdns_name (2964-2965)
// are absent from this extract.
2966#ifdef _UNICODE
2967 using tdns_name = wdns_name;
2968#else
2969 using tdns_name = dns_name;
2970#endif
2972
2976 template <class T>
2978 {
2979 public:
/// Constructs the parser; \p locale is forwarded to basic_parser<T>.
2980 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2981
2982 protected:
2983 virtual bool do_match(
2984 _In_reads_or_z_opt_(end) const T* text,
2985 _In_ size_t start = 0,
2986 _In_ size_t end = SIZE_MAX,
2987 _In_ int flags = match_default)
2988 {
2989 _Assume_(text || start >= end);
2990 if (start < end && text[start]) {
2991 if (text[start] == '-' ||
2992 text[start] == '.' ||
2993 text[start] == '_' ||
2994 text[start] == '~' ||
2995 text[start] == '%' ||
2996 text[start] == '!' ||
2997 text[start] == '$' ||
2998 text[start] == '&' ||
2999 text[start] == '\'' ||
3000 //text[start] == '(' ||
3001 //text[start] == ')' ||
3002 text[start] == '*' ||
3003 text[start] == '+' ||
3004 text[start] == ',' ||
3005 text[start] == ';' ||
3006 text[start] == '=' ||
3007 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3008 {
3009 this->interval.end = (this->interval.start = start) + 1;
3010 return true;
3011 }
3012 }
3013 this->interval.invalidate();
3014 return false;
3015 }
3016 };
3017
3020#ifdef _UNICODE
3022#else
3024#endif
3025
3030 {
3031 public:
/// Constructs the parser; \p locale is forwarded to basic_url_username_char<char>.
3032 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3033
3034 protected:
3035 virtual bool do_match(
3036 _In_reads_or_z_(end) const char* text,
3037 _In_ size_t start = 0,
3038 _In_ size_t end = SIZE_MAX,
3039 _In_ int flags = match_default)
3040 {
3041 _Assume_(text || start >= end);
3042 if (start < end && text[start]) {
3043 wchar_t buf[3];
3044 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3045 const wchar_t* chr_end = chr + stdex::strlen(chr);
3046 if (((chr[0] == L'-' ||
3047 chr[0] == L'.' ||
3048 chr[0] == L'_' ||
3049 chr[0] == L'~' ||
3050 chr[0] == L'%' ||
3051 chr[0] == L'!' ||
3052 chr[0] == L'$' ||
3053 chr[0] == L'&' ||
3054 chr[0] == L'\'' ||
3055 //chr[0] == L'(' ||
3056 //chr[0] == L')' ||
3057 chr[0] == L'*' ||
3058 chr[0] == L'+' ||
3059 chr[0] == L',' ||
3060 chr[0] == L';' ||
3061 chr[0] == L'=') && chr[1] == 0) ||
3062 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3063 {
3064 this->interval.start = start;
3065 return true;
3066 }
3067 }
3068
3069 this->interval.invalidate();
3070 return false;
3071 }
3072 };
3073
3077 template <class T>
3079 {
3080 public:
/// Constructs the parser; \p locale is forwarded to basic_parser<T>.
3081 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3082
3083 protected:
3084 virtual bool do_match(
3085 _In_reads_or_z_opt_(end) const T* text,
3086 _In_ size_t start = 0,
3087 _In_ size_t end = SIZE_MAX,
3088 _In_ int flags = match_default)
3089 {
3090 _Assume_(text || start >= end);
3091 if (start < end && text[start]) {
3092 if (text[start] == '-' ||
3093 text[start] == '.' ||
3094 text[start] == '_' ||
3095 text[start] == '~' ||
3096 text[start] == '%' ||
3097 text[start] == '!' ||
3098 text[start] == '$' ||
3099 text[start] == '&' ||
3100 text[start] == '\'' ||
3101 text[start] == '(' ||
3102 text[start] == ')' ||
3103 text[start] == '*' ||
3104 text[start] == '+' ||
3105 text[start] == ',' ||
3106 text[start] == ';' ||
3107 text[start] == '=' ||
3108 text[start] == ':' ||
3109 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3110 {
3111 this->interval.end = (this->interval.start = start) + 1;
3112 return true;
3113 }
3114 }
3115 this->interval.invalidate();
3116 return false;
3117 }
3118 };
3119
3122#ifdef _UNICODE
3124#else
3126#endif
3127
3132 {
3133 public:
/// Constructs the parser; \p locale is forwarded to basic_url_password_char<char>.
3134 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3135
3136 protected:
3137 virtual bool do_match(
3138 _In_reads_or_z_(end) const char* text,
3139 _In_ size_t start = 0,
3140 _In_ size_t end = SIZE_MAX,
3141 _In_ int flags = match_default)
3142 {
3143 _Assume_(text || start >= end);
3144 if (start < end && text[start]) {
3145 wchar_t buf[3];
3146 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3147 const wchar_t* chr_end = chr + stdex::strlen(chr);
3148 if (((chr[0] == L'-' ||
3149 chr[0] == L'.' ||
3150 chr[0] == L'_' ||
3151 chr[0] == L'~' ||
3152 chr[0] == L'%' ||
3153 chr[0] == L'!' ||
3154 chr[0] == L'$' ||
3155 chr[0] == L'&' ||
3156 chr[0] == L'\'' ||
3157 chr[0] == L'(' ||
3158 chr[0] == L')' ||
3159 chr[0] == L'*' ||
3160 chr[0] == L'+' ||
3161 chr[0] == L',' ||
3162 chr[0] == L';' ||
3163 chr[0] == L'=' ||
3164 chr[0] == L':') && chr[1] == 0) ||
3165 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3166 {
3167 this->interval.start = start;
3168 return true;
3169 }
3170 }
3171 this->interval.invalidate();
3172 return false;
3173 }
3174 };
3175
3179 template <class T>
3181 {
3182 public:
/// Constructs the parser; \p locale is forwarded to basic_parser<T>.
3183 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3184
3185 protected:
3186 virtual bool do_match(
3187 _In_reads_or_z_opt_(end) const T* text,
3188 _In_ size_t start = 0,
3189 _In_ size_t end = SIZE_MAX,
3190 _In_ int flags = match_default)
3191 {
3192 _Assume_(text || start >= end);
3193 if (start < end && text[start]) {
3194 if (text[start] == '/' ||
3195 text[start] == '-' ||
3196 text[start] == '.' ||
3197 text[start] == '_' ||
3198 text[start] == '~' ||
3199 text[start] == '%' ||
3200 text[start] == '!' ||
3201 text[start] == '$' ||
3202 text[start] == '&' ||
3203 text[start] == '\'' ||
3204 text[start] == '(' ||
3205 text[start] == ')' ||
3206 text[start] == '*' ||
3207 text[start] == '+' ||
3208 text[start] == ',' ||
3209 text[start] == ';' ||
3210 text[start] == '=' ||
3211 text[start] == ':' ||
3212 text[start] == '@' ||
3213 text[start] == '?' ||
3214 text[start] == '#' ||
3215 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3216 {
3217 this->interval.end = (this->interval.start = start) + 1;
3218 return true;
3219 }
3220 }
3221 this->interval.invalidate();
3222 return false;
3223 }
3224 };
3225
3228#ifdef _UNICODE
3230#else
3232#endif
3233
3238 {
3239 public:
/// Constructs the parser; \p locale is forwarded to basic_url_path_char<char>.
3240 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3241
3242 protected:
3243 virtual bool do_match(
3244 _In_reads_or_z_(end) const char* text,
3245 _In_ size_t start = 0,
3246 _In_ size_t end = SIZE_MAX,
3247 _In_ int flags = match_default)
3248 {
3249 _Assume_(text || start >= end);
3250 if (start < end && text[start]) {
3251 wchar_t buf[3];
3252 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3253 const wchar_t* chr_end = chr + stdex::strlen(chr);
3254 if (((chr[0] == L'/' ||
3255 chr[0] == L'-' ||
3256 chr[0] == L'.' ||
3257 chr[0] == L'_' ||
3258 chr[0] == L'~' ||
3259 chr[0] == L'%' ||
3260 chr[0] == L'!' ||
3261 chr[0] == L'$' ||
3262 chr[0] == L'&' ||
3263 chr[0] == L'\'' ||
3264 chr[0] == L'(' ||
3265 chr[0] == L')' ||
3266 chr[0] == L'*' ||
3267 chr[0] == L'+' ||
3268 chr[0] == L',' ||
3269 chr[0] == L';' ||
3270 chr[0] == L'=' ||
3271 chr[0] == L':' ||
3272 chr[0] == L'@' ||
3273 chr[0] == L'?' ||
3274 chr[0] == L'#') && chr[1] == 0) ||
3275 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3276 {
3277 this->interval.start = start;
3278 return true;
3279 }
3280 }
3281 this->interval.invalidate();
3282 return false;
3283 }
3284 };
3285
3289 template <class T>
3291 {
3292 public:
// Constructor parameters: the parser for a single path character, the parsers
// that recognise the start of the query and of the bookmark, and the locale.
// NOTE(review): the listing lines naming the constructor (3293) and the first
// member initializer (3298) are absent from this extract.
3294 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3295 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3296 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3297 _In_ const std::locale& locale = std::locale()) :
3299 m_path_char(path_char),
3300 m_query_start(query_start),
3301 m_bookmark_start(bookmark_start)
3302 {}
3303
/// Resets the path, query and bookmark intervals to the start=1/end=0 state.
// NOTE(review): listing line 3312 is absent from this extract; presumably it
// resets this parser's own state - confirm against the original header.
3304 virtual void invalidate()
3305 {
3306 path.start = 1;
3307 path.end = 0;
3308 query.start = 1;
3309 query.end = 0;
3310 bookmark.start = 1;
3311 bookmark.end = 0;
3313 }
3314
3317 stdex::interval<size_t> bookmark;
3318
3319 protected:
/// Matches a URL path with an optional query and an optional bookmark, and
/// records each part's position in the path, query and bookmark intervals.
///
/// Path characters are consumed until the query-start or bookmark-start parser
/// matches; a bookmark may follow either the path or the query.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in \p text to start parsing at
/// \param[in] end    Index in \p text to stop parsing at
/// \param[in] flags  Matching flags forwarded to the sub-parsers
///
/// \returns \c true on match with interval spanning it; \c false with the
///          intervals reset otherwise
3320 virtual bool do_match(
3321 _In_reads_or_z_opt_(end) const T* text,
3322 _In_ size_t start = 0,
3323 _In_ size_t end = SIZE_MAX,
3324 _In_ int flags = match_default)
3325 {
3326 _Assume_(text || start >= end);
3327
3328 this->interval.end = start;
3329 path.start = start;
// Query and bookmark start out in the start=1/end=0 state until one is found.
3330 query.start = 1;
3331 query.end = 0;
3332 bookmark.start = 1;
3333 bookmark.end = 0;
3334
3335 for (;;) {
3336 if (this->interval.end >= end || !text[this->interval.end])
3337 break;
// Query start: the path ends here and the query runs until a bookmark start,
// a non-path character or the end of text.
3338 if (m_query_start->match(text, this->interval.end, end, flags)) {
3339 path.end = this->interval.end;
3340 query.start = this->interval.end = m_query_start->interval.end;
3341 for (;;) {
3342 if (this->interval.end >= end || !text[this->interval.end]) {
3343 query.end = this->interval.end;
3344 break;
3345 }
// Bookmark following the query.
3346 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3347 query.end = this->interval.end;
3348 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3349 for (;;) {
3350 if (this->interval.end >= end || !text[this->interval.end]) {
3351 bookmark.end = this->interval.end;
3352 break;
3353 }
3354 if (m_path_char->match(text, this->interval.end, end, flags))
3355 this->interval.end = m_path_char->interval.end;
3356 else {
3357 bookmark.end = this->interval.end;
3358 break;
3359 }
3360 }
3361 this->interval.start = start;
3362 return true;
3363 }
3364 if (m_path_char->match(text, this->interval.end, end, flags))
3365 this->interval.end = m_path_char->interval.end;
3366 else {
3367 query.end = this->interval.end;
3368 break;
3369 }
3370 }
3371 this->interval.start = start;
3372 return true;
3373 }
// Bookmark directly after the path (no query).
3374 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3375 path.end = this->interval.end;
3376 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3377 for (;;) {
3378 if (this->interval.end >= end || !text[this->interval.end]) {
3379 bookmark.end = this->interval.end;
3380 break;
3381 }
3382 if (m_path_char->match(text, this->interval.end, end, flags))
3383 this->interval.end = m_path_char->interval.end;
3384 else {
3385 bookmark.end = this->interval.end;
3386 break;
3387 }
3388 }
3389 this->interval.start = start;
3390 return true;
3391 }
3392 if (m_path_char->match(text, this->interval.end, end, flags))
3393 this->interval.end = m_path_char->interval.end;
3394 else
3395 break;
3396 }
3397
// NOTE(review): listing line 3398 is absent from this extract; the closing
// brace on 3402 presumably belongs to a condition on that line - confirm
// against the original header.
3399 path.end = this->interval.end;
3400 this->interval.start = start;
3401 return true;
3402 }
3403
// Failure: query was already reset at the top of the function and is not
// modified on this path, so only path and bookmark are reset here.
3404 path.start = 1;
3405 path.end = 0;
3406 bookmark.start = 1;
3407 bookmark.end = 0;
3408 this->interval.invalidate();
3409 return false;
3410 }
3411
// Parser for a single path character (also used for query and bookmark characters).
3412 std::shared_ptr<basic_parser<T>> m_path_char;
// Parser recognising the start of the query part.
3413 std::shared_ptr<basic_parser<T>> m_query_start;
// Parser recognising the start of the bookmark part.
3414 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3415 };
3416
// The t-prefixed alias selects the wide or narrow variant depending on _UNICODE.
// NOTE(review): the listing lines declaring url_path and wurl_path (3417-3418)
// are absent from this extract.
3419#ifdef _UNICODE
3420 using turl_path = wurl_path;
3421#else
3422 using turl_path = url_path;
3423#endif
3425
3429 template <class T>
3430 class basic_url : public basic_parser<T>
3431 {
3432 public:
/// Constructs the URL parser from its component parsers.
///
/// Parameters with a leading underscore are stored in the public members of
/// the same name; the others (colon, slash, at, IP brackets) are stored in
/// m_-prefixed members.
// NOTE(review): listing line 3451 (presumably the base-class initializer that
// receives locale) is absent from this extract.
3433 basic_url(
3434 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3435 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3436 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3437 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3438 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3439 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3440 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3441 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3442 _In_ const std::shared_ptr<basic_parser<T>>& at,
3443 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3444 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3445 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3446 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3447 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3448 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3449 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3450 _In_ const std::locale& locale = std::locale()) :
3452 http_scheme(_http_scheme),
3453 ftp_scheme(_ftp_scheme),
3454 mailto_scheme(_mailto_scheme),
3455 file_scheme(_file_scheme),
3456 m_colon(colon),
3457 m_slash(slash),
3458 username(_username),
3459 password(_password),
3460 m_at(at),
3461 m_ip_lbracket(ip_lbracket),
3462 m_ip_rbracket(ip_rbracket),
3463 ipv4_host(_ipv4_host),
3464 ipv6_host(_ipv6_host),
3465 dns_host(_dns_host),
3466 port(_port),
3467 path(_path)
3468 {}
3469
3470 virtual void invalidate()
3471 {
3472 http_scheme->invalidate();
3473 ftp_scheme->invalidate();
3474 mailto_scheme->invalidate();
3475 file_scheme->invalidate();
3476 username->invalidate();
3477 password->invalidate();
3478 ipv4_host->invalidate();
3479 ipv6_host->invalidate();
3480 dns_host->invalidate();
3481 port->invalidate();
3482 path->invalidate();
3484 }
3485
3486 std::shared_ptr<basic_parser<T>> http_scheme;
3487 std::shared_ptr<basic_parser<T>> ftp_scheme;
3488 std::shared_ptr<basic_parser<T>> mailto_scheme;
3489 std::shared_ptr<basic_parser<T>> file_scheme;
3490 std::shared_ptr<basic_parser<T>> username;
3491 std::shared_ptr<basic_parser<T>> password;
3492 std::shared_ptr<basic_parser<T>> ipv4_host;
3493 std::shared_ptr<basic_parser<T>> ipv6_host;
3494 std::shared_ptr<basic_parser<T>> dns_host;
3495 std::shared_ptr<basic_parser<T>> port;
3496 std::shared_ptr<basic_parser<T>> path;
3497
3498 protected:
3499 virtual bool do_match(
3500 _In_reads_or_z_opt_(end) const T* text,
3501 _In_ size_t start = 0,
3502 _In_ size_t end = SIZE_MAX,
3503 _In_ int flags = match_default)
3504 {
3505 _Assume_(text || start >= end);
3506
3507 this->interval.end = start;
3508
3509 if (http_scheme->match(text, this->interval.end, end, flags) &&
3510 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3511 m_slash->match(text, m_colon->interval.end, end, flags) &&
3512 m_slash->match(text, m_slash->interval.end, end, flags))
3513 {
3514 // http://
3515 this->interval.end = m_slash->interval.end;
3516 ftp_scheme->invalidate();
3517 mailto_scheme->invalidate();
3518 file_scheme->invalidate();
3519 }
3520 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3521 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3522 m_slash->match(text, m_colon->interval.end, end, flags) &&
3523 m_slash->match(text, m_slash->interval.end, end, flags))
3524 {
3525 // ftp://
3526 this->interval.end = m_slash->interval.end;
3527 http_scheme->invalidate();
3528 mailto_scheme->invalidate();
3529 file_scheme->invalidate();
3530 }
3531 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3532 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3533 {
3534 // mailto:
3535 this->interval.end = m_colon->interval.end;
3536 http_scheme->invalidate();
3537 ftp_scheme->invalidate();
3538 file_scheme->invalidate();
3539 }
3540 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3541 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3542 m_slash->match(text, m_colon->interval.end, end, flags) &&
3543 m_slash->match(text, m_slash->interval.end, end, flags))
3544 {
3545 // file://
3546 this->interval.end = m_slash->interval.end;
3547 http_scheme->invalidate();
3548 ftp_scheme->invalidate();
3549 mailto_scheme->invalidate();
3550 }
3551 else {
3552 // Default to http:
3553 http_scheme->invalidate();
3554 ftp_scheme->invalidate();
3555 mailto_scheme->invalidate();
3556 file_scheme->invalidate();
3557 }
3558
3559 if (ftp_scheme->interval) {
3560 if (username->match(text, this->interval.end, end, flags)) {
3561 if (m_colon->match(text, username->interval.end, end, flags) &&
3562 password->match(text, m_colon->interval.end, end, flags) &&
3563 m_at->match(text, password->interval.end, end, flags))
3564 {
3565 // Username and password
3566 this->interval.end = m_at->interval.end;
3567 }
3568 else if (m_at->match(text, this->interval.end, end, flags)) {
3569 // Username only
3570 this->interval.end = m_at->interval.end;
3571 password->invalidate();
3572 }
3573 else {
3574 username->invalidate();
3575 password->invalidate();
3576 }
3577 }
3578 else {
3579 username->invalidate();
3580 password->invalidate();
3581 }
3582
3583 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3584 // Host is IPv4
3585 this->interval.end = ipv4_host->interval.end;
3586 ipv6_host->invalidate();
3587 dns_host->invalidate();
3588 }
3589 else if (
3590 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3591 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3592 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3593 {
3594 // Host is IPv6
3595 this->interval.end = m_ip_rbracket->interval.end;
3596 ipv4_host->invalidate();
3597 dns_host->invalidate();
3598 }
3599 else if (dns_host->match(text, this->interval.end, end, flags)) {
3600 // Host is hostname
3601 this->interval.end = dns_host->interval.end;
3602 ipv4_host->invalidate();
3603 ipv6_host->invalidate();
3604 }
3605 else {
3606 invalidate();
3607 return false;
3608 }
3609
3610 if (m_colon->match(text, this->interval.end, end, flags) &&
3611 port->match(text, m_colon->interval.end, end, flags))
3612 {
3613 // Port
3614 this->interval.end = port->interval.end;
3615 }
3616 else
3617 port->invalidate();
3618
3619 if (path->match(text, this->interval.end, end, flags)) {
3620 // Path
3621 this->interval.end = path->interval.end;
3622 }
3623
3624 this->interval.start = start;
3625 return true;
3626 }
3627
3628 if (mailto_scheme->interval) {
3629 if (username->match(text, this->interval.end, end, flags) &&
3630 m_at->match(text, username->interval.end, end, flags))
3631 {
3632 // Username
3633 this->interval.end = m_at->interval.end;
3634 }
3635 else {
3636 invalidate();
3637 return false;
3638 }
3639
3640 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3641 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3642 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3643 {
3644 // Host is IPv4
3645 this->interval.end = m_ip_rbracket->interval.end;
3646 ipv6_host->invalidate();
3647 dns_host->invalidate();
3648 }
3649 else if (
3650 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3651 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3652 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3653 {
3654 // Host is IPv6
3655 this->interval.end = m_ip_rbracket->interval.end;
3656 ipv4_host->invalidate();
3657 dns_host->invalidate();
3658 }
3659 else if (dns_host->match(text, this->interval.end, end, flags)) {
3660 // Host is hostname
3661 this->interval.end = dns_host->interval.end;
3662 ipv4_host->invalidate();
3663 ipv6_host->invalidate();
3664 }
3665 else {
3666 invalidate();
3667 return false;
3668 }
3669
3670 password->invalidate();
3671 port->invalidate();
3672 path->invalidate();
3673 this->interval.start = start;
3674 return true;
3675 }
3676
3677 if (file_scheme->interval) {
3678 if (path->match(text, this->interval.end, end, flags)) {
3679 // Path
3680 this->interval.end = path->interval.end;
3681 }
3682
3683 username->invalidate();
3684 password->invalidate();
3685 ipv4_host->invalidate();
3686 ipv6_host->invalidate();
3687 dns_host->invalidate();
3688 port->invalidate();
3689 this->interval.start = start;
3690 return true;
3691 }
3692
3693 // "http://" found or defaulted to
3694
3695 // If "http://" explicit, test for username&password.
3696 if (http_scheme->interval &&
3697 username->match(text, this->interval.end, end, flags))
3698 {
3699 if (m_colon->match(text, username->interval.end, end, flags) &&
3700 password->match(text, m_colon->interval.end, end, flags) &&
3701 m_at->match(text, password->interval.end, end, flags))
3702 {
3703 // Username and password
3704 this->interval.end = m_at->interval.end;
3705 }
3706 else if (m_at->match(text, username->interval.end, end, flags)) {
3707 // Username only
3708 this->interval.end = m_at->interval.end;
3709 password->invalidate();
3710 }
3711 else {
3712 username->invalidate();
3713 password->invalidate();
3714 }
3715 }
3716 else {
3717 username->invalidate();
3718 password->invalidate();
3719 }
3720
3721 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3722 // Host is IPv4
3723 this->interval.end = ipv4_host->interval.end;
3724 ipv6_host->invalidate();
3725 dns_host->invalidate();
3726 }
3727 else if (
3728 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3729 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3730 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3731 {
3732 // Host is IPv6
3733 this->interval.end = m_ip_rbracket->interval.end;
3734 ipv4_host->invalidate();
3735 dns_host->invalidate();
3736 }
3737 else if (dns_host->match(text, this->interval.end, end, flags)) {
3738 // Host is hostname
3739 this->interval.end = dns_host->interval.end;
3740 ipv4_host->invalidate();
3741 ipv6_host->invalidate();
3742 }
3743 else {
3744 invalidate();
3745 return false;
3746 }
3747
3748 if (m_colon->match(text, this->interval.end, end, flags) &&
3749 port->match(text, m_colon->interval.end, end, flags))
3750 {
3751 // Port
3752 this->interval.end = port->interval.end;
3753 }
3754 else
3755 port->invalidate();
3756
3757 if (path->match(text, this->interval.end, end, flags)) {
3758 // Path
3759 this->interval.end = path->interval.end;
3760 }
3761
3762 this->interval.start = start;
3763 return true;
3764 }
3765
3766 std::shared_ptr<basic_parser<T>> m_colon;
3767 std::shared_ptr<basic_parser<T>> m_slash;
3768 std::shared_ptr<basic_parser<T>> m_at;
3769 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3770 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3771 };
3772
3773 using url = basic_url<char>;
3774 using wurl = basic_url<wchar_t>;
3775#ifdef _UNICODE
3776 using turl = wurl;
3777#else
3778 using turl = url;
3779#endif
3780 using sgml_url = basic_url<char>;
3781
3785 template <class T>
3787 {
3788 public:
3790 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3791 _In_ const std::shared_ptr<basic_parser<T>>& at,
3792 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3793 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3794 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3795 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3796 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3797 _In_ const std::locale& locale = std::locale()) :
3799 username(_username),
3800 m_at(at),
3801 m_ip_lbracket(ip_lbracket),
3802 m_ip_rbracket(ip_rbracket),
3803 ipv4_host(_ipv4_host),
3804 ipv6_host(_ipv6_host),
3805 dns_host(_dns_host)
3806 {}
3807
3808 virtual void invalidate()
3809 {
3810 username->invalidate();
3811 ipv4_host->invalidate();
3812 ipv6_host->invalidate();
3813 dns_host->invalidate();
3815 }
3816
3817 std::shared_ptr<basic_parser<T>> username;
3818 std::shared_ptr<basic_parser<T>> ipv4_host;
3819 std::shared_ptr<basic_parser<T>> ipv6_host;
3820 std::shared_ptr<basic_parser<T>> dns_host;
3821
3822 protected:
3823 virtual bool do_match(
3824 _In_reads_or_z_opt_(end) const T* text,
3825 _In_ size_t start = 0,
3826 _In_ size_t end = SIZE_MAX,
3827 _In_ int flags = match_default)
3828 {
3829 _Assume_(text || start >= end);
3830
3831 if (username->match(text, start, end, flags) &&
3832 m_at->match(text, username->interval.end, end, flags))
3833 {
3834 // Username@
3835 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3836 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3837 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3838 {
3839 // Host is IPv4
3840 this->interval.end = m_ip_rbracket->interval.end;
3841 ipv6_host->invalidate();
3842 dns_host->invalidate();
3843 }
3844 else if (
3845 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3846 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3847 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3848 {
3849 // Host is IPv6
3850 this->interval.end = m_ip_rbracket->interval.end;
3851 ipv4_host->invalidate();
3852 dns_host->invalidate();
3853 }
3854 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3855 // Host is hostname
3856 this->interval.end = dns_host->interval.end;
3857 ipv4_host->invalidate();
3858 ipv6_host->invalidate();
3859 }
3860 else
3861 goto error;
3862 this->interval.start = start;
3863 return true;
3864 }
3865
3866 error:
3867 invalidate();
3868 return false;
3869 }
3870
3871 std::shared_ptr<basic_parser<T>> m_at;
3872 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3873 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3874 };
3875
3878#ifdef _UNICODE
3880#else
3882#endif
3884
3888 template <class T>
3890 {
3891 public:
3893 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3894 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3895 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3896 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3897 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3898 _In_ const std::locale& locale = std::locale()) :
3901 apex(_apex),
3902 eyes(_eyes),
3903 nose(_nose),
3904 mouth(_mouth)
3905 {}
3906
3907 virtual void invalidate()
3908 {
3909 if (emoticon) emoticon->invalidate();
3910 if (apex) apex->invalidate();
3911 eyes->invalidate();
3912 if (nose) nose->invalidate();
3913 mouth->invalidate();
3915 }
3916
3917 std::shared_ptr<basic_parser<T>> emoticon;
3918 std::shared_ptr<basic_parser<T>> apex;
3919 std::shared_ptr<basic_parser<T>> eyes;
3920 std::shared_ptr<basic_parser<T>> nose;
3921 std::shared_ptr<basic_set<T>> mouth;
3922
3923 protected:
3924 virtual bool do_match(
3925 _In_reads_or_z_opt_(end) const T* text,
3926 _In_ size_t start = 0,
3927 _In_ size_t end = SIZE_MAX,
3928 _In_ int flags = match_default)
3929 {
3930 _Assume_(text || start >= end);
3931
3932 if (emoticon && emoticon->match(text, start, end, flags)) {
3933 if (apex) apex->invalidate();
3934 eyes->invalidate();
3935 if (nose) nose->invalidate();
3936 mouth->invalidate();
3937 this->interval.start = start;
3938 this->interval.end = emoticon->interval.end;
3939 return true;
3940 }
3941
3942 this->interval.end = start;
3943
3944 if (apex && apex->match(text, this->interval.end, end, flags))
3945 this->interval.end = apex->interval.end;
3946
3947 if (eyes->match(text, this->interval.end, end, flags)) {
3948 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3949 mouth->match(text, nose->interval.end, end, flags))
3950 {
3951 size_t
3953 hit_offset = mouth->hit_offset;
3954 // Mouth may repeat :-)))))))
3955 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3956 mouth->interval.start = start_mouth;
3957 mouth->interval.end = this->interval.end;
3958 this->interval.start = start;
3959 return true;
3960 }
3961 if (mouth->match(text, eyes->interval.end, end, flags)) {
3962 size_t
3964 hit_offset = mouth->hit_offset;
3965 // Mouth may repeat :-)))))))
3966 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3967 if (nose) nose->invalidate();
3968 mouth->interval.start = start_mouth;
3969 mouth->interval.end = this->interval.end;
3970 this->interval.start = start;
3971 return true;
3972 }
3973 }
3974
3975 if (emoticon) emoticon->invalidate();
3976 if (apex) apex->invalidate();
3977 eyes->invalidate();
3978 if (nose) nose->invalidate();
3979 mouth->invalidate();
3980 this->interval.invalidate();
3981 return false;
3982 }
3983 };
3984
3985 using emoticon = basic_emoticon<char>;
3986 using wemoticon = basic_emoticon<wchar_t>;
3987#ifdef _UNICODE
3988 using temoticon = wemoticon;
3989#else
3990 using temoticon = emoticon;
3991#endif
3992 using sgml_emoticon = basic_emoticon<char>;
3993
///
/// Date formats. Every format occupies its own bit, so formats can be OR-ed into a mask.
///
enum date_format_t {
    date_format_none = 0,       ///< No format
    date_format_dmy  = 1 << 0,  ///< Day, month, year
    date_format_mdy  = 1 << 1,  ///< Month, day, year
    date_format_ymd  = 1 << 2,  ///< Year, month, day
    date_format_ym   = 1 << 3,  ///< Year, month
    date_format_my   = 1 << 4,  ///< Month, year
    date_format_dm   = 1 << 5,  ///< Day, month
    date_format_md   = 1 << 6,  ///< Month, day
};
4007
4011 template <class T>
4012 class basic_date : public basic_parser<T>
4013 {
4014 public:
4015 basic_date(
4016 _In_ int format_mask,
4017 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4018 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4019 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4020 _In_ const std::shared_ptr<basic_set<T>>& separator,
4021 _In_ const std::shared_ptr<basic_parser<T>>& space,
4022 _In_ const std::locale& locale = std::locale()) :
4024 format(date_format_none),
4025 m_format_mask(format_mask),
4026 day(_day),
4027 month(_month),
4028 year(_year),
4029 m_separator(separator),
4030 m_space(space)
4031 {}
4032
4033 virtual void invalidate()
4034 {
4035 if (day) day->invalidate();
4036 if (month) month->invalidate();
4037 if (year) year->invalidate();
4038 format = date_format_none;
4040 }
4041
4042 date_format_t format;
4043 std::shared_ptr<basic_integer<T>> day;
4044 std::shared_ptr<basic_integer<T>> month;
4045 std::shared_ptr<basic_integer<T>> year;
4046
4047 protected:
4048 virtual bool do_match(
4049 _In_reads_or_z_opt_(end) const T* text,
4050 _In_ size_t start = 0,
4051 _In_ size_t end = SIZE_MAX,
4052 _In_ int flags = match_default)
4053 {
4054 _Assume_(text || start >= end);
4055
4056 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4057 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4058 if (day->match(text, start, end, flags)) {
4059 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4060 if (m_separator->match(text, this->interval.end, end, flags)) {
4061 size_t hit_offset = m_separator->hit_offset;
4062 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4063 if (month->match(text, this->interval.end, end, flags)) {
4064 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4065 if (m_separator->match(text, this->interval.end, end, flags) &&
4066 m_separator->hit_offset == hit_offset) // Both separators must match.
4067 {
4068 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4069 if (year->match(text, this->interval.end, end, flags) &&
4070 is_valid(day->value, month->value))
4071 {
4072 this->interval.start = start;
4073 this->interval.end = year->interval.end;
4074 format = date_format_dmy;
4075 return true;
4076 }
4077 }
4078 }
4079 }
4080 }
4081 }
4082
4083 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4084 if (month->match(text, start, end, flags)) {
4085 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4086 if (m_separator->match(text, this->interval.end, end, flags)) {
4087 size_t hit_offset = m_separator->hit_offset;
4088 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4089 if (day->match(text, this->interval.end, end, flags)) {
4090 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4091 if (m_separator->match(text, this->interval.end, end, flags) &&
4092 m_separator->hit_offset == hit_offset) // Both separators must match.
4093 {
4094 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4095 if (year->match(text, this->interval.end, end, flags) &&
4096 is_valid(day->value, month->value))
4097 {
4098 this->interval.start = start;
4099 this->interval.end = year->interval.end;
4100 format = date_format_mdy;
4101 return true;
4102 }
4103 }
4104 }
4105 }
4106 }
4107 }
4108
4109 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4110 if (year->match(text, start, end, flags)) {
4111 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4112 if (m_separator->match(text, this->interval.end, end, flags)) {
4113 size_t hit_offset = m_separator->hit_offset;
4114 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4115 if (month->match(text, this->interval.end, end, flags)) {
4116 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4117 if (m_separator->match(text, this->interval.end, end, flags) &&
4118 m_separator->hit_offset == hit_offset) // Both separators must match.
4119 {
4120 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4121 if (day->match(text, this->interval.end, end, flags) &&
4122 is_valid(day->value, month->value))
4123 {
4124 this->interval.start = start;
4125 this->interval.end = day->interval.end;
4126 format = date_format_ymd;
4127 return true;
4128 }
4129 }
4130 }
4131 }
4132 }
4133 }
4134
4135 if ((m_format_mask & date_format_ym) == date_format_ym) {
4136 if (year->match(text, start, end, flags)) {
4137 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4138 if (m_separator->match(text, this->interval.end, end, flags)) {
4139 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4140 if (month->match(text, this->interval.end, end, flags) &&
4141 is_valid(SIZE_MAX, month->value))
4142 {
4143 if (day) day->invalidate();
4144 this->interval.start = start;
4145 this->interval.end = month->interval.end;
4146 format = date_format_ym;
4147 return true;
4148 }
4149 }
4150 }
4151 }
4152
4153 if ((m_format_mask & date_format_my) == date_format_my) {
4154 if (month->match(text, start, end, flags)) {
4155 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4156 if (m_separator->match(text, this->interval.end, end, flags)) {
4157 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4158 if (year->match(text, this->interval.end, end, flags) &&
4159 is_valid(SIZE_MAX, month->value))
4160 {
4161 if (day) day->invalidate();
4162 this->interval.start = start;
4163 this->interval.end = year->interval.end;
4164 format = date_format_my;
4165 return true;
4166 }
4167 }
4168 }
4169 }
4170
4171 if ((m_format_mask & date_format_dm) == date_format_dm) {
4172 if (day->match(text, start, end, flags)) {
4173 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4174 if (m_separator->match(text, this->interval.end, end, flags)) {
4175 size_t hit_offset = m_separator->hit_offset;
4176 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4177 if (month->match(text, this->interval.end, end, flags) &&
4178 is_valid(day->value, month->value))
4179 {
4180 if (year) year->invalidate();
4181 this->interval.start = start;
4182 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4183 if (m_separator->match(text, this->interval.end, end, flags) &&
4184 m_separator->hit_offset == hit_offset) // Both separators must match.
4185 this->interval.end = m_separator->interval.end;
4186 else
4187 this->interval.end = month->interval.end;
4188 format = date_format_dm;
4189 return true;
4190 }
4191 }
4192 }
4193 }
4194
4195 if ((m_format_mask & date_format_md) == date_format_md) {
4196 if (month->match(text, start, end, flags)) {
4197 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4198 if (m_separator->match(text, this->interval.end, end, flags)) {
4199 size_t hit_offset = m_separator->hit_offset;
4200 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4201 if (day->match(text, this->interval.end, end, flags) &&
4202 is_valid(day->value, month->value))
4203 {
4204 if (year) year->invalidate();
4205 this->interval.start = start;
4206 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4207 if (m_separator->match(text, this->interval.end, end, flags) &&
4208 m_separator->hit_offset == hit_offset) // Both separators must match.
4209 this->interval.end = m_separator->interval.end;
4210 else
4211 this->interval.end = day->interval.end;
4212 format = date_format_md;
4213 return true;
4214 }
4215 }
4216 }
4217 }
4218
4219 if (day) day->invalidate();
4220 if (month) month->invalidate();
4221 if (year) year->invalidate();
4222 format = date_format_none;
4223 this->interval.invalidate();
4224 return false;
4225 }
4226
4227 static bool is_valid(size_t day, size_t month)
4228 {
4229 if (month == SIZE_MAX) {
4230 // Default to January. This allows validating day only, as January has all 31 days.
4231 month = 1;
4232 }
4233 if (day == SIZE_MAX) {
4234 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4235 day = 1;
4236 }
4237
4238 switch (month) {
4239 case 1:
4240 case 3:
4241 case 5:
4242 case 7:
4243 case 8:
4244 case 10:
4245 case 12:
4246 return 1 <= day && day <= 31;
4247 case 2:
4248 return 1 <= day && day <= 29;
4249 case 4:
4250 case 6:
4251 case 9:
4252 case 11:
4253 return 1 <= day && day <= 30;
4254 default:
4255 return false;
4256 }
4257 }
4258
4259 int m_format_mask;
4260 std::shared_ptr<basic_set<T>> m_separator;
4261 std::shared_ptr<basic_parser<T>> m_space;
4262 };
4263
4264 using date = basic_date<char>;
4265 using wdate = basic_date<wchar_t>;
4266#ifdef _UNICODE
4267 using tdate = wdate;
4268#else
4269 using tdate = date;
4270#endif
4272
4276 template <class T>
4277 class basic_time : public basic_parser<T>
4278 {
4279 public:
4280 basic_time(
4281 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4282 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4283 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4284 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4285 _In_ const std::shared_ptr<basic_set<T>>& separator,
4286 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4287 _In_ const std::locale& locale = std::locale()) :
4289 hour(_hour),
4290 minute(_minute),
4291 second(_second),
4292 millisecond(_millisecond),
4293 m_separator(separator),
4294 m_millisecond_separator(millisecond_separator)
4295 {}
4296
4297 virtual void invalidate()
4298 {
4299 hour->invalidate();
4300 minute->invalidate();
4301 if (second) second->invalidate();
4302 if (millisecond) millisecond->invalidate();
4304 }
4305
4306 std::shared_ptr<basic_integer10<T>> hour;
4307 std::shared_ptr<basic_integer10<T>> minute;
4308 std::shared_ptr<basic_integer10<T>> second;
4309 std::shared_ptr<basic_integer10<T>> millisecond;
4310
4311 protected:
4312 virtual bool do_match(
4313 _In_reads_or_z_opt_(end) const T* text,
4314 _In_ size_t start = 0,
4315 _In_ size_t end = SIZE_MAX,
4316 _In_ int flags = match_default)
4317 {
4318 _Assume_(text || start >= end);
4319
4320 if (hour->match(text, start, end, flags) &&
4321 m_separator->match(text, hour->interval.end, end, flags) &&
4322 minute->match(text, m_separator->interval.end, end, flags) &&
4323 minute->value < 60)
4324 {
4325 // hh::mm
4326 size_t hit_offset = m_separator->hit_offset;
4327 if (m_separator->match(text, minute->interval.end, end, flags) &&
4328 m_separator->hit_offset == hit_offset && // Both separators must match.
4329 second && second->match(text, m_separator->interval.end, end, flags) &&
4330 second->value < 60)
4331 {
4332 // hh::mm:ss
4333 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4334 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4335 millisecond->value < 1000)
4336 {
4337 // hh::mm:ss.mmmm
4338 this->interval.end = millisecond->interval.end;
4339 }
4340 else {
4341 if (millisecond) millisecond->invalidate();
4342 this->interval.end = second->interval.end;
4343 }
4344 }
4345 else {
4346 if (second) second->invalidate();
4347 if (millisecond) millisecond->invalidate();
4348 this->interval.end = minute->interval.end;
4349 }
4350 this->interval.start = start;
4351 return true;
4352 }
4353
4354 hour->invalidate();
4355 minute->invalidate();
4356 if (second) second->invalidate();
4357 if (millisecond) millisecond->invalidate();
4358 this->interval.invalidate();
4359 return false;
4360 }
4361
4362 std::shared_ptr<basic_set<T>> m_separator;
4363 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4364 };
4365
4366 using time = basic_time<char>;
4367 using wtime = basic_time<wchar_t>;
4368#ifdef _UNICODE
4369 using ttime = wtime;
4370#else
4371 using ttime = time;
4372#endif
4374
4378 template <class T>
4379 class basic_angle : public basic_parser<T>
4380 {
4381 public:
4383 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4384 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4385 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4386 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4387 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4388 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4389 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4390 _In_ const std::locale& locale = std::locale()) :
4392 degree(_degree),
4393 degree_separator(_degree_separator),
4394 minute(_minute),
4395 minute_separator(_minute_separator),
4396 second(_second),
4397 second_separator(_second_separator),
4398 decimal(_decimal)
4399 {}
4400
4401 virtual void invalidate()
4402 {
4403 degree->invalidate();
4404 degree_separator->invalidate();
4405 minute->invalidate();
4406 minute_separator->invalidate();
4407 if (second) second->invalidate();
4408 if (second_separator) second_separator->invalidate();
4409 if (decimal) decimal->invalidate();
4411 }
4412
4413 std::shared_ptr<basic_integer10<T>> degree;
4414 std::shared_ptr<basic_parser<T>> degree_separator;
4415 std::shared_ptr<basic_integer10<T>> minute;
4416 std::shared_ptr<basic_parser<T>> minute_separator;
4417 std::shared_ptr<basic_integer10<T>> second;
4418 std::shared_ptr<basic_parser<T>> second_separator;
4419 std::shared_ptr<basic_parser<T>> decimal;
4420
4421 protected:
4422 virtual bool do_match(
4423 _In_reads_or_z_opt_(end) const T* text,
4424 _In_ size_t start = 0,
4425 _In_ size_t end = SIZE_MAX,
4426 _In_ int flags = match_default)
4427 {
4428 _Assume_(text || start >= end);
4429
4430 this->interval.end = start;
4431
4432 if (degree->match(text, this->interval.end, end, flags) &&
4433 degree_separator->match(text, degree->interval.end, end, flags))
4434 {
4435 // Degrees
4436 this->interval.end = degree_separator->interval.end;
4437 }
4438 else {
4439 degree->invalidate();
4440 degree_separator->invalidate();
4441 }
4442
4443 if (minute->match(text, this->interval.end, end, flags) &&
4444 minute->value < 60 &&
4445 minute_separator->match(text, minute->interval.end, end, flags))
4446 {
4447 // Minutes
4448 this->interval.end = minute_separator->interval.end;
4449 }
4450 else {
4451 minute->invalidate();
4452 minute_separator->invalidate();
4453 }
4454
4455 if (second && second->match(text, this->interval.end, end, flags) &&
4456 second->value < 60)
4457 {
4458 // Seconds
4459 this->interval.end = second->interval.end;
4460 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4461 this->interval.end = second_separator->interval.end;
4462 else
4463 if (second_separator) second_separator->invalidate();
4464 }
4465 else {
4466 if (second) second->invalidate();
4467 if (second_separator) second_separator->invalidate();
4468 }
4469
4470 if (degree->interval.start < degree->interval.end ||
4471 minute->interval.start < minute->interval.end ||
4472 (second && second->interval.start < second->interval.end))
4473 {
4474 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4475 // Decimals
4476 this->interval.end = decimal->interval.end;
4477 }
4478 else if (decimal)
4479 decimal->invalidate();
4480 this->interval.start = start;
4481 return true;
4482 }
4483 if (decimal) decimal->invalidate();
4484 this->interval.invalidate();
4485 return false;
4486 }
4487 };
4488
4489 using angle = basic_angle<char>;
4491#ifdef _UNICODE
4492 using RRegElKot = wangle;
4493#else
4494 using RRegElKot = angle;
4495#endif
4497
4501 template <class T>
4503 {
4504 public:
4506 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4507 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4508 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4509 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4510 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4511 _In_ const std::shared_ptr<basic_parser<T>>& space,
4512 _In_ const std::locale& locale = std::locale()) :
4514 m_digit(digit),
4515 m_plus_sign(plus_sign),
4516 m_lparenthesis(lparenthesis),
4517 m_rparenthesis(rparenthesis),
4518 m_separator(separator),
4519 m_space(space)
4520 {}
4521
4522 virtual void invalidate()
4523 {
4524 value.clear();
4526 }
4527
4528 std::basic_string<T> value;
4529
4530 protected:
4531 virtual bool do_match(
4532 _In_reads_or_z_opt_(end) const T* text,
4533 _In_ size_t start = 0,
4534 _In_ size_t end = SIZE_MAX,
4535 _In_ int flags = match_default)
4536 {
4537 _Assume_(text || start >= end);
4538
4539 size_t safe_digit_end = start, safe_value_size = 0;
4540 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4541 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4542
4543 this->interval.end = start;
4544 value.clear();
4545 m_lparenthesis->invalidate();
4546 m_rparenthesis->invalidate();
4547
4548 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4549 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4550 safe_value_size = value.size();
4551 this->interval.end = m_plus_sign->interval.end;
4552 }
4553
4554 for (;;) {
4555 _Assume_(text || this->interval.end >= end);
4556 if (this->interval.end >= end || !text[this->interval.end])
4557 break;
4558 if (m_digit->match(text, this->interval.end, end, flags)) {
4559 // Digit
4560 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4561 this->interval.end = m_digit->interval.end;
4562 if (!in_parentheses) {
4563 safe_digit_end = this->interval.end;
4564 safe_value_size = value.size();
4565 has_digits = true;
4566 }
4567 after_digit = true;
4568 after_parentheses = false;
4569 }
4570 else if (
4571 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4572 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4573 m_lparenthesis->match(text, this->interval.end, end, flags))
4574 {
4575 // Left parenthesis
4576 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4577 this->interval.end = m_lparenthesis->interval.end;
4578 in_parentheses = true;
4579 after_digit = false;
4580 after_parentheses = false;
4581 }
4582 else if (
4583 in_parentheses && // After left parenthesis
4584 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4585 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4586 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4587 {
4588 // Right parenthesis
4589 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4590 this->interval.end = m_rparenthesis->interval.end;
4591 safe_digit_end = this->interval.end;
4592 safe_value_size = value.size();
4593 in_parentheses = false;
4594 after_digit = false;
4595 after_parentheses = true;
4596 }
4597 else if (
4598 after_digit &&
4599 !in_parentheses && // No separators inside parentheses
4600 !after_parentheses && // No separators following right parenthesis
4601 m_separator && m_separator->match(text, this->interval.end, end, flags))
4602 {
4603 // Separator
4604 this->interval.end = m_separator->interval.end;
4605 after_digit = false;
4606 after_parentheses = false;
4607 }
4608 else if (
4610 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4611 {
4612 // Space
4613 this->interval.end = m_space->interval.end;
4614 after_digit = false;
4615 after_parentheses = false;
4616 }
4617 else
4618 break;
4619 }
4620 if (has_digits) {
4621 value.erase(safe_value_size);
4622 this->interval.start = start;
4623 this->interval.end = safe_digit_end;
4624 return true;
4625 }
4626 value.clear();
4627 this->interval.invalidate();
4628 return false;
4629 }
4630
4631 std::shared_ptr<basic_parser<T>> m_digit;
4632 std::shared_ptr<basic_parser<T>> m_plus_sign;
4633 std::shared_ptr<basic_set<T>> m_lparenthesis;
4634 std::shared_ptr<basic_set<T>> m_rparenthesis;
4635 std::shared_ptr<basic_parser<T>> m_separator;
4636 std::shared_ptr<basic_parser<T>> m_space;
4637 };
4638
4639 using phone_number = basic_phone_number<char>;
4640 using wphone_number = basic_phone_number<wchar_t>;
4641#ifdef _UNICODE
4642 using tphone_number = wphone_number;
4643#else
4644 using tphone_number = phone_number;
4645#endif
4646 using sgml_phone_number = basic_phone_number<char>;
4647
4653 template <class T>
4654 class basic_iban : public basic_parser<T>
4655 {
4656 public:
4657 basic_iban(
4658 _In_ const std::shared_ptr<basic_parser<T>>& space,
4659 _In_ const std::locale& locale = std::locale()) :
4661 m_space(space)
4662 {
4663 this->country[0] = 0;
4664 this->check_digits[0] = 0;
4665 this->bban[0] = 0;
4666 this->is_valid = false;
4667 }
4668
4669 virtual void invalidate()
4670 {
4671 this->country[0] = 0;
4672 this->check_digits[0] = 0;
4673 this->bban[0] = 0;
4674 this->is_valid = false;
4676 }
4677
4678 T country[3];
4680 T bban[31];
4682
4683 protected:
4684 virtual bool do_match(
4685 _In_reads_or_z_opt_(end) const T* text,
4686 _In_ size_t start = 0,
4687 _In_ size_t end = SIZE_MAX,
4688 _In_ int flags = match_default)
4689 {
4690 _Assume_(text || start >= end);
4691 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4692 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4693 struct country_t {
4694 T country[2];
4695 T check_digits[2];
4696 size_t length;
4697 };
4698 static const country_t s_countries[] = {
4699 { { 'A', 'D' }, {}, 24 }, // Andorra
4700 { { 'A', 'E' }, {}, 23 }, // United Arab Emirates
4701 { { 'A', 'L' }, {}, 28 }, // Albania
4702 { { 'A', 'O' }, {}, 25 }, // Angola
4703 { { 'A', 'T' }, {}, 20 }, // Austria
4704 { { 'A', 'Z' }, {}, 28 }, // Azerbaijan
4705 { { 'B', 'A' }, { '3', '9' }, 20}, // Bosnia and Herzegovina
4706 { { 'B', 'E' }, {}, 16 }, // Belgium
4707 { { 'B', 'F' }, {}, 28 }, // Burkina Faso
4708 { { 'B', 'G' }, {}, 22 }, // Bulgaria
4709 { { 'B', 'H' }, {}, 22 }, // Bahrain
4710 { { 'B', 'I' }, {}, 27 }, // Burundi
4711 { { 'B', 'J' }, {}, 28 }, // Benin
4712 { { 'B', 'R' }, {}, 29 }, // Brazil
4713 { { 'B', 'Y' }, {}, 28 }, // Belarus
4714 { { 'C', 'F' }, {}, 27 }, // Central African Republic
4715 { { 'C', 'G' }, {}, 27 }, // Congo, Republic of the
4716 { { 'C', 'H' }, {}, 21 }, // Switzerland
4717 { { 'C', 'I' }, {}, 28 }, // Côte d'Ivoire
4718 { { 'C', 'M' }, {}, 27 }, // Cameroon
4719 { { 'C', 'R' }, {}, 22 }, // Costa Rica
4720 { { 'C', 'V' }, {}, 25 }, // Cabo Verde
4721 { { 'C', 'Y' }, {}, 28 }, // Cyprus
4722 { { 'C', 'Z' }, {}, 24 }, // Czech Republic
4723 { { 'D', 'E' }, {}, 22 }, // Germany
4724 { { 'D', 'J' }, {}, 27 }, // Djibouti
4725 { { 'D', 'K' }, {}, 18 }, // Denmark
4726 { { 'D', 'O' }, {}, 28 }, // Dominican Republic
4727 { { 'D', 'Z' }, {}, 26 }, // Algeria
4728 { { 'E', 'E' }, {}, 20 }, // Estonia
4729 { { 'E', 'G' }, {}, 29 }, // Egypt
4730 { { 'E', 'S' }, {}, 24 }, // Spain
4731 { { 'F', 'I' }, {}, 18 }, // Finland
4732 { { 'F', 'O' }, {}, 18 }, // Faroe Islands
4733 { { 'F', 'R' }, {}, 27 }, // France
4734 { { 'G', 'A' }, {}, 27 }, // Gabon
4735 { { 'G', 'B' }, {}, 22 }, // United Kingdom
4736 { { 'G', 'E' }, {}, 22 }, // Georgia
4737 { { 'G', 'I' }, {}, 23 }, // Gibraltar
4738 { { 'G', 'L' }, {}, 18 }, // Greenland
4739 { { 'G', 'Q' }, {}, 27 }, // Equatorial Guinea
4740 { { 'G', 'R' }, {}, 27 }, // Greece
4741 { { 'G', 'T' }, {}, 28 }, // Guatemala
4742 { { 'G', 'W' }, {}, 25 }, // Guinea-Bissau
4743 { { 'H', 'N' }, {}, 28 }, // Honduras
4744 { { 'H', 'R' }, {}, 21 }, // Croatia
4745 { { 'H', 'U' }, {}, 28 }, // Hungary
4746 { { 'I', 'E' }, {}, 22 }, // Ireland
4747 { { 'I', 'L' }, {}, 23 }, // Israel
4748 { { 'I', 'Q' }, {}, 23 }, // Iraq
4749 { { 'I', 'R' }, {}, 26 }, // Iran
4750 { { 'I', 'S' }, {}, 26 }, // Iceland
4751 { { 'I', 'T' }, {}, 27 }, // Italy
4752 { { 'J', 'O' }, {}, 30 }, // Jordan
4753 { { 'K', 'M' }, {}, 27 }, // Comoros
4754 { { 'K', 'W' }, {}, 30 }, // Kuwait
4755 { { 'K', 'Z' }, {}, 20 }, // Kazakhstan
4756 { { 'L', 'B' }, {}, 28 }, // Lebanon
4757 { { 'L', 'C' }, {}, 32 }, // Saint Lucia
4758 { { 'L', 'I' }, {}, 21 }, // Liechtenstein
4759 { { 'L', 'T' }, {}, 20 }, // Lithuania
4760 { { 'L', 'U' }, {}, 20 }, // Luxembourg
4761 { { 'L', 'V' }, {}, 21 }, // Latvia
4762 { { 'L', 'Y' }, {}, 25 }, // Libya
4763 { { 'M', 'A' }, {}, 28 }, // Morocco
4764 { { 'M', 'C' }, {}, 27 }, // Monaco
4765 { { 'M', 'D' }, {}, 24 }, // Moldova
4766 { { 'M', 'E' }, { '2', '5' }, 22 }, // Montenegro
4767 { { 'M', 'G' }, {}, 27 }, // Madagascar
4768 { { 'M', 'K' }, { '0', '7' }, 19 }, // North Macedonia
4769 { { 'M', 'L' }, {}, 28 }, // Mali
4770 { { 'M', 'R' }, { '1', '3' }, 27}, // Mauritania
4771 { { 'M', 'T' }, {}, 31 }, // Malta
4772 { { 'M', 'U' }, {}, 30 }, // Mauritius
4773 { { 'M', 'Z' }, {}, 25 }, // Mozambique
4774 { { 'N', 'E' }, {}, 28 }, // Niger
4775 { { 'N', 'I' }, {}, 32 }, // Nicaragua
4776 { { 'N', 'L' }, {}, 18 }, // Netherlands
4777 { { 'N', 'O' }, {}, 15 }, // Norway
4778 { { 'P', 'K' }, {}, 24 }, // Pakistan
4779 { { 'P', 'L' }, {}, 28 }, // Poland
4780 { { 'P', 'S' }, {}, 29 }, // Palestinian territories
4781 { { 'P', 'T' }, { '5', '0' }, 25 }, // Portugal
4782 { { 'Q', 'A' }, {}, 29 }, // Qatar
4783 { { 'R', 'O' }, {}, 24 }, // Romania
4784 { { 'R', 'S' }, { '3', '5' }, 22 }, // Serbia
4785 { { 'R', 'U' }, {}, 33 }, // Russia
4786 { { 'S', 'A' }, {}, 24 }, // Saudi Arabia
4787 { { 'S', 'C' }, {}, 31 }, // Seychelles
4788 { { 'S', 'D' }, {}, 18 }, // Sudan
4789 { { 'S', 'E' }, {}, 24 }, // Sweden
4790 { { 'S', 'I' }, { '5', '6' }, 19 }, // Slovenia
4791 { { 'S', 'K' }, {}, 24 }, // Slovakia
4792 { { 'S', 'M' }, {}, 27 }, // San Marino
4793 { { 'S', 'N' }, {}, 28 }, // Senegal
4794 { { 'S', 'T' }, {}, 25 }, // São Tomé and Príncipe
4795 { { 'S', 'V' }, {}, 28 }, // El Salvador
4796 { { 'T', 'D' }, {}, 27 }, // Chad
4797 { { 'T', 'G' }, {}, 28 }, // Togo
4798 { { 'T', 'L' }, { '3', '8' }, 23}, // East Timor
4799 { { 'T', 'N' }, { '5', '9' }, 24 }, // Tunisia
4800 { { 'T', 'R' }, {}, 26 }, // Turkey
4801 { { 'U', 'A' }, {}, 29 }, // Ukraine
4802 { { 'V', 'A' }, {}, 22 }, // Vatican City
4803 { { 'V', 'G' }, {}, 24 }, // Virgin Islands, British
4804 { { 'X', 'K' }, {}, 20 }, // Kosovo
4805 };
4806 const country_t* country_desc = nullptr;
4807 size_t n, available, next, bban_length;
4809
4810 this->interval.end = start;
4811 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4812 if (this->interval.end >= end || !text[this->interval.end])
4813 goto error; // incomplete country code
4814 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4815 if (chr < 'A' || 'Z' < chr)
4816 goto error; // invalid country code
4817 this->country[i] = chr;
4818 }
4819 for (size_t l = 0, r = _countof(s_countries);;) {
4820 if (l >= r)
4821 goto error; // unknown country
4822 size_t m = (l + r) / 2;
4823 const country_t& c = s_countries[m];
4824 if (c.country[0] < this->country[0] || (c.country[0] == this->country[0] && c.country[1] < this->country[1]))
4825 l = m + 1;
4826 else if (this->country[0] < c.country[0] || (this->country[0] == c.country[0] && this->country[1] < c.country[1]))
4827 r = m;
4828 else {
4829 country_desc = &c;
4830 break;
4831 }
4832 }
4833 this->country[2] = 0;
4834
4835 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4836 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4837 goto error; // incomplete or invalid check digits
4838 this->check_digits[i] = text[this->interval.end];
4839 }
4840 this->check_digits[2] = 0;
4841
4842 if ((country_desc->check_digits[0] && this->check_digits[0] != country_desc->check_digits[0]) ||
4843 (country_desc->check_digits[1] && this->check_digits[1] != country_desc->check_digits[1]))
4844 goto error; // unexpected check digits
4845
4846 bban_length = country_desc->length - 4;
4847 for (n = 0; n < bban_length;) {
4848 if (this->interval.end >= end || !text[this->interval.end])
4849 goto error; // bban too short
4850 if (m_space && m_space->match(text, this->interval.end, end, flags)) {
4851 this->interval.end = m_space->interval.end;
4852 continue;
4853 }
4854 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4855 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4856 this->bban[n++] = chr;
4857 this->interval.end++;
4858 }
4859 else
4860 goto error; // invalid bban
4861 }
4862 this->bban[n] = 0;
4863
4864 // Normalize IBAN.
4865 T normalized[69];
4866 available = 0;
4867 for (size_t i = 0; ; ++i) {
4868 if (!this->bban[i]) {
4869 for (i = 0; i < 2; ++i) {
4870 if ('A' <= this->country[i] && this->country[i] <= 'J') {
4871 normalized[available++] = '1';
4872 normalized[available++] = '0' + this->country[i] - 'A';
4873 }
4874 else if ('K' <= this->country[i] && this->country[i] <= 'T') {
4875 normalized[available++] = '2';
4876 normalized[available++] = '0' + this->country[i] - 'K';
4877 }
4878 else if ('U' <= this->country[i] && this->country[i] <= 'Z') {
4879 normalized[available++] = '3';
4880 normalized[available++] = '0' + this->country[i] - 'U';
4881 }
4882 }
4883 normalized[available++] = this->check_digits[0];
4884 normalized[available++] = this->check_digits[1];
4885 normalized[available] = 0;
4886 break;
4887 }
4888 if ('0' <= this->bban[i] && this->bban[i] <= '9')
4889 normalized[available++] = this->bban[i];
4890 else if ('A' <= this->bban[i] && this->bban[i] <= 'J') {
4891 normalized[available++] = '1';
4892 normalized[available++] = '0' + this->bban[i] - 'A';
4893 }
4894 else if ('K' <= this->bban[i] && this->bban[i] <= 'T') {
4895 normalized[available++] = '2';
4896 normalized[available++] = '0' + this->bban[i] - 'K';
4897 }
4898 else if ('U' <= this->bban[i] && this->bban[i] <= 'Z') {
4899 normalized[available++] = '3';
4900 normalized[available++] = '0' + this->bban[i] - 'U';
4901 }
4902 }
4903
4904 // Calculate modulo 97.
4905 nominator = stdex::strtou32(normalized, 9, &next, 10);
4906 for (;;) {
4907 nominator %= 97;
4908 if (!normalized[next]) {
4909 this->is_valid = nominator == 1;
4910 break;
4911 }
4912 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
4913 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
4914 nominator = nominator * 10 + static_cast<uint32_t>(normalized[next] - '0');
4915 }
4916
4917 this->interval.start = start;
4918 return true;
4919
4920 error:
4921 invalidate();
4922 return false;
4923 }
4924
4925 std::shared_ptr<basic_parser<T>> m_space;
4926 };
4927
4928 using iban = basic_iban<char>;
4929 using wiban = basic_iban<wchar_t>;
4930#ifdef _UNICODE
4931 using tiban = wiban;
4932#else
4933 using tiban = iban;
4934#endif
4935 using sgml_iban = basic_iban<char>;
4936
4942 template <class T>
4944 {
4945 public:
4947 _In_ const std::shared_ptr<basic_parser<T>>& space,
4948 _In_ const std::locale& locale = std::locale()) :
4950 m_space(space)
4951 {
4952 this->check_digits[0] = 0;
4953 this->reference[0] = 0;
4954 this->is_valid = false;
4955 }
4956
4957 virtual void invalidate()
4958 {
4959 this->check_digits[0] = 0;
4960 this->reference[0] = 0;
4961 this->is_valid = false;
4963 }
4964
4968
4969 protected:
4970 virtual bool do_match(
4971 _In_reads_or_z_opt_(end) const T* text,
4972 _In_ size_t start = 0,
4973 _In_ size_t end = SIZE_MAX,
4974 _In_ int flags = match_default)
4975 {
4976 _Assume_(text || start >= end);
4977 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4978 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4979 size_t n, available, next;
4981
4982 this->interval.end = start;
4983 if (this->interval.end + 1 >= end ||
4984 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'R' ||
4985 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'F')
4986 goto error; // incomplete or wrong reference ID
4987 this->interval.end += 2;
4988
4989 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4990 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4991 goto error; // incomplete or invalid check digits
4992 this->check_digits[i] = text[this->interval.end];
4993 }
4994 this->check_digits[2] = 0;
4995
4996 for (n = 0;;) {
4997 if (m_space && m_space->match(text, this->interval.end, end, flags))
4998 this->interval.end = m_space->interval.end;
4999 for (size_t j = 0; j < 4; ++j) {
5000 if (this->interval.end >= end || !text[this->interval.end])
5001 goto out;
5002 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
5003 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
5004 if (n >= _countof(reference) - 1)
5005 goto error; // reference overflow
5006 this->reference[n++] = chr;
5007 this->interval.end++;
5008 }
5009 else
5010 goto out;
5011 }
5012 }
5013 out:
5014 if (!n)
5015 goto error; // reference too short
5016 this->reference[_countof(this->reference) - 1] = 0;
5017 for (size_t i = n, j = _countof(this->reference) - 1; i;)
5018 this->reference[--j] = this->reference[--i];
5019 for (size_t j = _countof(this->reference) - 1 - n; j;)
5020 this->reference[--j] = '0';
5021
5022 // Normalize creditor reference.
5023 T normalized[47];
5024 available = 0;
5025 for (size_t i = 0; ; ++i) {
5026 if (!this->reference[i]) {
5027 normalized[available++] = '2'; // R
5028 normalized[available++] = '7';
5029 normalized[available++] = '1'; // F
5030 normalized[available++] = '5';
5031 normalized[available++] = this->check_digits[0];
5032 normalized[available++] = this->check_digits[1];
5033 normalized[available] = 0;
5034 break;
5035 }
5036 if ('0' <= this->reference[i] && this->reference[i] <= '9')
5037 normalized[available++] = this->reference[i];
5038 else if ('A' <= this->reference[i] && this->reference[i] <= 'J') {
5039 normalized[available++] = '1';
5040 normalized[available++] = '0' + this->reference[i] - 'A';
5041 }
5042 else if ('K' <= this->reference[i] && this->reference[i] <= 'T') {
5043 normalized[available++] = '2';
5044 normalized[available++] = '0' + this->reference[i] - 'K';
5045 }
5046 else if ('U' <= this->reference[i] && this->reference[i] <= 'Z') {
5047 normalized[available++] = '3';
5048 normalized[available++] = '0' + this->reference[i] - 'U';
5049 }
5050 }
5051
5052 // Calculate modulo 97.
5053 nominator = stdex::strtou32(normalized, 9, &next, 10);
5054 for (;;) {
5055 nominator %= 97;
5056 if (!normalized[next]) {
5057 this->is_valid = nominator == 1;
5058 break;
5059 }
5060 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
5061 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
5062 nominator = nominator * 10 + static_cast<uint32_t>(normalized[next] - '0');
5063 }
5064
5065 this->interval.start = start;
5066 return true;
5067
5068 error:
5069 invalidate();
5070 return false;
5071 }
5072
5073 std::shared_ptr<basic_parser<T>> m_space;
5074 };
5075
5076 using creditor_reference = basic_creditor_reference<char>;
5077 using wcreditor_reference = basic_creditor_reference<wchar_t>;
5078#ifdef _UNICODE
5079 using tcreditor_reference = wcreditor_reference;
5080#else
5081 using tcreditor_reference = creditor_reference;
5082#endif
5083 using sgml_creditor_reference = basic_creditor_reference<char>;
5084
5090 template <class T>
5092 {
5093 public:
5094 basic_si_reference_part(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5095
5096 protected:
5097 virtual bool do_match(
5098 _In_reads_or_z_opt_(end) const T* text,
5099 _In_ size_t start = 0,
5100 _In_ size_t end = SIZE_MAX,
5101 _In_ int flags = match_default)
5102 {
5103 _Assume_(text || start >= end);
5104 this->interval.end = start;
5105 for (;;) {
5106 if (this->interval.end >= end || !text[this->interval.end])
5107 break;
5108 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9')
5109 this->interval.end++;
5110 else
5111 break;
5112 }
5114 this->interval.start = start;
5115 return true;
5116 }
5117 this->interval.invalidate();
5118 return false;
5119 }
5120 };
5121
5124#ifdef _UNICODE
5126#else
5128#endif
5130
5136 template <class T>
5138 {
5139 public:
5140 basic_si_reference_delimiter(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5141
5142 protected:
5143 virtual bool do_match(
5144 _In_reads_or_z_opt_(end) const T* text,
5145 _In_ size_t start = 0,
5146 _In_ size_t end = SIZE_MAX,
5147 _In_ int flags = match_default)
5148 {
5149 _Assume_(text || start >= end);
5150 if (start < end && text[start] == '-') {
5151 this->interval.end = (this->interval.start = start) + 1;
5152 return true;
5153 }
5154 this->interval.invalidate();
5155 return false;
5156 }
5157 };
5158
5161#ifdef _UNICODE
5163#else
5165#endif
5167
5175 template <class T>
5177 {
5178 public:
5180 _In_ const std::shared_ptr<basic_parser<T>>& space,
5181 _In_ const std::locale& locale = std::locale()) :
5183 part1(locale),
5184 part2(locale),
5185 part3(locale),
5186 is_valid(false),
5187 m_space(space),
5188 m_delimiter(locale)
5189 {
5190 this->model[0] = 0;
5191 }
5192
5193 virtual void invalidate()
5194 {
5195 this->model[0] = 0;
5196 this->part1.invalidate();
5197 this->part2.invalidate();
5198 this->part3.invalidate();
5199 this->is_valid = false;
5201 }
5202
5203 T model[3];
5208
5209 protected:
5210 virtual bool do_match(
5211 _In_reads_or_z_opt_(end) const T* text,
5212 _In_ size_t start = 0,
5213 _In_ size_t end = SIZE_MAX,
5214 _In_ int flags = match_default)
5215 {
5216 _Assume_(text || start >= end);
5217 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
5218 const bool case_insensitive = flags & match_case_insensitive ? true : false;
5219
5220 this->interval.end = start;
5221 if (this->interval.end + 1 >= end ||
5222 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'S' ||
5223 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'I')
5224 goto error; // incomplete or wrong reference ID
5225 this->interval.end += 2;
5226
5227 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5228 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5229 goto error; // incomplete or invalid model
5230 this->model[i] = text[this->interval.end];
5231 }
5232 this->model[2] = 0;
5233
5234 this->part1.invalidate();
5235 this->part2.invalidate();
5236 this->part3.invalidate();
5237 if (this->model[0] == '9' && this->model[1] == '9') {
5238 is_valid = true;
5239 this->interval.start = start;
5240 return true;
5241 }
5242
5243 if (m_space && m_space->match(text, this->interval.end, end, flags))
5244 this->interval.end = m_space->interval.end;
5245
5246 this->part1.match(text, this->interval.end, end, flags) &&
5247 this->m_delimiter.match(text, this->part1.interval.end, end, flags) &&
5248 this->part2.match(text, this->m_delimiter.interval.end, end, flags) &&
5249 this->m_delimiter.match(text, this->part2.interval.end, end, flags) &&
5250 this->part3.match(text, this->m_delimiter.interval.end, end, flags);
5251
5252 this->interval.start = start;
5253 if (this->part3.interval)
5254 this->interval.end = this->part3.interval.end;
5255 else if (this->part2.interval)
5256 this->interval.end = this->part2.interval.end;
5257 else if (this->part1.interval)
5258 this->interval.end = this->part1.interval.end;
5259 else
5260 this->interval.end = start + 4;
5261
5262 if (this->model[0] == '0' && this->model[1] == '0')
5263 is_valid =
5264 this->part3.interval ?
5265 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5266 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 :
5267 this->part2.interval ?
5268 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5269 this->part1.interval.size() + this->part2.interval.size() <= 20 :
5270 this->part1.interval ?
5271 this->part1.interval.size() <= 12 :
5272 false;
5273 else if (this->model[0] == '0' && this->model[1] == '1')
5274 is_valid =
5275 this->part3.interval ?
5276 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5277 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5278 check11(
5279 text + this->part1.interval.start, this->part1.interval.size(),
5280 text + this->part2.interval.start, this->part2.interval.size(),
5281 text + this->part3.interval.start, this->part3.interval.size()) :
5282 this->part2.interval ?
5283 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5284 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5285 check11(
5286 text + this->part1.interval.start, this->part1.interval.size(),
5287 text + this->part2.interval.start, this->part2.interval.size()) :
5288 this->part1.interval ?
5289 this->part1.interval.size() <= 12 &&
5290 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5291 false;
5292 else if (this->model[0] == '0' && this->model[1] == '2')
5293 is_valid =
5294 this->part3.interval ?
5295 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5296 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5297 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5298 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5299 false;
5300 else if (this->model[0] == '0' && this->model[1] == '3')
5301 is_valid =
5302 this->part3.interval ?
5303 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5304 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5305 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5306 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5307 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5308 false;
5309 else if (this->model[0] == '0' && this->model[1] == '4')
5310 is_valid =
5311 this->part3.interval ?
5312 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5313 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5314 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5315 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5316 false;
5317 else if ((this->model[0] == '0' || this->model[0] == '5') && this->model[1] == '5')
5318 is_valid =
5319 this->part3.interval ?
5320 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5321 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5322 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5323 this->part2.interval ?
5324 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5325 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5326 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5327 this->part1.interval ?
5328 this->part1.interval.size() <= 12 &&
5329 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5330 false;
5331 else if (this->model[0] == '0' && this->model[1] == '6')
5332 is_valid =
5333 this->part3.interval ?
5334 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5335 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5336 check11(
5337 text + this->part2.interval.start, this->part2.interval.size(),
5338 text + this->part3.interval.start, this->part3.interval.size()) :
5339 this->part2.interval ?
5340 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5341 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5342 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5343 false;
5344 else if (this->model[0] == '0' && this->model[1] == '7')
5345 is_valid =
5346 this->part3.interval ?
5347 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5348 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5349 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5350 this->part2.interval ?
5351 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5352 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5353 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5354 false;
5355 else if (this->model[0] == '0' && this->model[1] == '8')
5356 is_valid =
5357 this->part3.interval ?
5358 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5359 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5360 check11(
5361 text + this->part1.interval.start, this->part1.interval.size(),
5362 text + this->part2.interval.start, this->part2.interval.size()) &&
5363 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5364 false;
5365 else if (this->model[0] == '0' && this->model[1] == '9')
5366 is_valid =
5367 this->part3.interval ?
5368 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5369 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5370 check11(
5371 text + this->part1.interval.start, this->part1.interval.size(),
5372 text + this->part2.interval.start, this->part2.interval.size()) :
5373 this->part2.interval ?
5374 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5375 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5376 check11(
5377 text + this->part1.interval.start, this->part1.interval.size(),
5378 text + this->part2.interval.start, this->part2.interval.size()) :
5379 this->part1.interval ?
5380 this->part1.interval.size() <= 12 &&
5381 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5382 false;
5383 else if (this->model[0] == '1' && this->model[1] == '0')
5384 is_valid =
5385 this->part3.interval ?
5386 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5387 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5388 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5389 check11(
5390 text + this->part2.interval.start, this->part2.interval.size(),
5391 text + this->part3.interval.start, this->part3.interval.size()) :
5392 this->part2.interval ?
5393 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5394 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5395 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5396 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5397 false;
5398 else if (
5399 (this->model[0] == '1' && (this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5400 ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '8') ||
5401 (this->model[0] == '4' && (this->model[1] == '0' || this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5402 (this->model[0] == '5' && (this->model[1] == '1' || this->model[1] == '8')))
5403 is_valid =
5404 this->part3.interval ?
5405 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5406 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5407 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5408 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5409 this->part2.interval ?
5410 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5411 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5412 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5413 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5414 false;
5415 else if (this->model[0] == '1' && this->model[1] == '2')
5416 is_valid =
5417 this->part3.interval ? false :
5418 this->part2.interval ? false :
5419 this->part1.interval ?
5420 this->part1.interval.size() <= 13 &&
5421 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5422 false;
5423 else if ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '1')
5424 is_valid =
5425 this->part3.interval ? false :
5426 this->part2.interval ?
5427 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5428 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5429 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5430 false;
5431 else
5432 is_valid = true; // Assume models we don't handle as valid
5433 return true;
5434
5435 error:
5436 invalidate();
5437 return false;
5438 }
5439
5440 static bool check11(
5441 _In_count_(num_part1) const T* part1, _In_ size_t num_part1)
5442 {
5443 _Assume_(part1 && num_part1 >= 1);
5444 uint32_t nominator = 0, ponder = 2;
5445 for (size_t i = num_part1 - 1; i--; ++ponder)
5446 nominator += static_cast<uint32_t>(part1[i] - '0') * ponder;
5447 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5448 if (control >= 10)
5449 control = 0;
5450 return control == part1[num_part1 - 1] - '0';
5451 }
5452
5453 static bool check11(
5454 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5455 _In_count_(num_part2) const T* part2, _In_ size_t num_part2)
5456 {
5457 _Assume_(part1 || !num_part1);
5458 _Assume_(part2 && num_part2 >= 1);
5459 uint32_t nominator = 0, ponder = 2;
5460 for (size_t i = num_part2 - 1; i--; ++ponder)
5461 nominator += static_cast<uint32_t>(part2[i] - '0') * ponder;
5462 for (size_t i = num_part1; i--; ++ponder)
5463 nominator += static_cast<uint32_t>(part1[i] - '0') * ponder;
5464 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5465 if (control == 10)
5466 control = 0;
5467 return control == part2[num_part2 - 1] - '0';
5468 }
5469
5470 static bool check11(
5471 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5472 _In_count_(num_part2) const T* part2, _In_ size_t num_part2,
5473 _In_count_(num_part3) const T* part3, _In_ size_t num_part3)
5474 {
5475 _Assume_(part1 || !num_part1);
5476 _Assume_(part2 || !num_part2);
5477 _Assume_(part3 && num_part3 >= 1);
5478 uint32_t nominator = 0, ponder = 2;
5479 for (size_t i = num_part3 - 1; i--; ++ponder)
5480 nominator += static_cast<uint32_t>(part3[i] - '0') * ponder;
5481 for (size_t i = num_part2; i--; ++ponder)
5482 nominator += static_cast<uint32_t>(part2[i] - '0') * ponder;
5483 for (size_t i = num_part1; i--; ++ponder)
5484 nominator += static_cast<uint32_t>(part1[i] - '0') * ponder;
5485 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5486 if (control == 10)
5487 control = 0;
5488 return control == part2[num_part3 - 1] - '0';
5489 }
5490
5491 std::shared_ptr<basic_parser<T>> m_space;
5492 basic_si_reference_delimiter<T> m_delimiter;
5493 };
5494
5495 using si_reference = basic_si_reference<char>;
5496 using wsi_reference = basic_si_reference<wchar_t>;
5497#ifdef _UNICODE
5498 using tsi_reference = wsi_reference;
5499#else
5500 using tsi_reference = si_reference;
5501#endif
5502 using sgml_si_reference = basic_si_reference<char>;
5503
5507 template <class T>
5509 {
5510 public:
5512 _In_ const std::shared_ptr<basic_parser<T>>& element,
5513 _In_ const std::shared_ptr<basic_parser<T>>& digit,
5514 _In_ const std::shared_ptr<basic_parser<T>>& sign,
5515 _In_ const std::locale& locale = std::locale()) :
5517 m_element(element),
5518 m_digit(digit),
5519 m_sign(sign),
5520 has_digits(false),
5521 has_charge(false)
5522 {}
5523
5524 virtual void invalidate()
5525 {
5526 has_digits = false;
5527 has_charge = false;
5529 }
5530
5531 bool has_digits;
5532 bool has_charge;
5533
5534 protected:
5535 virtual bool do_match(
5536 _In_reads_or_z_opt_(end) const T* text,
5537 _In_ size_t start = 0,
5538 _In_ size_t end = SIZE_MAX,
5539 _In_ int flags = match_default)
5540 {
5541 _Assume_(text || start >= end);
5542
5543 has_digits = false;
5544 has_charge = false;
5545 this->interval.end = start;
5546
5547 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
5548 for (;;) {
5549 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
5550 this->interval.end = m_element->interval.end;
5551 while (m_digit->match(text, this->interval.end, end, flags)) {
5552 this->interval.end = m_digit->interval.end;
5553 has_digits = true;
5554 }
5555 }
5556 else if (start < this->interval.end) {
5557 if (m_sign->match(text, this->interval.end, end, flags)) {
5558 this->interval.end = m_sign->interval.end;
5559 has_charge = true;
5560 }
5561 this->interval.start = start;
5562 return true;
5563 }
5564 else {
5565 this->interval.invalidate();
5566 return false;
5567 }
5568 }
5569 }
5570
5571 std::shared_ptr<basic_parser<T>> m_element;
5572 std::shared_ptr<basic_parser<T>> m_digit;
5573 std::shared_ptr<basic_parser<T>> m_sign;
5574 };
5575
5578#ifdef _UNICODE
5580#else
5582#endif
5584
5589 {
5590 protected:
5591 virtual bool do_match(
5592 _In_reads_or_z_(end) const char* text,
5593 _In_ size_t start = 0,
5594 _In_ size_t end = SIZE_MAX,
5595 _In_ int flags = match_default)
5596 {
5597 _Assume_(text || start >= end);
5598 this->interval.end = start;
5599
5600 _Assume_(text || this->interval.end >= end);
5601 if (this->interval.end < end && text[this->interval.end]) {
5602 if (text[this->interval.end] == '\r') {
5603 this->interval.end++;
5604 if (this->interval.end < end && text[this->interval.end] == '\n') {
5605 this->interval.start = start;
5606 this->interval.end++;
5607 return true;
5608 }
5609 }
5610 else if (text[this->interval.end] == '\n') {
5611 this->interval.start = start;
5612 this->interval.end++;
5613 return true;
5614 }
5615 }
5616 this->interval.invalidate();
5617 return false;
5618 }
5619 };
5620
5624 class http_space : public parser
5625 {
5626 protected:
5627 virtual bool do_match(
5628 _In_reads_or_z_(end) const char* text,
5629 _In_ size_t start = 0,
5630 _In_ size_t end = SIZE_MAX,
5631 _In_ int flags = match_default)
5632 {
5633 _Assume_(text || start >= end);
5634 this->interval.end = start;
5635 if (m_line_break.match(text, this->interval.end, end, flags)) {
5636 this->interval.end = m_line_break.interval.end;
5637 if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5638 this->interval.start = start;
5639 this->interval.end++;
5640 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5641 return true;
5642 }
5643 }
5644 else if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5645 this->interval.start = start;
5646 this->interval.end++;
5647 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5648 return true;
5649 }
5650 this->interval.invalidate();
5651 return false;
5652 }
5653
5654 http_line_break m_line_break;
5655 };
5656
5660 class http_text_char : public parser
5661 {
5662 protected:
5663 virtual bool do_match(
5664 _In_reads_or_z_(end) const char* text,
5665 _In_ size_t start = 0,
5666 _In_ size_t end = SIZE_MAX,
5667 _In_ int flags = match_default)
5668 {
5669 _Assume_(text || start >= end);
5670 this->interval.end = start;
5671
5672 _Assume_(text || this->interval.end >= end);
5673 if (m_space.match(text, this->interval.end, end, flags)) {
5674 this->interval.start = start;
5675 this->interval.end = m_space.interval.end;
5676 return true;
5677 }
5678 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
5679 this->interval.start = start;
5680 this->interval.end++;
5681 return true;
5682 }
5683 this->interval.invalidate();
5684 return false;
5685 }
5686
5687 http_space m_space;
5688 };
5689
5693 class http_token : public parser
5694 {
5695 protected:
5696 virtual bool do_match(
5697 _In_reads_or_z_(end) const char* text,
5698 _In_ size_t start = 0,
5699 _In_ size_t end = SIZE_MAX,
5700 _In_ int flags = match_default)
5701 {
5702 _Assume_(text || start >= end);
5703 this->interval.end = start;
5704 for (;;) {
5705 if (this->interval.end < end && text[this->interval.end]) {
5706 if ((unsigned int)text[this->interval.end] < 0x20 ||
5707 (unsigned int)text[this->interval.end] == 0x7f ||
5708 text[this->interval.end] == '(' ||
5709 text[this->interval.end] == ')' ||
5710 text[this->interval.end] == '<' ||
5711 text[this->interval.end] == '>' ||
5712 text[this->interval.end] == '@' ||
5713 text[this->interval.end] == ',' ||
5714 text[this->interval.end] == ';' ||
5715 text[this->interval.end] == ':' ||
5716 text[this->interval.end] == '\\' ||
5717 text[this->interval.end] == '\"' ||
5718 text[this->interval.end] == '/' ||
5719 text[this->interval.end] == '[' ||
5720 text[this->interval.end] == ']' ||
5721 text[this->interval.end] == '?' ||
5722 text[this->interval.end] == '=' ||
5723 text[this->interval.end] == '{' ||
5724 text[this->interval.end] == '}' ||
5725 stdex::isspace(text[this->interval.end]))
5726 break;
5727 else
5728 this->interval.end++;
5729 }
5730 else
5731 break;
5732 }
5734 this->interval.start = start;
5735 return true;
5736 }
5737 else {
5738 this->interval.invalidate();
5739 return false;
5740 }
5741 }
5742 };
5743
5748 {
5749 public:
5750 virtual void invalidate()
5751 {
5752 content.start = 1;
5753 content.end = 0;
5754 parser::invalidate();
5755 }
5756
5758
5759 protected:
5760 virtual bool do_match(
5761 _In_reads_or_z_(end) const char* text,
5762 _In_ size_t start = 0,
5763 _In_ size_t end = SIZE_MAX,
5764 _In_ int flags = match_default)
5765 {
5766 _Assume_(text || start >= end);
5767 this->interval.end = start;
5768 if (this->interval.end < end && text[this->interval.end] != '"')
5769 goto error;
5770 this->interval.end++;
5771 content.start = this->interval.end;
5772 for (;;) {
5773 _Assume_(text || this->interval.end >= end);
5774 if (this->interval.end < end && text[this->interval.end]) {
5775 if (text[this->interval.end] == '"') {
5776 content.end = this->interval.end;
5777 this->interval.end++;
5778 break;
5779 }
5780 else if (text[this->interval.end] == '\\') {
5781 this->interval.end++;
5782 if (this->interval.end < end && text[this->interval.end]) {
5783 this->interval.end++;
5784 }
5785 else
5786 goto error;
5787 }
5788 else if (m_chr.match(text, this->interval.end, end, flags))
5789 this->interval.end++;
5790 else
5791 goto error;
5792 }
5793 else
5794 goto error;
5795 }
5796 this->interval.start = start;
5797 return true;
5798
5799 error:
5800 invalidate();
5801 return false;
5802 }
5803
5804 http_text_char m_chr;
5805 };
5806
5810 class http_value : public parser
5811 {
5812 public:
5813 virtual void invalidate()
5814 {
5815 string.invalidate();
5816 token.invalidate();
5817 parser::invalidate();
5818 }
5819
5822
5823 protected:
5824 virtual bool do_match(
5825 _In_reads_or_z_(end) const char* text,
5826 _In_ size_t start = 0,
5827 _In_ size_t end = SIZE_MAX,
5828 _In_ int flags = match_default)
5829 {
5830 _Assume_(text || start >= end);
5831 this->interval.end = start;
5832 if (string.match(text, this->interval.end, end, flags)) {
5833 token.invalidate();
5834 this->interval.end = string.interval.end;
5835 this->interval.start = start;
5836 return true;
5837 }
5838 else if (token.match(text, this->interval.end, end, flags)) {
5839 string.invalidate();
5840 this->interval.end = token.interval.end;
5841 this->interval.start = start;
5842 return true;
5843 }
5844 else {
5845 this->interval.invalidate();
5846 return false;
5847 }
5848 }
5849 };
5850
5854 class http_parameter : public parser
5855 {
5856 public:
5857 virtual void invalidate()
5858 {
5859 name.invalidate();
5860 value.invalidate();
5861 parser::invalidate();
5862 }
5863
5866
5867 protected:
5868 virtual bool do_match(
5869 _In_reads_or_z_(end) const char* text,
5870 _In_ size_t start = 0,
5871 _In_ size_t end = SIZE_MAX,
5872 _In_ int flags = match_default)
5873 {
5874 _Assume_(text || start >= end);
5875 this->interval.end = start;
5876 if (name.match(text, this->interval.end, end, flags))
5877 this->interval.end = name.interval.end;
5878 else
5879 goto error;
5880 while (m_space.match(text, this->interval.end, end, flags))
5881 this->interval.end = m_space.interval.end;
5882 _Assume_(text || this->interval.end >= end);
5883 if (this->interval.end < end && text[this->interval.end] == '=')
5884 this->interval.end++;
5885 else
5886 while (m_space.match(text, this->interval.end, end, flags))
5887 this->interval.end = m_space.interval.end;
5888 if (value.match(text, this->interval.end, end, flags))
5889 this->interval.end = value.interval.end;
5890 else
5891 goto error;
5892 this->interval.start = start;
5893 return true;
5894
5895 error:
5896 invalidate();
5897 return false;
5898 }
5899
5900 http_space m_space;
5901 };
5902
5906 class http_any_type : public parser
5907 {
5908 protected:
5909 virtual bool do_match(
5910 _In_reads_or_z_(end) const char* text,
5911 _In_ size_t start = 0,
5912 _In_ size_t end = SIZE_MAX,
5913 _In_ int flags = match_default)
5914 {
5915 _Assume_(text || start >= end);
5916 if (start + 2 < end &&
5917 text[start] == '*' &&
5918 text[start + 1] == '/' &&
5919 text[start + 2] == '*')
5920 {
5921 this->interval.end = (this->interval.start = start) + 3;
5922 return true;
5923 }
5924 else if (start < end && text[start] == '*') {
5925 this->interval.end = (this->interval.start = start) + 1;
5926 return true;
5927 }
5928 else {
5929 this->interval.invalidate();
5930 return false;
5931 }
5932 }
5933 };
5934
5939 {
5940 public:
5941 virtual void invalidate()
5942 {
5943 type.invalidate();
5944 subtype.invalidate();
5945 parser::invalidate();
5946 }
5947
5948 http_token type;
5949 http_token subtype;
5950
5951 protected:
5952 virtual bool do_match(
5953 _In_reads_or_z_(end) const char* text,
5954 _In_ size_t start = 0,
5955 _In_ size_t end = SIZE_MAX,
5956 _In_ int flags = match_default)
5957 {
5958 _Assume_(text || start >= end);
5959 this->interval.end = start;
5960 if (type.match(text, this->interval.end, end, flags))
5961 this->interval.end = type.interval.end;
5962 else
5963 goto error;
5964 while (m_space.match(text, this->interval.end, end, flags))
5965 this->interval.end = m_space.interval.end;
5966 if (this->interval.end < end && text[this->interval.end] == '/')
5967 this->interval.end++;
5968 else
5969 goto error;
5970 while (m_space.match(text, this->interval.end, end, flags))
5971 this->interval.end = m_space.interval.end;
5972 if (subtype.match(text, this->interval.end, end, flags))
5973 this->interval.end = subtype.interval.end;
5974 else
5975 goto error;
5976 this->interval.start = start;
5977 return true;
5978
5979 error:
5980 invalidate();
5981 return false;
5982 }
5983
5984 http_space m_space;
5985 };
5986
5991 {
5992 public:
5993 virtual void invalidate()
5994 {
5995 params.clear();
5996 http_media_range::invalidate();
5997 }
5998
5999 std::list<http_parameter> params;
6000
6001 protected:
6002 virtual bool do_match(
6003 _In_reads_or_z_(end) const char* text,
6004 _In_ size_t start = 0,
6005 _In_ size_t end = SIZE_MAX,
6006 _In_ int flags = match_default)
6007 {
6008 _Assume_(text || start >= end);
6009 if (!http_media_range::do_match(text, start, end, flags))
6010 goto error;
6011 params.clear();
6012 for (;;) {
6013 if (this->interval.end < end && text[this->interval.end]) {
6014 if (m_space.match(text, this->interval.end, end, flags))
6015 this->interval.end = m_space.interval.end;
6016 else if (text[this->interval.end] == ';') {
6017 this->interval.end++;
6018 while (m_space.match(text, this->interval.end, end, flags))
6019 this->interval.end = m_space.interval.end;
6020 http_parameter param;
6021 if (param.match(text, this->interval.end, end, flags)) {
6022 this->interval.end = param.interval.end;
6023 params.push_back(std::move(param));
6024 }
6025 else
6026 break;
6027 }
6028 else
6029 break;
6030 }
6031 else
6032 break;
6033 }
6034 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
6035 return true;
6036
6037 error:
6038 invalidate();
6039 return false;
6040 }
6041 };
6042
6047 {
6048 protected:
6049 virtual bool do_match(
6050 _In_reads_or_z_(end) const char* text,
6051 _In_ size_t start = 0,
6052 _In_ size_t end = SIZE_MAX,
6053 _In_ int flags = match_default)
6054 {
6055 _Assume_(text || start >= end);
6056 this->interval.end = start;
6057 for (;;) {
6058 if (this->interval.end < end && text[this->interval.end]) {
6059 if ((unsigned int)text[this->interval.end] < 0x20 ||
6060 (unsigned int)text[this->interval.end] == 0x7f ||
6061 text[this->interval.end] == ':' ||
6062 text[this->interval.end] == '/' ||
6063 stdex::isspace(text[this->interval.end]))
6064 break;
6065 else
6066 this->interval.end++;
6067 }
6068 else
6069 break;
6070 }
6072 this->interval.start = start;
6073 return true;
6074 }
6075 this->interval.invalidate();
6076 return false;
6077 }
6078 };
6079
6083 class http_url_port : public parser
6084 {
6085 public:
6086 http_url_port(_In_ const std::locale& locale = std::locale()) :
6087 parser(locale),
6088 value(0)
6089 {}
6090
6091 virtual void invalidate()
6092 {
6093 value = 0;
6094 parser::invalidate();
6095 }
6096
6097 uint16_t value;
6098
6099 protected:
6100 virtual bool do_match(
6101 _In_reads_or_z_(end) const char* text,
6102 _In_ size_t start = 0,
6103 _In_ size_t end = SIZE_MAX,
6104 _In_ int flags = match_default)
6105 {
6106 _Assume_(text || start >= end);
6107 value = 0;
6108 this->interval.end = start;
6109 for (;;) {
6110 if (this->interval.end < end && text[this->interval.end]) {
6111 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6112 size_t _value = static_cast<size_t>(value) * 10 + static_cast<size_t>(text[this->interval.end] - '0');
6113 if (_value > UINT16_MAX) {
6114 value = 0;
6115 this->interval.invalidate();
6116 return false;
6117 }
6118 value = (uint16_t)_value;
6119 this->interval.end++;
6120 }
6121 else
6122 break;
6123 }
6124 else
6125 break;
6126 }
6128 this->interval.start = start;
6129 return true;
6130 }
6131 this->interval.invalidate();
6132 return false;
6133 }
6134 };
6135
6140 {
6141 protected:
6142 virtual bool do_match(
6143 _In_reads_or_z_(end) const char* text,
6144 _In_ size_t start = 0,
6145 _In_ size_t end = SIZE_MAX,
6146 _In_ int flags = match_default)
6147 {
6148 _Assume_(text || start >= end);
6149 this->interval.end = start;
6150 for (;;) {
6151 if (this->interval.end < end && text[this->interval.end]) {
6152 if ((unsigned int)text[this->interval.end] < 0x20 ||
6153 (unsigned int)text[this->interval.end] == 0x7f ||
6154 text[this->interval.end] == '?' ||
6155 text[this->interval.end] == '/' ||
6156 stdex::isspace(text[this->interval.end]))
6157 break;
6158 else
6159 this->interval.end++;
6160 }
6161 else
6162 break;
6163 }
6164 this->interval.start = start;
6165 return true;
6166 }
6167 };
6168
6172 class http_url_path : public parser
6173 {
6174 public:
6175 virtual void invalidate()
6176 {
6177 segments.clear();
6178 parser::invalidate();
6179 }
6180
6181 std::vector<http_url_path_segment> segments;
6182
6183 protected:
6184 virtual bool do_match(
6185 _In_reads_or_z_(end) const char* text,
6186 _In_ size_t start = 0,
6187 _In_ size_t end = SIZE_MAX,
6188 _In_ int flags = match_default)
6189 {
6190 _Assume_(text || start >= end);
6192 this->interval.end = start;
6193 segments.clear();
6194 _Assume_(text || this->interval.end >= end);
6195 if (this->interval.end < end && text[this->interval.end] != '/')
6196 goto error;
6197 this->interval.end++;
6198 s.match(text, this->interval.end, end, flags);
6199 segments.push_back(s);
6200 this->interval.end = s.interval.end;
6201 for (;;) {
6202 if (this->interval.end < end && text[this->interval.end]) {
6203 if (text[this->interval.end] == '/') {
6204 this->interval.end++;
6205 s.match(text, this->interval.end, end, flags);
6206 segments.push_back(s);
6207 this->interval.end = s.interval.end;
6208 }
6209 else
6210 break;
6211 }
6212 else
6213 break;
6214 }
6215 this->interval.start = start;
6216 return true;
6217
6218 error:
6219 invalidate();
6220 return false;
6221 }
6222 };
6223
6228 {
6229 public:
6230 virtual void invalidate()
6231 {
6232 name.start = 1;
6233 name.end = 0;
6234 value.start = 1;
6235 value.end = 0;
6236 parser::invalidate();
6237 }
6238
6241
6242 protected:
6243 virtual bool do_match(
6244 _In_reads_or_z_(end) const char* text,
6245 _In_ size_t start = 0,
6246 _In_ size_t end = SIZE_MAX,
6247 _In_ int flags = match_default)
6248 {
6249 _Assume_(text || start >= end);
6250 this->interval.end = start;
6251 name.start = this->interval.end;
6252 for (;;) {
6253 if (this->interval.end < end && text[this->interval.end]) {
6254 if ((unsigned int)text[this->interval.end] < 0x20 ||
6255 (unsigned int)text[this->interval.end] == 0x7f ||
6256 text[this->interval.end] == '&' ||
6257 text[this->interval.end] == '=' ||
6258 stdex::isspace(text[this->interval.end]))
6259 break;
6260 else
6261 this->interval.end++;
6262 }
6263 else
6264 break;
6265 }
6267 name.end = this->interval.end;
6268 else
6269 goto error;
6270 if (text[this->interval.end] == '=') {
6271 this->interval.end++;
6272 value.start = this->interval.end;
6273 for (;;) {
6274 if (this->interval.end < end && text[this->interval.end]) {
6275 if ((unsigned int)text[this->interval.end] < 0x20 ||
6276 (unsigned int)text[this->interval.end] == 0x7f ||
6277 text[this->interval.end] == '&' ||
6278 stdex::isspace(text[this->interval.end]))
6279 break;
6280 else
6281 this->interval.end++;
6282 }
6283 else
6284 break;
6285 }
6286 value.end = this->interval.end;
6287 }
6288 else {
6289 value.start = 1;
6290 value.end = 0;
6291 }
6292 this->interval.start = start;
6293 return true;
6294
6295 error:
6296 invalidate();
6297 return false;
6298 }
6299 };
6300
6304 class http_url : public parser
6305 {
6306 public:
6307 http_url(_In_ const std::locale& locale = std::locale()) :
6308 parser(locale),
6309 port(locale)
6310 {}
6311
6312 virtual void invalidate()
6313 {
6314 server.invalidate();
6315 port.invalidate();
6316 path.invalidate();
6317 params.clear();
6318 parser::invalidate();
6319 }
6320
6321 http_url_server server;
6322 http_url_port port;
6323 http_url_path path;
6324 std::list<http_url_parameter> params;
6325
6326 protected:
6327 virtual bool do_match(
6328 _In_reads_or_z_(end) const char* text,
6329 _In_ size_t start = 0,
6330 _In_ size_t end = SIZE_MAX,
6331 _In_ int flags = match_default)
6332 {
6333 _Assume_(text || start >= end);
6334 this->interval.end = start;
6335
6336 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", SIZE_MAX, m_locale) == 0) {
6337 this->interval.end += 7;
6338 if (server.match(text, this->interval.end, end, flags))
6339 this->interval.end = server.interval.end;
6340 else
6341 goto error;
6342 if (this->interval.end < end && text[this->interval.end] == ':') {
6343 this->interval.end++;
6344 if (port.match(text, this->interval.end, end, flags))
6345 this->interval.end = port.interval.end;
6346 }
6347 else {
6348 port.invalidate();
6349 port.value = 80;
6350 }
6351 }
6352 else {
6353 server.invalidate();
6354 port.invalidate();
6355 port.value = 80;
6356 }
6357
6358 if (path.match(text, this->interval.end, end, flags))
6359 this->interval.end = path.interval.end;
6360 else
6361 goto error;
6362
6363 params.clear();
6364
6365 if (this->interval.end < end && text[this->interval.end] == '?') {
6366 this->interval.end++;
6367 for (;;) {
6368 if (this->interval.end < end && text[this->interval.end]) {
6369 if ((unsigned int)text[this->interval.end] < 0x20 ||
6370 (unsigned int)text[this->interval.end] == 0x7f ||
6371 stdex::isspace(text[this->interval.end]))
6372 break;
6373 else if (text[this->interval.end] == '&')
6374 this->interval.end++;
6375 else {
6376 http_url_parameter param;
6377 if (param.match(text, this->interval.end, end, flags)) {
6378 this->interval.end = param.interval.end;
6379 params.push_back(std::move(param));
6380 }
6381 else
6382 break;
6383 }
6384 }
6385 else
6386 break;
6387 }
6388 }
6389
6390 this->interval.start = start;
6391 return true;
6392
6393 error:
6394 invalidate();
6395 return false;
6396 }
6397 };
6398
6402 class http_language : public parser
6403 {
6404 public:
6405 virtual void invalidate()
6406 {
6407 components.clear();
6408 parser::invalidate();
6409 }
6410
6411 std::vector<stdex::interval<size_t>> components;
6412
6413 protected:
6414 virtual bool do_match(
6415 _In_reads_or_z_(end) const char* text,
6416 _In_ size_t start = 0,
6417 _In_ size_t end = SIZE_MAX,
6418 _In_ int flags = match_default)
6419 {
6420 _Assume_(text || start >= end);
6421 this->interval.end = start;
6422 components.clear();
6423 for (;;) {
6424 if (this->interval.end < end && text[this->interval.end]) {
6426 k.end = this->interval.end;
6427 for (;;) {
6428 if (k.end < end && text[k.end]) {
6429 if (stdex::isalpha(text[k.end]))
6430 k.end++;
6431 else
6432 break;
6433 }
6434 else
6435 break;
6436 }
6437 if (this->interval.end < k.end) {
6438 k.start = this->interval.end;
6439 this->interval.end = k.end;
6440 components.push_back(k);
6441 }
6442 else
6443 break;
6444 if (this->interval.end < end && text[this->interval.end] == '-')
6445 this->interval.end++;
6446 else
6447 break;
6448 }
6449 else
6450 break;
6451 }
6452 if (!components.empty()) {
6453 this->interval.start = start;
6454 this->interval.end = components.back().end;
6455 return true;
6456 }
6457 this->interval.invalidate();
6458 return false;
6459 }
6460 };
6461
6465 class http_weight : public parser
6466 {
6467 public:
6468 http_weight(_In_ const std::locale& locale = std::locale()) :
6469 parser(locale),
6470 value(1.0f)
6471 {}
6472
6473 virtual void invalidate()
6474 {
6475 value = 1.0f;
6476 parser::invalidate();
6477 }
6478
6479 float value;
6480
6481 protected:
6482 virtual bool do_match(
6483 _In_reads_or_z_(end) const char* text,
6484 _In_ size_t start = 0,
6485 _In_ size_t end = SIZE_MAX,
6486 _In_ int flags = match_default)
6487 {
6488 _Assume_(text || start >= end);
6489 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
6490 this->interval.end = start;
6491 for (;;) {
6492 if (this->interval.end < end && text[this->interval.end]) {
6493 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6494 celi_del = celi_del * 10 + static_cast<size_t>(text[this->interval.end] - '0');
6495 this->interval.end++;
6496 }
6497 else if (text[this->interval.end] == '.') {
6498 this->interval.end++;
6499 for (;;) {
6500 if (this->interval.end < end && text[this->interval.end]) {
6501 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6502 decimalni_del = decimalni_del * 10 + static_cast<size_t>(text[this->interval.end] - '0');
6503 decimalni_del_n *= 10;
6504 this->interval.end++;
6505 }
6506 else
6507 break;
6508 }
6509 else
6510 break;
6511 }
6512 break;
6513 }
6514 else
6515 break;
6516 }
6517 else
6518 break;
6519 }
6522 this->interval.start = start;
6523 return true;
6524 }
6525 value = 1.0f;
6526 this->interval.invalidate();
6527 return false;
6528 }
6529 };
6530
6534 class http_asterisk : public parser
6535 {
6536 protected:
6537 virtual bool do_match(
6538 _In_reads_or_z_(end) const char* text,
6539 _In_ size_t start = 0,
6540 _In_ size_t end = SIZE_MAX,
6541 _In_ int flags = match_default)
6542 {
6543 _Assume_(text || end <= start);
6544 if (start < end && text[start] == '*') {
6545 this->interval.end = (this->interval.start = start) + 1;
6546 return true;
6547 }
6548 this->interval.invalidate();
6549 return false;
6550 }
6551 };
6552
6556 template <class T, class T_asterisk = http_asterisk>
6558 {
6559 public:
6560 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
6561 parser(locale),
6562 factor(locale)
6563 {}
6564
6565 virtual void invalidate()
6566 {
6567 asterisk.invalidate();
6568 value.invalidate();
6569 factor.invalidate();
6570 parser::invalidate();
6571 }
6572
6573 T_asterisk asterisk;
6574 T value;
6575 http_weight factor;
6576
6577 protected:
6578 virtual bool do_match(
6579 _In_reads_or_z_(end) const char* text,
6580 _In_ size_t start = 0,
6581 _In_ size_t end = SIZE_MAX,
6582 _In_ int flags = match_default)
6583 {
6584 _Assume_(text || start >= end);
6585 size_t konec_vrednosti;
6586 this->interval.end = start;
6587 if (asterisk.match(text, this->interval.end, end, flags)) {
6588 this->interval.end = konec_vrednosti = asterisk.interval.end;
6589 value.invalidate();
6590 }
6591 else if (value.match(text, this->interval.end, end, flags)) {
6592 this->interval.end = konec_vrednosti = value.interval.end;
6593 asterisk.invalidate();
6594 }
6595 else {
6596 asterisk.invalidate();
6597 value.invalidate();
6598 this->interval.invalidate();
6599 return false;
6600 }
6601
6602 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6603 if (this->interval.end < end && text[this->interval.end] == ';') {
6604 this->interval.end++;
6605 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6606 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
6607 this->interval.end++;
6608 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6609 if (this->interval.end < end && text[this->interval.end] == '=') {
6610 this->interval.end++;
6611 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6612 if (factor.match(text, this->interval.end, end, flags))
6613 this->interval.end = factor.interval.end;
6614 }
6615 }
6616 }
6617 if (!factor.interval) {
6618 factor.invalidate();
6619 this->interval.end = konec_vrednosti;
6620 }
6621 this->interval.start = start;
6622 return true;
6623 }
6624 };
6625
6630 {
6631 public:
6632 virtual void invalidate()
6633 {
6634 name.invalidate();
6635 value.invalidate();
6636 parser::invalidate();
6637 }
6638
6639 http_token name;
6640 http_value value;
6641
6642 protected:
6643 virtual bool do_match(
6644 _In_reads_or_z_(end) const char* text,
6645 _In_ size_t start = 0,
6646 _In_ size_t end = SIZE_MAX,
6647 _In_ int flags = match_default)
6648 {
6649 _Assume_(text || start >= end);
6650 this->interval.end = start;
6651 if (this->interval.end < end && text[this->interval.end] == '$')
6652 this->interval.end++;
6653 else
6654 goto error;
6655 if (name.match(text, this->interval.end, end, flags))
6656 this->interval.end = name.interval.end;
6657 else
6658 goto error;
6659 while (m_space.match(text, this->interval.end, end, flags))
6660 this->interval.end = m_space.interval.end;
6661 if (this->interval.end < end && text[this->interval.end] == '=')
6662 this->interval.end++;
6663 else
6664 goto error;
6665 while (m_space.match(text, this->interval.end, end, flags))
6666 this->interval.end = m_space.interval.end;
6667 if (value.match(text, this->interval.end, end, flags))
6668 this->interval.end = value.interval.end;
6669 else
6670 goto error;
6671 this->interval.start = start;
6672 return true;
6673
6674 error:
6675 invalidate();
6676 return false;
6677 }
6678
6679 http_space m_space;
6680 };
6681
6685 class http_cookie : public parser
6686 {
6687 public:
6688 virtual void invalidate()
6689 {
6690 name.invalidate();
6691 value.invalidate();
6692 params.clear();
6693 parser::invalidate();
6694 }
6695
6698 std::list<http_cookie_parameter> params;
6699
6700 protected:
6701 virtual bool do_match(
6702 _In_reads_or_z_(end) const char* text,
6703 _In_ size_t start = 0,
6704 _In_ size_t end = SIZE_MAX,
6705 _In_ int flags = match_default)
6706 {
6707 _Assume_(text || start >= end);
6708 this->interval.end = start;
6709 if (name.match(text, this->interval.end, end, flags))
6710 this->interval.end = name.interval.end;
6711 else
6712 goto error;
6713 while (m_space.match(text, this->interval.end, end, flags))
6714 this->interval.end = m_space.interval.end;
6715 if (this->interval.end < end && text[this->interval.end] == '=')
6716 this->interval.end++;
6717 else
6718 goto error;
6719 while (m_space.match(text, this->interval.end, end, flags))
6720 this->interval.end = m_space.interval.end;
6721 if (value.match(text, this->interval.end, end, flags))
6722 this->interval.end = value.interval.end;
6723 else
6724 goto error;
6725 params.clear();
6726 for (;;) {
6727 if (this->interval.end < end && text[this->interval.end]) {
6728 if (m_space.match(text, this->interval.end, end, flags))
6729 this->interval.end = m_space.interval.end;
6730 else if (text[this->interval.end] == ';') {
6731 this->interval.end++;
6732 while (m_space.match(text, this->interval.end, end, flags))
6733 this->interval.end = m_space.interval.end;
6735 if (param.match(text, this->interval.end, end, flags)) {
6736 this->interval.end = param.interval.end;
6737 params.push_back(std::move(param));
6738 }
6739 else
6740 break;
6741 }
6742 else
6743 break;
6744 }
6745 else
6746 break;
6747 }
6748 this->interval.start = start;
6749 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
6750 return true;
6751
6752 error:
6753 invalidate();
6754 return false;
6755 }
6756
6757 http_space m_space;
6758 };
6759
6763 class http_agent : public parser
6764 {
6765 public:
6766 virtual void invalidate()
6767 {
6768 type.start = 1;
6769 type.end = 0;
6770 version.start = 1;
6771 version.end = 0;
6772 parser::invalidate();
6773 }
6774
6777
6778 protected:
6779 virtual bool do_match(
6780 _In_reads_or_z_(end) const char* text,
6781 _In_ size_t start = 0,
6782 _In_ size_t end = SIZE_MAX,
6783 _In_ int flags = match_default)
6784 {
6785 _Assume_(text || start >= end);
6786 this->interval.end = start;
6787 type.start = this->interval.end;
6788 for (;;) {
6789 if (this->interval.end < end && text[this->interval.end]) {
6790 if (text[this->interval.end] == '/') {
6791 type.end = this->interval.end;
6792 this->interval.end++;
6793 version.start = this->interval.end;
6794 for (;;) {
6795 if (this->interval.end < end && text[this->interval.end]) {
6796 if (stdex::isspace(text[this->interval.end])) {
6797 version.end = this->interval.end;
6798 break;
6799 }
6800 else
6801 this->interval.end++;
6802 }
6803 else {
6804 version.end = this->interval.end;
6805 break;
6806 }
6807 }
6808 break;
6809 }
6810 else if (stdex::isspace(text[this->interval.end])) {
6811 type.end = this->interval.end;
6812 break;
6813 }
6814 else
6815 this->interval.end++;
6816 }
6817 else {
6818 type.end = this->interval.end;
6819 break;
6820 }
6821 }
6823 this->interval.start = start;
6824 return true;
6825 }
6826 type.start = 1;
6827 type.end = 0;
6828 version.start = 1;
6829 version.end = 0;
6830 this->interval.invalidate();
6831 return false;
6832 }
6833 };
6834
6838 class http_protocol : public parser
6839 {
6840 public:
6841 http_protocol(_In_ const std::locale& locale = std::locale()) :
6842 parser(locale),
6843 version(0x009)
6844 {}
6845
6846 virtual void invalidate()
6847 {
6848 type.start = 1;
6849 type.end = 0;
6850 version_maj.start = 1;
6851 version_maj.end = 0;
6852 version_min.start = 1;
6853 version_min.end = 0;
6854 version = 0x009;
6855 parser::invalidate();
6856 }
6857
6859 stdex::interval<size_t> version_maj;
6860 stdex::interval<size_t> version_min;
6862
6863 protected:
6864 virtual bool do_match(
6865 _In_reads_or_z_(end) const char* text,
6866 _In_ size_t start = 0,
6867 _In_ size_t end = SIZE_MAX,
6868 _In_ int flags = match_default)
6869 {
6870 _Assume_(text || start >= end);
6871 this->interval.end = start;
6872 type.start = this->interval.end;
6873 for (;;) {
6874 if (this->interval.end < end && text[this->interval.end]) {
6875 if (text[this->interval.end] == '/') {
6876 type.end = this->interval.end;
6877 this->interval.end++;
6878 break;
6879 }
6880 else if (stdex::isspace(text[this->interval.end]))
6881 goto error;
6882 else
6883 this->interval.end++;
6884 }
6885 else {
6886 type.end = this->interval.end;
6887 goto error;
6888 }
6889 }
6890 version_maj.start = this->interval.end;
6891 for (;;) {
6892 if (this->interval.end < end && text[this->interval.end]) {
6893 if (text[this->interval.end] == '.') {
6894 version_maj.end = this->interval.end;
6895 this->interval.end++;
6896 version_min.start = this->interval.end;
6897 for (;;) {
6898 if (this->interval.end < end && text[this->interval.end]) {
6899 if (stdex::isspace(text[this->interval.end])) {
6900 version_min.end = this->interval.end;
6901 version =
6902 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6903 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6904 break;
6905 }
6906 else
6907 this->interval.end++;
6908 }
6909 else
6910 goto error;
6911 }
6912 break;
6913 }
6914 else if (stdex::isspace(text[this->interval.end])) {
6915 version_maj.end = this->interval.end;
6916 version_min.start = 1;
6917 version_min.end = 0;
6918 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6919 break;
6920 }
6921 else
6922 this->interval.end++;
6923 }
6924 else
6925 goto error;
6926 }
6927 this->interval.start = start;
6928 return true;
6929
6930 error:
6931 invalidate();
6932 return false;
6933 }
6934 };
6935
6939 class http_request : public parser
6940 {
6941 public:
6942 http_request(_In_ const std::locale& locale = std::locale()) :
6943 parser(locale),
6944 url(locale),
6945 protocol(locale)
6946 {}
6947
6948 virtual void invalidate()
6949 {
6950 verb.start = 1;
6951 verb.end = 0;
6952 url.invalidate();
6953 protocol.invalidate();
6954 parser::invalidate();
6955 }
6956
6958 http_url url;
6959 http_protocol protocol;
6960
6961 protected:
6962 virtual bool do_match(
6963 _In_reads_or_z_(end) const char* text,
6964 _In_ size_t start = 0,
6965 _In_ size_t end = SIZE_MAX,
6966 _In_ int flags = match_default)
6967 {
6968 _Assume_(text || start >= end);
6969 this->interval.end = start;
6970
6971 for (;;) {
6972 if (m_line_break.match(text, this->interval.end, end, flags))
6973 goto error;
6974 else if (this->interval.end < end && text[this->interval.end]) {
6975 if (stdex::isspace(text[this->interval.end]))
6976 this->interval.end++;
6977 else
6978 break;
6979 }
6980 else
6981 goto error;
6982 }
6983 verb.start = this->interval.end;
6984 for (;;) {
6985 if (m_line_break.match(text, this->interval.end, end, flags))
6986 goto error;
6987 else if (this->interval.end < end && text[this->interval.end]) {
6988 if (stdex::isspace(text[this->interval.end])) {
6989 verb.end = this->interval.end;
6990 this->interval.end++;
6991 break;
6992 }
6993 else
6994 this->interval.end++;
6995 }
6996 else
6997 goto error;
6998 }
6999
7000 for (;;) {
7001 if (m_line_break.match(text, this->interval.end, end, flags))
7002 goto error;
7003 else if (this->interval.end < end && text[this->interval.end]) {
7004 if (stdex::isspace(text[this->interval.end]))
7005 this->interval.end++;
7006 else
7007 break;
7008 }
7009 else
7010 goto error;
7011 }
7012 if (url.match(text, this->interval.end, end, flags))
7013 this->interval.end = url.interval.end;
7014 else
7015 goto error;
7016
7017 protocol.invalidate();
7018 for (;;) {
7019 if (m_line_break.match(text, this->interval.end, end, flags)) {
7020 this->interval.end = m_line_break.interval.end;
7021 goto end;
7022 }
7023 else if (this->interval.end < end && text[this->interval.end]) {
7024 if (stdex::isspace(text[this->interval.end]))
7025 this->interval.end++;
7026 else
7027 break;
7028 }
7029 else
7030 goto end;
7031 }
7032 for (;;) {
7033 if (m_line_break.match(text, this->interval.end, end, flags)) {
7034 this->interval.end = m_line_break.interval.end;
7035 goto end;
7036 }
7037 else if (protocol.match(text, this->interval.end, end, flags)) {
7038 this->interval.end = protocol.interval.end;
7039 break;
7040 }
7041 else
7042 goto end;
7043 }
7044
7045 for (;;) {
7046 if (m_line_break.match(text, this->interval.end, end, flags)) {
7047 this->interval.end = m_line_break.interval.end;
7048 break;
7049 }
7050 else if (this->interval.end < end && text[this->interval.end])
7051 this->interval.end++;
7052 else
7053 goto end;
7054 }
7055
7056 end:
7057 this->interval.start = start;
7058 return true;
7059
7060 error:
7061 invalidate();
7062 return false;
7063 }
7064
7065 http_line_break m_line_break;
7066 };
7067
7071 class http_header : public parser
7072 {
7073 public:
7074 virtual void invalidate()
7075 {
7076 name.start = 1;
7077 name.end = 0;
7078 value.start = 1;
7079 value.end = 0;
7080 parser::invalidate();
7081 }
7082
7085
7086 protected:
7087 virtual bool do_match(
7088 _In_reads_or_z_(end) const char* text,
7089 _In_ size_t start = 0,
7090 _In_ size_t end = SIZE_MAX,
7091 _In_ int flags = match_default)
7092 {
7093 _Assume_(text || start >= end);
7094 this->interval.end = start;
7095
7096 if (m_line_break.match(text, this->interval.end, end, flags) ||
7097 (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])))
7098 goto error;
7099 name.start = this->interval.end;
7100 for (;;) {
7101 if (m_line_break.match(text, this->interval.end, end, flags))
7102 goto error;
7103 else if (this->interval.end < end && text[this->interval.end]) {
7104 if (stdex::isspace(text[this->interval.end])) {
7105 name.end = this->interval.end;
7106 this->interval.end++;
7107 for (;;) {
7108 if (m_line_break.match(text, this->interval.end, end, flags))
7109 goto error;
7110 else if (this->interval.end < end && text[this->interval.end]) {
7111 if (stdex::isspace(text[this->interval.end]))
7112 this->interval.end++;
7113 else
7114 break;
7115 }
7116 else
7117 goto error;
7118 }
7119 if (this->interval.end < end && text[this->interval.end] == ':') {
7120 this->interval.end++;
7121 break;
7122 }
7123 else
7124 goto error;
7125 break;
7126 }
7127 else if (text[this->interval.end] == ':') {
7128 name.end = this->interval.end;
7129 this->interval.end++;
7130 break;
7131 }
7132 else
7133 this->interval.end++;
7134 }
7135 else
7136 goto error;
7137 }
7138 value.start = SIZE_MAX;
7139 value.end = 0;
7140 for (;;) {
7141 if (m_line_break.match(text, this->interval.end, end, flags)) {
7142 this->interval.end = m_line_break.interval.end;
7143 if (!m_line_break.match(text, this->interval.end, end, flags) &&
7144 this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end]))
7145 this->interval.end++;
7146 else
7147 break;
7148 }
7149 else if (this->interval.end < end && text[this->interval.end]) {
7150 if (stdex::isspace(text[this->interval.end]))
7151 this->interval.end++;
7152 else {
7153 if (value.start == SIZE_MAX) value.start = this->interval.end;
7154 value.end = ++this->interval.end;
7155 }
7156 }
7157 else
7158 break;
7159 }
7160 this->interval.start = start;
7161 return true;
7162
7163 error:
7164 invalidate();
7165 return false;
7166 }
7167
7168 http_line_break m_line_break;
7169 };
7170
7174 template <class KEY, class T>
7175 class http_value_collection : public T
7176 {
7177 public:
7178 void insert(
7179 _In_reads_or_z_(end) const char* text,
7180 _In_ size_t start = 0,
7181 _In_ size_t end = SIZE_MAX,
7182 _In_ int flags = match_default)
7183 {
7184 while (start < end) {
7185 while (start < end && text[start] && stdex::isspace(text[start])) start++;
7186 if (start < end && text[start] == ',') {
7187 start++;
7188 while (start < end&& text[start] && stdex::isspace(text[start])) start++;
7189 }
7190 KEY el;
7191 if (el.match(text, start, end, flags)) {
7192 start = el.interval.end;
7193 T::insert(std::move(el));
7194 }
7195 else
7196 break;
7197 }
7198 }
7199 };
7200
7201 template <class T>
7203 constexpr bool operator()(const T& a, const T& b) const noexcept
7204 {
7205 return a.factor.value > b.factor.value;
7206 }
7207 };
7208
7212 template <class T, class AX = std::allocator<T>>
7214
7218 template <class T>
7220 {
7221 public:
7223 _In_ const std::shared_ptr<basic_parser<T>>& quote,
7224 _In_ const std::shared_ptr<basic_parser<T>>& chr,
7225 _In_ const std::shared_ptr<basic_parser<T>>& escape,
7226 _In_ const std::shared_ptr<basic_parser<T>>& sol,
7227 _In_ const std::shared_ptr<basic_parser<T>>& bs,
7228 _In_ const std::shared_ptr<basic_parser<T>>& ff,
7229 _In_ const std::shared_ptr<basic_parser<T>>& lf,
7230 _In_ const std::shared_ptr<basic_parser<T>>& cr,
7231 _In_ const std::shared_ptr<basic_parser<T>>& htab,
7232 _In_ const std::shared_ptr<basic_parser<T>>& uni,
7233 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
7234 _In_ const std::locale& locale = std::locale()) :
7236 m_quote(quote),
7237 m_chr(chr),
7238 m_escape(escape),
7239 m_sol(sol),
7240 m_bs(bs),
7241 m_ff(ff),
7242 m_lf(lf),
7243 m_cr(cr),
7244 m_htab(htab),
7245 m_uni(uni),
7246 m_hex(hex)
7247 {}
7248
7249 virtual void invalidate()
7250 {
7251 value.clear();
7253 }
7254
7255 std::basic_string<T> value;
7256
7257 protected:
7258 virtual bool do_match(
7259 _In_reads_or_z_opt_(end) const T* text,
7260 _In_ size_t start = 0,
7261 _In_ size_t end = SIZE_MAX,
7262 _In_ int flags = match_default)
7263 {
7264 _Assume_(text || start >= end);
7265 this->interval.end = start;
7266 if (m_quote->match(text, this->interval.end, end, flags)) {
7267 this->interval.end = m_quote->interval.end;
7268 value.clear();
7269 for (;;) {
7270 if (m_quote->match(text, this->interval.end, end, flags)) {
7271 this->interval.start = start;
7272 this->interval.end = m_quote->interval.end;
7273 return true;
7274 }
7275 if (m_escape->match(text, this->interval.end, end, flags)) {
7276 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
7277 value += '"'; this->interval.end = m_quote->interval.end;
7278 continue;
7279 }
7280 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
7281 value += '/'; this->interval.end = m_sol->interval.end;
7282 continue;
7283 }
7284 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
7285 value += '\b'; this->interval.end = m_bs->interval.end;
7286 continue;
7287 }
7288 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
7289 value += '\f'; this->interval.end = m_ff->interval.end;
7290 continue;
7291 }
7292 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
7293 value += '\n'; this->interval.end = m_lf->interval.end;
7294 continue;
7295 }
7296 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
7297 value += '\r'; this->interval.end = m_cr->interval.end;
7298 continue;
7299 }
7300 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
7301 value += '\t'; this->interval.end = m_htab->interval.end;
7302 continue;
7303 }
7304 if (
7305 m_uni->match(text, m_escape->interval.end, end, flags) &&
7306 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
7307 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
7308 {
7309 _Assume_(m_hex->value <= 0xffff);
7310 if (sizeof(T) == 1) {
7311 if (m_hex->value > 0x7ff) {
7312 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
7313 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
7314 value += (T)(0x80 | (m_hex->value & 0x3f));
7315 }
7316 else if (m_hex->value > 0x7f) {
7317 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
7318 value += (T)(0x80 | (m_hex->value & 0x3f));
7319 }
7320 else
7321 value += (T)(m_hex->value & 0x7f);
7322 }
7323 else
7324 value += (T)m_hex->value;
7325 this->interval.end = m_hex->interval.end;
7326 continue;
7327 }
7328 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
7329 value += '\\'; this->interval.end = m_escape->interval.end;
7330 continue;
7331 }
7332 }
7333 if (m_chr->match(text, this->interval.end, end, flags)) {
7334 value.append(text + m_chr->interval.start, m_chr->interval.size());
7335 this->interval.end = m_chr->interval.end;
7336 continue;
7337 }
7338 break;
7339 }
7340 }
7341 value.clear();
7342 this->interval.invalidate();
7343 return false;
7344 }
7345
7346 std::shared_ptr<basic_parser<T>> m_quote;
7347 std::shared_ptr<basic_parser<T>> m_chr;
7348 std::shared_ptr<basic_parser<T>> m_escape;
7349 std::shared_ptr<basic_parser<T>> m_sol;
7350 std::shared_ptr<basic_parser<T>> m_bs;
7351 std::shared_ptr<basic_parser<T>> m_ff;
7352 std::shared_ptr<basic_parser<T>> m_lf;
7353 std::shared_ptr<basic_parser<T>> m_cr;
7354 std::shared_ptr<basic_parser<T>> m_htab;
7355 std::shared_ptr<basic_parser<T>> m_uni;
7356 std::shared_ptr<basic_integer16<T>> m_hex;
7357 };
7358
7361#ifdef _UNICODE
7362 using tjson_string = wjson_string;
7363#else
7364 using tjson_string = json_string;
7365#endif
7366
7370 template <class T>
7372 {
7373 public:
7374 virtual void invalidate()
7375 {
7376 this->content.invalidate();
7378 }
7379
7381
7382 protected:
7383 virtual bool do_match(
7384 _In_reads_or_z_opt_(end) const T* text,
7385 _In_ size_t start = 0,
7386 _In_ size_t end = SIZE_MAX,
7387 _In_ int flags = match_multiline)
7388 {
7389 _Unreferenced_(flags);
7390 _Assume_(text || start + 1 >= end);
7391 if (start + 1 < end &&
7392 text[start] == '/' &&
7393 text[start + 1] == '*')
7394 {
7395 // /*
7396 this->content.start = this->interval.end = start + 2;
7397 for (;;) {
7398 if (this->interval.end >= end || !text[this->interval.end])
7399 break;
7400 if (this->interval.end + 1 < end &&
7401 text[this->interval.end] == '*' &&
7402 text[this->interval.end + 1] == '/')
7403 {
7404 // /*...*/
7405 this->content.end = this->interval.end;
7406 this->interval.start = start;
7407 this->interval.end = this->interval.end + 2;
7408 return true;
7409 }
7410 this->interval.end++;
7411 }
7412 }
7413 this->content.invalidate();
7414 this->interval.invalidate();
7415 return false;
7416 }
7417 };
7418
7419 using css_comment = basic_css_comment<char>;
7420 using wcss_comment = basic_css_comment<wchar_t>;
7421#ifdef _UNICODE
7422 using tcss_comment = wcss_comment;
7423#else
7424 using tcss_comment = css_comment;
7425#endif
7426
7430 template <class T>
7431 class basic_css_cdo : public basic_parser<T>
7432 {
7433 protected:
7434 virtual bool do_match(
7435 _In_reads_or_z_opt_(end) const T* text,
7436 _In_ size_t start = 0,
7437 _In_ size_t end = SIZE_MAX,
7438 _In_ int flags = match_multiline)
7439 {
7440 _Unreferenced_(flags);
7441 _Assume_(text || start + 3 >= end);
7442 if (start + 3 < end &&
7443 text[start] == '<' &&
7444 text[start + 1] == '!' &&
7445 text[start + 2] == '-' &&
7446 text[start + 3] == '-')
7447 {
7448 this->interval.start = start;
7449 this->interval.end = start + 4;
7450 return true;
7451 }
7452 this->interval.invalidate();
7453 return false;
7454 }
7455 };
7456
7459#ifdef _UNICODE
7460 using tcss_cdo = wcss_cdo;
7461#else
7462 using tcss_cdo = css_cdo;
7463#endif
7464
7468 template <class T>
7469 class basic_css_cdc : public basic_parser<T>
7470 {
7471 protected:
7472 virtual bool do_match(
7473 _In_reads_or_z_opt_(end) const T* text,
7474 _In_ size_t start = 0,
7475 _In_ size_t end = SIZE_MAX,
7476 _In_ int flags = match_multiline)
7477 {
7478 _Unreferenced_(flags);
7479 _Assume_(text || start + 2 >= end);
7480 if (start + 2 < end &&
7481 text[start] == '-' &&
7482 text[start + 1] == '-' &&
7483 text[start + 2] == '>')
7484 {
7485 this->interval.start = start;
7486 this->interval.end = start + 3;
7487 return true;
7488 }
7489 this->interval.invalidate();
7490 return false;
7491 }
7492 };
7493
7496#ifdef _UNICODE
7497 using tcss_cdc = wcss_cdc;
7498#else
7499 using tcss_cdc = css_cdc;
7500#endif
7501
7505 template <class T>
7507 {
7508 public:
7509 virtual void invalidate()
7510 {
7511 this->content.invalidate();
7513 }
7514
7516
7517 protected:
7518 virtual bool do_match(
7519 _In_reads_or_z_opt_(end) const T* text,
7520 _In_ size_t start = 0,
7521 _In_ size_t end = SIZE_MAX,
7522 _In_ int flags = match_multiline)
7523 {
7524 _Unreferenced_(flags);
7525 this->interval.end = start;
7526 _Assume_(text || this->interval.end >= end);
7527 if (this->interval.end < end &&
7528 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7529 {
7530 // "Quoted...
7531 T quote = text[this->interval.end];
7532 this->content.start = ++this->interval.end;
7533 for (;;) {
7534 if (this->interval.end >= end || !text[this->interval.end])
7535 break;
7536 if (text[this->interval.end] == quote) {
7537 // End quote"
7538 this->content.end = this->interval.end;
7539 this->interval.start = start;
7540 this->interval.end++;
7541 return true;
7542 }
7543 if (this->interval.end + 1 < end &&
7544 text[this->interval.end] == '\\' &&
7545 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7546 {
7547 // Escaped quote
7548 this->interval.end = this->interval.end + 2;
7549 }
7550 else
7551 this->interval.end++;
7552 }
7553 }
7554
7555 this->content.invalidate();
7556 this->interval.invalidate();
7557 return false;
7558 }
7559 };
7560
7561 using css_string = basic_css_string<char>;
7562 using wcss_string = basic_css_string<wchar_t>;
7563#ifdef _UNICODE
7564 using tcss_string = wcss_string;
7565#else
7566 using tcss_string = css_string;
7567#endif
7568
7572 template <class T>
7573 class basic_css_uri : public basic_parser<T>
7574 {
7575 public:
7576 virtual void invalidate()
7577 {
7578 this->content.invalidate();
7580 }
7581
7583
7584 protected:
7585 virtual bool do_match(
7586 _In_reads_or_z_opt_(end) const T* text,
7587 _In_ size_t start = 0,
7588 _In_ size_t end = SIZE_MAX,
7589 _In_ int flags = match_multiline)
7590 {
7591 _Unreferenced_(flags);
7592 this->interval.end = start;
7593 _Assume_(text || this->interval.end + 3 >= end);
7594 if (this->interval.end + 3 < end &&
7595 (text[this->interval.end] == 'u' || text[this->interval.end] == 'U') &&
7596 (text[this->interval.end + 1] == 'r' || text[this->interval.end + 1] == 'R') &&
7597 (text[this->interval.end + 2] == 'l' || text[this->interval.end + 2] == 'L') &&
7598 text[this->interval.end + 3] == '(')
7599 {
7600 // url(
7601 this->interval.end = this->interval.end + 4;
7602
7603 // Skip whitespace.
7604 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7605 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7606
7607 if (this->interval.end < end &&
7608 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7609 {
7610 // url("Quoted...
7611 T quote = text[this->interval.end];
7612 this->content.start = ++this->interval.end;
7613 for (;;) {
7614 if (this->interval.end >= end || !text[this->interval.end])
7615 goto error;
7616 if (text[this->interval.end] == quote) {
7617 // End quote"
7618 this->content.end = this->interval.end;
7619 this->interval.end++;
7620 break;
7621 }
7622 if (this->interval.end + 1 < end &&
7623 text[this->interval.end] == '\\' &&
7624 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7625 {
7626 // Escaped quote
7627 this->interval.end = this->interval.end + 2;
7628 }
7629 else
7630 this->interval.end++;
7631 }
7632
7633 // Skip whitespace.
7634 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7635
7636 if (this->interval.end < end &&
7637 text[this->interval.end] == ')')
7638 {
7639 // url("...")
7640 this->interval.start = start;
7641 this->interval.end++;
7642 return true;
7643 }
7644 }
7645 else {
7646 // url(...
7647 this->content.start = content.end = this->interval.end;
7648 for (;;) {
7649 if (this->interval.end >= end || !text[this->interval.end])
7650 goto error;
7651 if (text[this->interval.end] == ')') {
7652 // url(...)
7653 this->interval.start = start;
7654 this->interval.end++;
7655 return true;
7656 }
7657 if (ctype.is(ctype.space, text[this->interval.end]))
7658 this->interval.end++;
7659 else
7660 this->content.end = ++this->interval.end;
7661 }
7662 }
7663 }
7664
7665 error:
7666 invalidate();
7667 return false;
7668 }
7669 };
7670
7671 using css_uri = basic_css_uri<char>;
7672 using wcss_uri = basic_css_uri<wchar_t>;
7673#ifdef _UNICODE
7674 using tcss_uri = wcss_uri;
7675#else
7676 using tcss_uri = css_uri;
7677#endif
7678
7682 template <class T>
7684 {
7685 public:
7686 virtual void invalidate()
7687 {
7688 this->content.invalidate();
7690 }
7691
7693
7694 protected:
7695 virtual bool do_match(
7696 _In_reads_or_z_opt_(end) const T* text,
7697 _In_ size_t start = 0,
7698 _In_ size_t end = SIZE_MAX,
7699 _In_ int flags = match_multiline)
7700 {
7701 _Unreferenced_(flags);
7702 this->interval.end = start;
7703 _Assume_(text || this->interval.end + 6 >= end);
7704 if (this->interval.end + 6 < end &&
7705 text[this->interval.end] == '@' &&
7706 (text[this->interval.end + 1] == 'i' || text[this->interval.end + 1] == 'I') &&
7707 (text[this->interval.end + 2] == 'm' || text[this->interval.end + 2] == 'M') &&
7708 (text[this->interval.end + 3] == 'p' || text[this->interval.end + 3] == 'P') &&
7709 (text[this->interval.end + 4] == 'o' || text[this->interval.end + 4] == 'O') &&
7710 (text[this->interval.end + 5] == 'r' || text[this->interval.end + 5] == 'R') &&
7711 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T'))
7712 {
7713 // @import...
7714 this->interval.end = this->interval.end + 7;
7715
7716 // Skip whitespace.
7717 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7718 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7719
7720 if (this->interval.end < end &&
7721 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7722 {
7723 // @import "Quoted
7724 T quote = text[this->interval.end];
7725 this->content.start = ++this->interval.end;
7726 for (;;) {
7727 if (this->interval.end >= end || !text[this->interval.end])
7728 goto error;
7729 if (text[this->interval.end] == quote) {
7730 // End quote"
7731 this->content.end = this->interval.end;
7732 this->interval.start = start;
7733 this->interval.end++;
7734 return true;
7735 }
7736 if (this->interval.end + 1 < end &&
7737 text[this->interval.end] == '\\' &&
7738 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7739 {
7740 // Escaped quote
7741 this->interval.end = this->interval.end + 2;
7742 }
7743 else
7744 this->interval.end++;
7745 }
7746 }
7747 }
7748
7749 error:
7750 invalidate();
7751 return false;
7752 }
7753 };
7754
7755 using css_import = basic_css_import<char>;
7756 using wcss_import = basic_css_import<wchar_t>;
7757#ifdef _UNICODE
7758 using tcss_import = wcss_import;
7759#else
7760 using tcss_import = css_import;
7761#endif
7762
7766 template <class T>
7768 {
7769 public:
7770 virtual void invalidate()
7771 {
7772 this->base_type.invalidate();
7773 this->sub_type.invalidate();
7774 this->charset.invalidate();
7776 }
7777
7781
7782 protected:
7783 virtual bool do_match(
7784 _In_reads_or_z_opt_(end) const T* text,
7785 _In_ size_t start = 0,
7786 _In_ size_t end = SIZE_MAX,
7787 _In_ int flags = match_multiline)
7788 {
7789 _Unreferenced_(flags);
7790 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7791
7792 this->interval.end = start;
7793 this->base_type.start = this->interval.end;
7794 for (;;) {
7795 _Assume_(text || this->interval.end >= end);
7796 if (this->interval.end >= end || !text[this->interval.end])
7797 break;
7798 if (text[this->interval.end] == '/' ||
7799 text[this->interval.end] == ';' ||
7800 ctype.is(ctype.space, text[this->interval.end]))
7801 break;
7802 this->interval.end++;
7803 }
7804 if (this->interval.end <= this->base_type.start)
7805 goto error;
7806 this->base_type.end = this->interval.end;
7807
7808 if (end <= this->interval.end || text[this->interval.end] != '/')
7809 goto error;
7810
7811 this->interval.end++;
7812 this->sub_type.start = this->interval.end;
7813 for (;;) {
7814 if (this->interval.end >= end || !text[this->interval.end])
7815 break;
7816 if (text[this->interval.end] == '/' ||
7817 text[this->interval.end] == ';' ||
7818 ctype.is(ctype.space, text[this->interval.end]))
7819 break;
7820 this->interval.end++;
7821 }
7822 if (this->interval.end <= this->sub_type.start)
7823 goto error;
7824
7825 this->sub_type.end = this->interval.end;
7826 this->charset.invalidate();
7827 if (this->interval.end < end && text[this->interval.end] == ';') {
7828 this->interval.end++;
7829
7830 // Skip whitespace.
7831 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7832
7833 if (this->interval.end + 7 < end &&
7834 (text[this->interval.end] == 'c' || text[this->interval.end] == 'C') &&
7835 (text[this->interval.end + 1] == 'h' || text[this->interval.end + 1] == 'H') &&
7836 (text[this->interval.end + 2] == 'a' || text[this->interval.end + 2] == 'A') &&
7837 (text[this->interval.end + 3] == 'r' || text[this->interval.end + 3] == 'R') &&
7838 (text[this->interval.end + 4] == 's' || text[this->interval.end + 4] == 'S') &&
7839 (text[this->interval.end + 5] == 'e' || text[this->interval.end + 5] == 'E') &&
7840 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T') &&
7841 text[this->interval.end + 7] == '=')
7842 {
7843 this->interval.end = this->interval.end + 8;
7844 if (this->interval.end < end &&
7845 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7846 {
7847 // "Quoted...
7848 T quote = text[this->interval.end];
7849 this->charset.start = ++this->interval.end;
7850 for (;;) {
7851 if (this->interval.end >= end || !text[this->interval.end]) {
7852 // No end quote!
7853 this->charset.invalidate();
7854 break;
7855 }
7856 if (text[this->interval.end] == quote) {
7857 // End quote"
7858 this->charset.end = this->interval.end;
7859 this->interval.end++;
7860 break;
7861 }
7862 this->interval.end++;
7863 }
7864 }
7865 else {
7866 // Nonquoted
7867 this->charset.start = this->interval.end;
7868 for (;;) {
7869 if (this->interval.end >= end || !text[this->interval.end] ||
7870 ctype.is(ctype.space, text[this->interval.end])) {
7871 this->charset.end = this->interval.end;
7872 break;
7873 }
7874 this->interval.end++;
7875 }
7876 }
7877 }
7878 }
7879 this->interval.start = start;
7880 return true;
7881
7882 error:
7883 invalidate();
7884 return false;
7885 }
7886 };
7887
// Character-type specific aliases of basic_mime_type: mime_type for char and
// wmime_type for wchar_t. tmime_type selects the wide variant when _UNICODE is
// defined and the narrow variant otherwise.
7888 using mime_type = basic_mime_type<char>;
7889 using wmime_type = basic_mime_type<wchar_t>;
7890#ifdef _UNICODE
7891 using tmime_type = wmime_type;
7892#else
7893 using tmime_type = mime_type;
7894#endif
7895
7899 template <class T>
7901 {
7902 protected:
7903 virtual bool do_match(
7904 _In_reads_or_z_opt_(end) const T* text,
7905 _In_ size_t start = 0,
7906 _In_ size_t end = SIZE_MAX,
7907 _In_ int flags = match_default)
7908 {
7909 _Unreferenced_(flags);
7910 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7911 this->interval.end = start;
7912 for (;;) {
7913 _Assume_(text || this->interval.end >= end);
7914 if (this->interval.end >= end || !text[this->interval.end]) {
7916 this->interval.start = start;
7917 return true;
7918 }
7919 this->interval.invalidate();
7920 return false;
7921 }
7922 if (text[this->interval.end] == '>' ||
7923 text[this->interval.end] == '=' ||
7924 (text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>') ||
7925 ctype.is(ctype.space, text[this->interval.end]))
7926 {
7927 this->interval.start = start;
7928 return true;
7929 }
7930 this->interval.end++;
7931 }
7932 }
7933 };
7934
// thtml_ident selects the wide identifier parser (whtml_ident) when _UNICODE is
// defined and the narrow one (html_ident) otherwise.
7937#ifdef _UNICODE
7938 using thtml_ident = whtml_ident;
7939#else
7940 using thtml_ident = html_ident;
7941#endif
7942
7946 template <class T>
7948 {
7949 public:
// Resets the parsed content interval to the invalid state.
// NOTE(review): the listing numbering jumps from 7952 to 7954, so one statement
// is not visible here - presumably the base-class invalidate() call; confirm
// against the original header.
7950 virtual void invalidate()
7951 {
7952 this->content.invalidate();
7954 }
7955
7957
7958 protected:
7959 virtual bool do_match(
7960 _In_reads_or_z_opt_(end) const T* text,
7961 _In_ size_t start = 0,
7962 _In_ size_t end = SIZE_MAX,
7963 _In_ int flags = match_default)
7964 {
7965 _Unreferenced_(flags);
7966 this->interval.end = start;
7967 _Assume_(text || this->interval.end >= end);
7968 if (this->interval.end < end &&
7969 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7970 {
7971 // "Quoted...
7972 T quote = text[this->interval.end];
7973 this->content.start = ++this->interval.end;
7974 for (;;) {
7975 if (this->interval.end >= end || !text[this->interval.end]) {
7976 // No end quote!
7977 this->content.invalidate();
7978 this->interval.invalidate();
7979 return false;
7980 }
7981 if (text[this->interval.end] == quote) {
7982 // End quote"
7983 this->content.end = this->interval.end;
7984 this->interval.start = start;
7985 this->interval.end++;
7986 return true;
7987 }
7988 this->interval.end++;
7989 }
7990 }
7991
7992 // Nonquoted
7993 this->content.start = this->interval.end;
7994 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7995 for (;;) {
7996 _Assume_(text || this->interval.end >= end);
7997 if (this->interval.end >= end || !text[this->interval.end]) {
7998 this->content.end = this->interval.end;
7999 this->interval.start = start;
8000 return true;
8001 }
8002 if (text[this->interval.end] == '>' ||
8003 (text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>') ||
8004 ctype.is(ctype.space, text[this->interval.end]))
8005 {
8006 this->content.end = this->interval.end;
8007 this->interval.start = start;
8008 return true;
8009 }
8010 this->interval.end++;
8011 }
8012 }
8013 };
8014
// Character-type specific aliases of basic_html_value: html_value for char and
// whtml_value for wchar_t. thtml_value selects the wide variant when _UNICODE
// is defined and the narrow variant otherwise.
8015 using html_value = basic_html_value<char>;
8016 using whtml_value = basic_html_value<wchar_t>;
8017#ifdef _UNICODE
8018 using thtml_value = whtml_value;
8019#else
8020 using thtml_value = html_value;
8021#endif
8022
/// Kind of sequence recognised in HTML source.
enum class html_sequence_t {
    unknown = -1,       ///< Nothing recognised; initial and invalidated state of basic_html_tag::type

    text = 0,
    element = 1,        ///< Self-closed tag `<tag .../>`
    element_start = 2,  ///< Opening tag `<tag ...>`
    element_end = 3,    ///< Closing tag `</tag ...>`
    declaration = 4,    ///< Declaration `<! ... >`
    comment = 5,        ///< Comment `<!-- ... -->`
    instruction = 6,    ///< Instruction `<? ... >` or `<? ... ?>`
    PCDATA = 7,
    CDATA = 8,
};
8039
8047
8051 template <class T>
8053 {
8054 public:
// Constructs the tag parser with `type` set to html_sequence_t::unknown.
// NOTE(review): `locale` is not used in the visible lines and the listing
// numbering skips 8056 - presumably the parameter is forwarded to a base-class
// or member initialiser there; confirm against the original header.
8055 basic_html_tag(_In_ const std::locale& locale = std::locale()) :
8057 type(html_sequence_t::unknown)
8058 {}
8059
// Resets the parsed tag: type becomes html_sequence_t::unknown, the name
// interval is invalidated and the attribute list is emptied.
// NOTE(review): the listing numbering jumps from 8064 to 8066, so one statement
// is not visible here - presumably the base-class invalidate() call; confirm
// against the original header.
8060 virtual void invalidate()
8061 {
8062 this->type = html_sequence_t::unknown;
8063 this->name.invalidate();
8064 this->attributes.clear();
8066 }
8067
// Kind of tag found by the last match; html_sequence_t::unknown after
// construction and after invalidate().
8068 html_sequence_t type;
// Attributes collected by do_match(), in order of appearance.
8070 std::vector<html_attribute> attributes;
8071
8072 protected:
8073 virtual bool do_match(
8074 _In_reads_or_z_opt_(end) const T* text,
8075 _In_ size_t start = 0,
8076 _In_ size_t end = SIZE_MAX,
8077 _In_ int flags = match_multiline)
8078 {
8079 _Assume_(text || start >= end);
8080 if (start >= end || text[start] != '<')
8081 goto error;
8082 this->interval.end = start + 1;
8083 if (this->interval.end >= end || !text[this->interval.end])
8084 goto error;
8085 if (text[this->interval.end] == '/' &&
8086 this->m_ident.match(text, this->interval.end + 1, end, flags))
8087 {
8088 // </...
8089 this->type = html_sequence_t::element_end;
8090 this->name = this->m_ident.interval;
8091 this->interval.end = this->m_ident.interval.end;
8092 }
8093 else if (text[this->interval.end] == '!') {
8094 // <!...
8095 this->interval.end++;
8096 if (this->interval.end + 1 < end &&
8097 text[this->interval.end] == '-' &&
8098 text[this->interval.end + 1] == '-')
8099 {
8100 // <!--...
8101 this->name.start = this->interval.end = this->interval.end + 2;
8102 for (;;) {
8103 if (this->interval.end >= end || !text[this->interval.end])
8104 goto error;
8105 if (this->interval.end + 2 < end &&
8106 text[this->interval.end] == '-' &&
8107 text[this->interval.end + 1] == '-' &&
8108 text[this->interval.end + 2] == '>')
8109 {
8110 // <!--...-->
8111 this->type = html_sequence_t::comment;
8112 this->name.end = this->interval.end;
8113 this->attributes.clear();
8114 this->interval.start = start;
8115 this->interval.end = this->interval.end + 3;
8116 return true;
8117 }
8118 this->interval.end++;
8119 }
8120 }
8121 this->type = html_sequence_t::declaration;
8122 this->name.start = this->name.end = this->interval.end;
8123 }
8124 else if (text[this->interval.end] == '?') {
8125 // <?...
8126 this->name.start = ++this->interval.end;
8127 for (;;) {
8128 if (this->interval.end >= end || !text[this->interval.end])
8129 goto error;
8130 if (text[this->interval.end] == '>') {
8131 // <?...>
8132 this->type = html_sequence_t::instruction;
8133 this->name.end = this->interval.end;
8134 this->attributes.clear();
8135 this->interval.start = start;
8136 this->interval.end++;
8137 return true;
8138 }
8139 if (this->interval.end + 1 < end &&
8140 text[this->interval.end] == '?' &&
8141 text[this->interval.end + 1] == '>')
8142 {
8143 // <?...?>
8144 this->type = html_sequence_t::instruction;
8145 this->name.end = this->interval.end;
8146 this->attributes.clear();
8147 this->interval.start = start;
8148 this->interval.end = this->interval.end + 2;
8149 return true;
8150 }
8151 this->interval.end++;
8152 }
8153 }
8154 else if (this->m_ident.match(text, this->interval.end, end, flags)) {
8155 // <tag...
8156 this->type = html_sequence_t::element_start;
8157 this->name = this->m_ident.interval;
8158 this->interval.end = this->m_ident.interval.end;
8159 }
8160 else
8161 goto error;
8162
8163 {
8164 // Skip whitespace.
8165 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
8166 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8167
8168 this->attributes.clear();
8169 for (;;) {
8170 if (this->type == html_sequence_t::element_start &&
8171 this->interval.end + 1 < end &&
8172 text[this->interval.end] == '/' &&
8173 text[this->interval.end + 1] == '>')
8174 {
8175 // <tag .../>
8176 this->type = html_sequence_t::element;
8177 this->interval.end = this->interval.end + 2;
8178 break;
8179 }
8180 if (this->interval.end < end &&
8181 text[this->interval.end] == '>')
8182 {
8183 // <tag ...>
8184 this->interval.end++;
8185 break;
8186 }
8187 if (this->type == html_sequence_t::declaration &&
8188 this->interval.end + 1 < end &&
8189 text[this->interval.end] == '!' &&
8190 text[this->interval.end + 1] == '>')
8191 {
8192 // "<!...!>".
8193 this->interval.end = this->interval.end + 2;
8194 break;
8195 }
8196 if (this->type == html_sequence_t::declaration &&
8197 this->interval.end + 1 < end &&
8198 text[this->interval.end] == '-' &&
8199 text[this->interval.end + 1] == '-')
8200 {
8201 // "<! ... --...".
8202 this->interval.end = this->interval.end + 2;
8203 for (;;) {
8204 if (this->interval.end >= end || !text[this->interval.end])
8205 goto error;
8206 if (this->interval.end + 1 < end &&
8207 text[this->interval.end] == '-' &&
8208 text[this->interval.end + 1] == '-')
8209 {
8210 // "<! ... --...--".
8211 this->interval.end = this->interval.end + 2;
8212 break;
8213 }
8214 this->interval.end++;
8215 }
8216
8217 // Skip whitespace.
8218 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8219 continue;
8220 }
8221
8222 if (this->interval.end >= end || !text[this->interval.end])
8223 goto error;
8224
8225 // Attributes follow...
8226 html_attribute* a = nullptr;
8227 if (this->m_ident.match(text, this->interval.end, end, flags)) {
8228 this->attributes.push_back(std::move(html_attribute{ this->m_ident.interval }));
8229 a = &this->attributes.back();
8230 _Assume_(a);
8231 this->interval.end = this->m_ident.interval.end;
8232 }
8233 else {
8234 // What was that?! Skip.
8235 this->interval.end++;
8236 continue;
8237 }
8238
8239 // Skip whitespace.
8240 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8241
8242 if (this->interval.end < end && text[this->interval.end] == '=') {
8243 this->interval.end++;
8244
8245 // Skip whitespace.
8246 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8247
8248 if (this->m_value.match(text, this->interval.end, end, flags)) {
8249 // This attribute has value.
8250 a->value = this->m_value.content;
8251 this->interval.end = this->m_value.interval.end;
8252
8253 // Skip whitespace.
8254 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8255 }
8256 }
8257 else {
8258 // This attribute has no value.
8259 a->value.invalidate();
8260 }
8261 }
8262 }
8263
8264 this->interval.start = start;
8265 return true;
8266
8267 error:
8268 invalidate();
8269 return false;
8270 }
8271
// Sub-parser used by do_match() for tag names and attribute names.
8272 basic_html_ident<T> m_ident;
// Sub-parser used by do_match() for attribute values.
8273 basic_html_value<T> m_value;
8274 };
8275
// Character-type specific aliases of basic_html_tag: html_tag for char and
// whtml_tag for wchar_t. thtml_tag selects the wide variant when _UNICODE is
// defined and the narrow variant otherwise.
8276 using html_tag = basic_html_tag<char>;
8277 using whtml_tag = basic_html_tag<wchar_t>;
8278#ifdef _UNICODE
8279 using thtml_tag = whtml_tag;
8280#else
8281 using thtml_tag = html_tag;
8282#endif
8283
8287 template <class T>
8289 {
8290 public:
// Resets the parsed condition interval to the invalid state.
// NOTE(review): the listing numbering jumps from 8293 to 8295, so one statement
// is not visible here - presumably the base-class invalidate() call; confirm
// against the original header.
8291 virtual void invalidate()
8292 {
8293 this->condition.invalidate();
8295 }
8296
// Position of the condition text found between "<![" and the following '[',
// with leading and trailing whitespace excluded.
8297 stdex::interval<size_t> condition;
8298
8299 protected:
8300 virtual bool do_match(
8301 _In_reads_or_z_opt_(end) const T* text,
8302 _In_ size_t start = 0,
8303 _In_ size_t end = SIZE_MAX,
8304 _In_ int flags = match_multiline)
8305 {
8306 _Unreferenced_(flags);
8307 _Assume_(text || start + 2 >= end);
8308 if (start + 2 < end &&
8309 text[start] == '<' &&
8310 text[start + 1] == '!' &&
8311 text[start + 2] == '[')
8312 {
8313 this->interval.end = start + 3;
8314
8315 // Skip whitespace.
8316 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
8317 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8318
8319 this->condition.start = this->condition.end = this->interval.end;
8320
8321 for (;;) {
8322 if (this->interval.end >= end || !text[this->interval.end])
8323 break;
8324 if (text[this->interval.end] == '[') {
8325 this->interval.start = start;
8326 this->interval.end++;
8327 return true;
8328 }
8329 if (ctype.is(ctype.space, text[this->interval.end]))
8330 this->interval.end++;
8331 else
8332 this->condition.end = ++this->interval.end;
8333 }
8334 }
8335
8336 this->condition.invalidate();
8337 this->interval.invalidate();
8338 return false;
8339 }
8340 };
8341
// Character-type specific aliases of basic_html_declaration_condition_start:
// html_declaration_condition_start for char and
// whtml_declaration_condition_start for wchar_t.
// thtml_declaration_condition_start selects the wide variant when _UNICODE is
// defined and the narrow variant otherwise.
8342 using html_declaration_condition_start = basic_html_declaration_condition_start<char>;
8343 using whtml_declaration_condition_start = basic_html_declaration_condition_start<wchar_t>;
8344#ifdef _UNICODE
8345 using thtml_declaration_condition_start = whtml_declaration_condition_start;
8346#else
8347 using thtml_declaration_condition_start = html_declaration_condition_start;
8348#endif
8349
8353 template <class T>
8355 {
8356 protected:
8357 virtual bool do_match(
8358 _In_reads_or_z_opt_(end) const T* text,
8359 _In_ size_t start = 0,
8360 _In_ size_t end = SIZE_MAX,
8361 _In_ int flags = match_multiline)
8362 {
8363 _Unreferenced_(flags);
8364 _Assume_(text || start + 2 >= end);
8365 if (start + 2 < end &&
8366 text[start] == ']' &&
8367 text[start + 1] == ']' &&
8368 text[start + 2] == '>')
8369 {
8370 this->interval.start = start;
8371 this->interval.end = start + 3;
8372 return true;
8373 }
8374 this->interval.invalidate();
8375 return false;
8376 }
8377 };
8378
8381#ifdef _UNICODE
8383#else
8385#endif
8386 }
8387}
8388
8389#undef ENUM_FLAG_OPERATOR
8390#undef ENUM_FLAGS
8391
8392#if defined(_MSC_VER)
8393#pragma warning(pop)
8394#elif defined(__GNUC__)
8395#pragma GCC diagnostic pop
8396#endif
locale_t helper class to free_locale when going out of scope.
Definition locale.hpp:69
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4380
Test for any code unit.
Definition parser.hpp:235
Test for beginning of line.
Definition parser.hpp:634
Test for any.
Definition parser.hpp:1077
Test for chemical formula.
Definition parser.hpp:5509
Test for Creditor Reference.
Definition parser.hpp:4944
T reference[22]
Normalized national reference number.
Definition parser.hpp:4966
T check_digits[3]
Two check digits.
Definition parser.hpp:4965
bool is_valid
Is reference valid per ISO 7064.
Definition parser.hpp:4967
Legacy CSS comment end -->
Definition parser.hpp:7470
Legacy CSS comment start <!--
Definition parser.hpp:7432
CSS comment.
Definition parser.hpp:7372
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7380
CSS import directive.
Definition parser.hpp:7684
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7692
CSS string.
Definition parser.hpp:7507
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7515
URI in CSS.
Definition parser.hpp:7574
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7582
Test for any code unit from a given string of code units.
Definition parser.hpp:739
Test for specific code unit.
Definition parser.hpp:307
Test for date.
Definition parser.hpp:4013
Test for valid DNS domain character.
Definition parser.hpp:2795
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2805
Test for DNS domain/hostname.
Definition parser.hpp:2895
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2959
Test for e-mail address.
Definition parser.hpp:3787
Test for emoticon.
Definition parser.hpp:3890
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3918
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3919
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3921
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3920
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3917
Test for end of line.
Definition parser.hpp:673
Test for fraction.
Definition parser.hpp:1705
End of condition ...]]>
Definition parser.hpp:8355
Start of condition <![condition[...
Definition parser.hpp:8289
virtual bool do_match(_In_reads_or_z_opt_(end) const T *text, size_t start=0, size_t end=SIZE_MAX, int flags=match_multiline)
condition position in source
Definition parser.hpp:8300
Contiguous sequence of characters representing name of element, attribute etc.
Definition parser.hpp:7901
Tag.
Definition parser.hpp:8053
std::vector< html_attribute > attributes
tag attributes
Definition parser.hpp:8070
html_sequence_t type
tag type
Definition parser.hpp:8068
stdex::interval< size_t > name
tag name position in source
Definition parser.hpp:8069
Optionally-quoted string representing value of an attribute.
Definition parser.hpp:7948
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7956
Test for International Bank Account Number.
Definition parser.hpp:4655
T bban[31]
Normalized Basic Bank Account Number.
Definition parser.hpp:4680
T country[3]
ISO 3166-1 alpha-2 country code.
Definition parser.hpp:4678
T check_digits[3]
Two check digits.
Definition parser.hpp:4679
bool is_valid
Is IBAN valid per ISO 7064.
Definition parser.hpp:4681
Test for decimal integer.
Definition parser.hpp:1315
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1400
bool has_separators
Did integer have any separators?
Definition parser.hpp:1421
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1420
Test for hexadecimal integer.
Definition parser.hpp:1480
Base class for integer testing.
Definition parser.hpp:1293
size_t value
Calculated value of the numeral.
Definition parser.hpp:1307
Test for IPv4 address.
Definition parser.hpp:2363
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2408
struct in_addr value
IPv4 address value.
Definition parser.hpp:2409
Test for IPv6 address.
Definition parser.hpp:2575
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2647
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2645
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2646
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2501
Test for repeating.
Definition parser.hpp:929
bool m_greedy
try to match as long sequence as possible
Definition parser.hpp:968
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:965
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:966
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:967
Test for JSON string.
Definition parser.hpp:7220
MIME content type.
Definition parser.hpp:7768
stdex::interval< size_t > base_type
basic type position in source
Definition parser.hpp:7778
stdex::interval< size_t > sub_type
sub-type position in source
Definition parser.hpp:7779
stdex::interval< size_t > charset
charset position in source
Definition parser.hpp:7780
Test for mixed numeral.
Definition parser.hpp:1940
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:1973
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1971
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1970
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1969
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:1972
Test for monetary numeral.
Definition parser.hpp:2234
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2267
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2272
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2270
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2273
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2271
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2268
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2269
"No-op" match
Definition parser.hpp:203
Base template for all parsers.
Definition parser.hpp:79
stdex::interval< size_t > interval
Region of the last match.
Definition parser.hpp:119
Test for permutation.
Definition parser.hpp:1217
Test for phone number.
Definition parser.hpp:4503
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4528
Test for any punctuation code unit.
Definition parser.hpp:480
Test for Roman numeral.
Definition parser.hpp:1589
Test for scientific numeral.
Definition parser.hpp:2065
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2111
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2115
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2109
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2110
double value
Calculated value of the numeral.
Definition parser.hpp:2119
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2117
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2114
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2116
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2118
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2113
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2112
Test for match score.
Definition parser.hpp:1768
Test for sequence.
Definition parser.hpp:1025
Definition parser.hpp:708
Test for SI Reference delimiter.
Definition parser.hpp:5138
Test for SI Reference part.
Definition parser.hpp:5092
Test for SI Reference.
Definition parser.hpp:5177
basic_si_reference_part< T > part3
Reference data part 3 (P3)
Definition parser.hpp:5206
basic_si_reference_part< T > part1
Reference data part 1 (P1)
Definition parser.hpp:5204
bool is_valid
Is reference valid.
Definition parser.hpp:5207
T model[3]
Reference model.
Definition parser.hpp:5203
basic_si_reference_part< T > part2
Reference data part 2 (P2)
Definition parser.hpp:5205
Test for signed numeral.
Definition parser.hpp:1854
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1880
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1879
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1878
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1881
Test for any space code unit.
Definition parser.hpp:400
Test for any space or punctuation code unit.
Definition parser.hpp:555
Test for any string.
Definition parser.hpp:1145
Test for given string.
Definition parser.hpp:834
Test for time.
Definition parser.hpp:4278
Test for valid URL password character.
Definition parser.hpp:3079
Test for valid URL path character.
Definition parser.hpp:3181
Test for URL path.
Definition parser.hpp:3291
Test for valid URL username character.
Definition parser.hpp:2978
Test for URL.
Definition parser.hpp:3431
Test for HTTP agent.
Definition parser.hpp:6764
Test for HTTP any type.
Definition parser.hpp:5907
Test for HTTP asterisk.
Definition parser.hpp:6535
Test for HTTP header.
Definition parser.hpp:7072
Test for HTTP language (RFC1766)
Definition parser.hpp:6403
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:5589
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5939
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5991
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5855
http_token name
Parameter name.
Definition parser.hpp:5864
http_value value
Parameter value.
Definition parser.hpp:5865
Test for HTTP protocol.
Definition parser.hpp:6839
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6861
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:5748
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:5757
Test for HTTP request.
Definition parser.hpp:6940
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:5625
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:5661
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:5694
Test for HTTP URL parameter.
Definition parser.hpp:6228
Test for HTTP URL path segment.
Definition parser.hpp:6140
Test for HTTP URL path.
Definition parser.hpp:6173
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:6181
Test for HTTP URL port.
Definition parser.hpp:6084
Test for HTTP URL server.
Definition parser.hpp:6047
Test for HTTP URL.
Definition parser.hpp:6305
Collection of HTTP values.
Definition parser.hpp:7176
Test for HTTP value (RFC2616: value)
Definition parser.hpp:5811
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5820
http_token token
Value when matched as token.
Definition parser.hpp:5821
Test for HTTP weight factor.
Definition parser.hpp:6466
float value
Calculated value of the weight factor.
Definition parser.hpp:6479
Test for HTTP weighted value.
Definition parser.hpp:6558
Base template for collection-holding parsers.
Definition parser.hpp:985
Test for any SGML code point.
Definition parser.hpp:268
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:791
Test for specific SGML code point.
Definition parser.hpp:356
Test for valid DNS domain SGML character.
Definition parser.hpp:2850
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2540
Test for any SGML punctuation code point.
Definition parser.hpp:521
Test for any SGML space code point.
Definition parser.hpp:443
Test for any SGML space or punctuation code point.
Definition parser.hpp:598
Test for SGML given string.
Definition parser.hpp:881
Test for valid URL password SGML character.
Definition parser.hpp:3132
Test for valid URL path SGML character.
Definition parser.hpp:3238
Test for valid URL username SGML character.
Definition parser.hpp:3030
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
void invalidate()
Invalidates interval.
Definition interval.hpp:59
T start
interval start
Definition interval.hpp:19
Tag attribute.
Definition parser.hpp:8043
stdex::interval< size_t > name
attribute name position in source
Definition parser.hpp:8044
stdex::interval< size_t > value
attribute value position in source
Definition parser.hpp:8045
Definition parser.hpp:7202