stdex
Additional custom algorithms and algorithms not covered by Standard C++
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023-2024 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "interval.hpp"
10#include "memory.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include <stdarg.h>
14#include <stdint.h>
15#include <math.h>
16#if defined(_WIN32)
17#include <winsock2.h>
18#if _MSC_VER >= 1300
19#include <ws2ipdef.h>
20#endif
21#include <ws2tcpip.h>
22#else
23#include <netinet/in.h>
24#endif
25#include <limits>
26#include <list>
27#include <locale>
28#include <memory>
29#include <set>
30#include <string_view>
31#include <string>
32
33#ifdef _MSC_VER
34#pragma warning(push)
35#pragma warning(disable: 4100)
36#endif
37
///
/// Defines one binary bitwise operator and its compound-assignment form for a scoped enum.
///
/// Overloads are generated for (T, T), (T, underlying) and (underlying, T) operands.
///
/// \param T  Scoped enumeration type
/// \param X  Operator token (|, ^ or &)
///
#define ENUM_FLAG_OPERATOR(T,X) \
constexpr T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
constexpr T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
constexpr T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T& operator X= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
inline T& operator X= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }

///
/// Declares a scoped enum usable as a set of bit flags.
///
/// Forward-declares the enum, defines ~, |, ^ and & for it, and leaves the macro
/// expansion ending in `enum class T : type` so the caller supplies the
/// enumerator list: `ENUM_FLAGS(my_flags, int) { a = 1, b = 2 };`
///
/// \param T     Name of the enumeration to declare
/// \param type  Underlying integral type
///
#define ENUM_FLAGS(T, type) \
enum class T : type; \
constexpr T operator ~ (T t) { return static_cast<T>(~static_cast<std::underlying_type_t<T>>(t)); } \
ENUM_FLAG_OPERATOR(T,|) \
ENUM_FLAG_OPERATOR(T,^) \
ENUM_FLAG_OPERATOR(T,&) \
enum class T : type
51
52#if defined(_WIN32)
53#elif defined(__APPLE__)
54#define s6_words __u6_addr.__u6_addr16
55#else
56#define s6_words s6_addr16
57#endif
58
59namespace stdex
60{
61 namespace parser
62 {
/// No special matching behaviour requested
constexpr int match_default = 0x0;
/// Compare characters without regard to case
constexpr int match_case_insensitive = 1 << 0;
/// Let whitespace-class parsers accept line-break characters too
constexpr int match_multiline = 1 << 1;
69
73 template <class T>
75 {
76 public:
77 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
78 virtual ~basic_parser() {}
79
80 bool search(
81 _In_reads_or_z_opt_(end) const T* text,
82 _In_ size_t start = 0,
83 _In_ size_t end = SIZE_MAX,
84 _In_ int flags = match_default)
85 {
86 for (size_t i = start; i < end && text[i]; i++)
87 if (match(text, i, end, flags))
88 return true;
89 return false;
90 }
91
92 bool match(
93 _In_reads_or_z_opt_(end) const T* text,
94 _In_ size_t start = 0,
95 _In_ size_t end = SIZE_MAX,
96 _In_ int flags = match_default)
97 {
98 return do_match(text, start, end, flags);
99 }
100
101 bool match(
102 _In_ const std::basic_string_view<T, std::char_traits<T>> text,
103 _In_ size_t start = 0,
104 _In_ size_t end = SIZE_MAX,
105 _In_ int flags = match_default)
106 {
107 return match(text.data(), start, std::min<size_t>(end, text.size()), flags);
108 }
109
110 virtual void invalidate()
111 {
112 this->interval.invalidate();
113 }
114
116
117 protected:
118 virtual bool do_match(
119 _In_reads_or_z_opt_(end) const T* text,
120 _In_ size_t start = 0,
121 _In_ size_t end = SIZE_MAX,
122 _In_ int flags = match_default) = 0;
123
125 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
126 {
127 if (text[start] == '&') {
128 // Potential entity start
129 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
130 for (chr_end = start + 1;; chr_end++) {
131 if (chr_end >= end || text[chr_end] == 0) {
132 // Unterminated entity
133 break;
134 }
135 if (text[chr_end] == ';') {
136 // Entity end
137 size_t n = chr_end - start - 1;
138 if (n >= 2 && text[start + 1] == '#') {
139 // Numerical entity
140 char32_t unicode;
141 if (text[start + 2] == 'x' || text[start + 2] == 'X')
142 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
143 else
144 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
145#ifdef _WIN32
146 if (unicode < 0x10000) {
147 buf[0] = (wchar_t)unicode;
148 buf[1] = 0;
149 }
150 else {
151 ucs4_to_surrogate_pair(buf, unicode);
152 buf[2] = 0;
153 }
154#else
155 buf[0] = (wchar_t)unicode;
156 buf[1] = 0;
157#endif
158 chr_end++;
159 return buf;
160 }
161 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
162 if (entity_w) {
163 chr_end++;
164 return entity_w;
165 }
166 // Unknown entity.
167 break;
168 }
169 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
170 // This char cannot possibly be a part of entity.
171 break;
172 }
173 }
174 }
175 buf[0] = text[start];
176 buf[1] = 0;
177 chr_end = start + 1;
178 return buf;
179 }
181
182 std::locale m_locale;
183 };
184
185 using parser = basic_parser<char>;
186 using wparser = basic_parser<wchar_t>;
187#ifdef _UNICODE
188 using tparser = wparser;
189#else
190 using tparser = parser;
191#endif
192 using sgml_parser = basic_parser<char>;
193
197 template <class T>
198 class basic_noop : public basic_parser<T>
199 {
200 protected:
201 virtual bool do_match(
202 _In_reads_or_z_opt_(end) const T* text,
203 _In_ size_t start = 0,
204 _In_ size_t end = SIZE_MAX,
205 _In_ int flags = match_default)
206 {
207 _Assume_(text || start >= end);
208 if (start < end && text[start]) {
209 this->interval.start = this->interval.end = start;
210 return true;
211 }
212 this->interval.invalidate();
213 return false;
214 }
215 };
216
217 using noop = basic_noop<char>;
219#ifdef _UNICODE
220 using tnoop = wnoop;
221#else
222 using tnoop = noop;
223#endif
225
229 template <class T>
230 class basic_any_cu : public basic_parser<T>
231 {
232 public:
233 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
234
235 protected:
236 virtual bool do_match(
237 _In_reads_or_z_opt_(end) const T* text,
238 _In_ size_t start = 0,
239 _In_ size_t end = SIZE_MAX,
240 _In_ int flags = match_default)
241 {
242 _Assume_(text || start >= end);
243 if (start < end && text[start]) {
244 this->interval.end = (this->interval.start = start) + 1;
245 return true;
246 }
247 this->interval.invalidate();
248 return false;
249 }
250 };
251
254#ifdef _UNICODE
255 using tany_cu = wany_cu;
256#else
257 using tany_cu = any_cu;
258#endif
259
263 class sgml_any_cp : public basic_any_cu<char>
264 {
265 public:
266 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
267
268 protected:
269 virtual bool do_match(
270 _In_reads_or_z_(end) const char* text,
271 _In_ size_t start = 0,
272 _In_ size_t end = SIZE_MAX,
273 _In_ int flags = match_default)
274 {
275 _Assume_(text || start >= end);
276 if (start < end && text[start]) {
277 if (text[start] == '&') {
278 // SGML entity
279 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
280 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
281 if (text[this->interval.end] == ';') {
282 this->interval.end++;
283 this->interval.start = start;
284 return true;
285 }
286 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
287 break;
288 // Unterminated entity
289 }
290 this->interval.end = (this->interval.start = start) + 1;
291 return true;
292 }
293 this->interval.invalidate();
294 return false;
295 }
296 };
297
301 template <class T>
302 class basic_cu : public basic_parser<T>
303 {
304 public:
305 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
307 m_chr(chr),
308 m_invert(invert)
309 {}
310
311 protected:
312 virtual bool do_match(
313 _In_reads_or_z_opt_(end) const T* text,
314 _In_ size_t start = 0,
315 _In_ size_t end = SIZE_MAX,
316 _In_ int flags = match_default)
317 {
318 _Assume_(text || start >= end);
319 if (start < end && text[start]) {
320 bool r;
321 if (flags & match_case_insensitive) {
322 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
323 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
324 }
325 else
326 r = text[start] == m_chr;
327 if ((r && !m_invert) || (!r && m_invert)) {
328 this->interval.end = (this->interval.start = start) + 1;
329 return true;
330 }
331 }
332 this->interval.invalidate();
333 return false;
334 }
335
336 T m_chr;
337 bool m_invert;
338 };
339
340 using cu = basic_cu<char>;
341 using wcu = basic_cu<wchar_t>;
342#ifdef _UNICODE
343 using tcu = wcu;
344#else
345 using tcu = cu;
346#endif
347
351 class sgml_cp : public sgml_parser
352 {
353 public:
354 sgml_cp(const char* chr, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
356 m_invert(invert)
357 {
358 _Assume_(chr || !count);
359 wchar_t buf[3];
360 size_t chr_end;
361 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
362 }
363
364 protected:
365 virtual bool do_match(
366 _In_reads_or_z_(end) const char* text,
367 _In_ size_t start = 0,
368 _In_ size_t end = SIZE_MAX,
369 _In_ int flags = match_default)
370 {
371 _Assume_(text || start >= end);
372 if (start < end && text[start]) {
373 wchar_t buf[3];
374 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
375 bool r = ((flags & match_case_insensitive) ?
376 stdex::strnicmp(chr, SIZE_MAX, m_chr.data(), m_chr.size(), m_locale) :
377 stdex::strncmp(chr, SIZE_MAX, m_chr.data(), m_chr.size())) == 0;
378 if ((r && !m_invert) || (!r && m_invert)) {
379 this->interval.start = start;
380 return true;
381 }
382 }
383 this->interval.invalidate();
384 return false;
385 }
386
387 std::wstring m_chr;
388 bool m_invert;
389 };
390
394 template <class T>
395 class basic_space_cu : public basic_parser<T>
396 {
397 public:
398 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
400 m_invert(invert)
401 {}
402
403 protected:
404 virtual bool do_match(
405 _In_reads_or_z_opt_(end) const T* text,
406 _In_ size_t start = 0,
407 _In_ size_t end = SIZE_MAX,
408 _In_ int flags = match_default)
409 {
410 _Assume_(text || start >= end);
411 if (start < end && text[start]) {
412 bool r =
413 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
414 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
415 if ((r && !m_invert) || (!r && m_invert)) {
416 this->interval.end = (this->interval.start = start) + 1;
417 return true;
418 }
419 }
420 this->interval.invalidate();
421 return false;
422 }
423
424 bool m_invert;
425 };
426
429#ifdef _UNICODE
430 using tspace_cu = wspace_cu;
431#else
432 using tspace_cu = space_cu;
433#endif
434
438 class sgml_space_cp : public basic_space_cu<char>
439 {
440 public:
441 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
443 {}
444
445 protected:
446 virtual bool do_match(
447 _In_reads_or_z_(end) const char* text,
448 _In_ size_t start = 0,
449 _In_ size_t end = SIZE_MAX,
450 _In_ int flags = match_default)
451 {
452 _Assume_(text || start >= end);
453 if (start < end && text[start]) {
454 wchar_t buf[3];
455 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
456 const wchar_t* chr_end = chr + stdex::strlen(chr);
457 bool r =
458 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
459 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
460 if ((r && !m_invert) || (!r && m_invert)) {
461 this->interval.start = start;
462 return true;
463 }
464 }
465
466 this->interval.invalidate();
467 return false;
468 }
469 };
470
474 template <class T>
475 class basic_punct_cu : public basic_parser<T>
476 {
477 public:
478 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
480 m_invert(invert)
481 {}
482
483 protected:
484 virtual bool do_match(
485 _In_reads_or_z_opt_(end) const T* text,
486 _In_ size_t start = 0,
487 _In_ size_t end = SIZE_MAX,
488 _In_ int flags = match_default)
489 {
490 _Assume_(text || start >= end);
491 if (start < end && text[start]) {
492 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
493 if ((r && !m_invert) || (!r && m_invert)) {
494 this->interval.end = (this->interval.start = start) + 1;
495 return true;
496 }
497 }
498 this->interval.invalidate();
499 return false;
500 }
501
502 bool m_invert;
503 };
504
507#ifdef _UNICODE
508 using tpunct_cu = wpunct_cu;
509#else
510 using tpunct_cu = punct_cu;
511#endif
512
516 class sgml_punct_cp : public basic_punct_cu<char>
517 {
518 public:
519 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
521 {}
522
523 protected:
524 virtual bool do_match(
525 _In_reads_or_z_(end) const char* text,
526 _In_ size_t start = 0,
527 _In_ size_t end = SIZE_MAX,
528 _In_ int flags = match_default)
529 {
530 _Assume_(text || start >= end);
531 if (start < end && text[start]) {
532 wchar_t buf[3];
533 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
534 const wchar_t* chr_end = chr + stdex::strlen(chr);
535 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
536 if ((r && !m_invert) || (!r && m_invert)) {
537 this->interval.start = start;
538 return true;
539 }
540 }
541 this->interval.invalidate();
542 return false;
543 }
544 };
545
549 template <class T>
551 {
552 public:
553 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
555 m_invert(invert)
556 {}
557
558 protected:
559 virtual bool do_match(
560 _In_reads_or_z_opt_(end) const T* text,
561 _In_ size_t start = 0,
562 _In_ size_t end = SIZE_MAX,
563 _In_ int flags = match_default)
564 {
565 _Assume_(text || start >= end);
566 if (start < end && text[start]) {
567 bool r =
568 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
569 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
570 if ((r && !m_invert) || (!r && m_invert)) {
571 this->interval.end = (this->interval.start = start) + 1;
572 return true;
573 }
574 }
575 this->interval.invalidate();
576 return false;
577 }
578
579 bool m_invert;
580 };
581
584#ifdef _UNICODE
586#else
588#endif
589
594 {
595 public:
596 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
598 {}
599
600 protected:
601 virtual bool do_match(
602 _In_reads_or_z_(end) const char* text,
603 _In_ size_t start = 0,
604 _In_ size_t end = SIZE_MAX,
605 _In_ int flags = match_default)
606 {
607 _Assume_(text || start >= end);
608 if (start < end && text[start]) {
609 wchar_t buf[3];
610 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
611 const wchar_t* chr_end = chr + stdex::strlen(chr);
612 bool r =
613 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
614 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
615 if ((r && !m_invert) || (!r && m_invert)) {
616 this->interval.start = start;
617 return true;
618 }
619 }
620 this->interval.invalidate();
621 return false;
622 }
623 };
624
628 template <class T>
629 class basic_bol : public basic_parser<T>
630 {
631 public:
632 basic_bol(bool invert = false) : m_invert(invert) {}
633
634 protected:
635 virtual bool do_match(
636 _In_reads_or_z_opt_(end) const T* text,
637 _In_ size_t start = 0,
638 _In_ size_t end = SIZE_MAX,
639 _In_ int flags = match_default)
640 {
641 _Assume_(text || !end);
642 _Assume_(text || start >= end);
643 bool r = start == 0 || (start <= end && stdex::islbreak(text[start - 1]));
644 if ((r && !m_invert) || (!r && m_invert)) {
645 this->interval.end = this->interval.start = start;
646 return true;
647 }
648 this->interval.invalidate();
649 return false;
650 }
651
652 bool m_invert;
653 };
654
655 using bol = basic_bol<char>;
656 using wbol = basic_bol<wchar_t>;
657#ifdef _UNICODE
658 using tbol = wbol;
659#else
660 using tbol = bol;
661#endif
663
667 template <class T>
668 class basic_eol : public basic_parser<T>
669 {
670 public:
671 basic_eol(bool invert = false) : m_invert(invert) {}
672
673 protected:
674 virtual bool do_match(
675 _In_reads_or_z_opt_(end) const T* text,
676 _In_ size_t start = 0,
677 _In_ size_t end = SIZE_MAX,
678 _In_ int flags = match_default)
679 {
680 _Assume_(text || start >= end);
681 bool r = start >= end || !text[start] || stdex::islbreak(text[start]);
682 if ((r && !m_invert) || (!r && m_invert)) {
683 this->interval.end = this->interval.start = start;
684 return true;
685 }
686 this->interval.invalidate();
687 return false;
688 }
689
690 bool m_invert;
691 };
692
693 using eol = basic_eol<char>;
694 using weol = basic_eol<wchar_t>;
695#ifdef _UNICODE
696 using teol = weol;
697#else
698 using teol = eol;
699#endif
701
702 template <class T>
703 class basic_set : public basic_parser<T>
704 {
705 public:
706 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
708 hit_offset(SIZE_MAX),
709 m_invert(invert)
710 {}
711
712 virtual void invalidate()
713 {
714 hit_offset = SIZE_MAX;
716 }
717
718 size_t hit_offset;
719
720 protected:
721 virtual bool do_match(
722 _In_reads_or_z_opt_(end) const T* text,
723 _In_ size_t start = 0,
724 _In_ size_t end = SIZE_MAX,
725 _In_ int flags = match_default) = 0;
726
727 bool m_invert;
728 };
729
733 template <class T>
734 class basic_cu_set : public basic_set<T>
735 {
736 public:
738 _In_reads_or_z_(count) const T* set,
739 _In_ size_t count = SIZE_MAX,
740 _In_ bool invert = false,
741 _In_ const std::locale& locale = std::locale()) :
743 {
744 if (set)
745 m_set.assign(set, set + stdex::strnlen(set, count));
746 }
747
748 protected:
749 virtual bool do_match(
750 _In_reads_or_z_opt_(end) const T* text,
751 _In_ size_t start = 0,
752 _In_ size_t end = SIZE_MAX,
753 _In_ int flags = match_default)
754 {
755 _Assume_(text || start >= end);
756 if (start < end && text[start]) {
757 const T* set = m_set.data();
758 size_t r = (flags & match_case_insensitive) ?
759 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
760 stdex::strnchr(set, m_set.size(), text[start]);
761 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
762 this->hit_offset = r;
763 this->interval.end = (this->interval.start = start) + 1;
764 return true;
765 }
766 }
767 this->hit_offset = SIZE_MAX;
768 this->interval.invalidate();
769 return false;
770 }
771
772 std::basic_string<T> m_set;
773 };
774
777#ifdef _UNICODE
778 using tcu_set = wcu_set;
779#else
780 using tcu_set = cu_set;
781#endif
782
786 class sgml_cp_set : public basic_set<char>
787 {
788 public:
789 sgml_cp_set(const char* set, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
791 {
792 if (set)
793 m_set = sgml2str(set, count);
794 }
795
796 protected:
797 virtual bool do_match(
798 _In_reads_or_z_(end) const char* text,
799 _In_ size_t start = 0,
800 _In_ size_t end = SIZE_MAX,
801 _In_ int flags = match_default)
802 {
803 _Assume_(text || start >= end);
804 if (start < end && text[start]) {
805 wchar_t buf[3];
806 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
807 const wchar_t* set = m_set.data();
808 size_t r = (flags & match_case_insensitive) ?
809 stdex::strnistr(set, m_set.size(), chr, m_locale) :
810 stdex::strnstr(set, m_set.size(), chr);
811 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
812 hit_offset = r;
813 this->interval.start = start;
814 return true;
815 }
816 }
817 hit_offset = SIZE_MAX;
818 this->interval.invalidate();
819 return false;
820 }
821
822 std::wstring m_set;
823 };
824
828 template <class T>
829 class basic_string : public basic_parser<T>
830 {
831 public:
833 _In_reads_or_z_(count) const T* str,
834 _In_ size_t count = SIZE_MAX,
835 _In_ const std::locale& locale = std::locale()) :
837 m_str(str, str + stdex::strnlen(str, count))
838 {}
839
840 protected:
841 virtual bool do_match(
842 _In_reads_or_z_opt_(end) const T* text,
843 _In_ size_t start = 0,
844 _In_ size_t end = SIZE_MAX,
845 _In_ int flags = match_default)
846 {
847 _Assume_(text || start >= end);
848 size_t
849 m = m_str.size(),
850 n = std::min<size_t>(end - start, m);
851 bool r = ((flags & match_case_insensitive) ?
852 stdex::strnicmp(text + start, n, m_str.data(), m, this->m_locale) :
853 stdex::strncmp(text + start, n, m_str.data(), m)) == 0;
854 if (r) {
855 this->interval.end = (this->interval.start = start) + n;
856 return true;
857 }
858 this->interval.invalidate();
859 return false;
860 }
861
862 std::basic_string<T> m_str;
863 };
864
867#ifdef _UNICODE
868 using tstring = wstring;
869#else
870 using tstring = string;
871#endif
872
877 {
878 public:
879 sgml_string(const char* str, size_t count = SIZE_MAX, _In_ const std::locale& locale = std::locale()) :
881 m_str(sgml2str(str, count))
882 {}
883
884 protected:
885 virtual bool do_match(
886 _In_reads_or_z_(end) const char* text,
887 _In_ size_t start = 0,
888 _In_ size_t end = SIZE_MAX,
889 _In_ int flags = match_default)
890 {
891 _Assume_(text || start >= end);
892 const wchar_t* str = m_str.data();
893 const bool case_insensitive = flags & match_case_insensitive ? true : false;
894 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
895 for (this->interval.end = start;;) {
896 if (!*str) {
897 this->interval.start = start;
898 return true;
899 }
900 if (this->interval.end >= end || !text[this->interval.end]) {
901 this->interval.invalidate();
902 return false;
903 }
904 wchar_t buf[3];
905 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
906 for (; *chr; ++str, ++chr) {
907 if (!*str ||
908 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
909 {
910 this->interval.invalidate();
911 return false;
912 }
913 }
914 }
915 }
916
917 std::wstring m_str;
918 };
919
923 template <class T>
925 {
926 public:
927 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = SIZE_MAX, bool greedy = true) :
928 m_el(el),
932 {}
933
934 protected:
935 virtual bool do_match(
936 _In_reads_or_z_opt_(end) const T* text,
937 _In_ size_t start = 0,
938 _In_ size_t end = SIZE_MAX,
939 _In_ int flags = match_default)
940 {
941 _Assume_(text || start >= end);
942 this->interval.start = this->interval.end = start;
943 for (size_t i = 0; ; i++) {
944 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
945 return true;
946 if (!m_el->match(text, this->interval.end, end, flags)) {
947 if (i >= m_min_iterations)
948 return true;
949 break;
950 }
951 if (m_el->interval.end == this->interval.end) {
952 // Element did match, but the matching interval was empty. Quit instead of spinning.
953 return true;
954 }
955 this->interval.end = m_el->interval.end;
956 }
957 this->interval.invalidate();
958 return false;
959 }
960
961 std::shared_ptr<basic_parser<T>> m_el;
964 bool m_greedy;
965 };
966
969#ifdef _UNICODE
970 using titerations = witerations;
971#else
972 using titerations = iterations;
973#endif
975
979 template <class T>
981 {
982 protected:
983 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
984
985 public:
987 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
988 _In_ size_t count,
989 _In_ const std::locale& locale = std::locale()) :
991 {
992 _Assume_(el || !count);
993 m_collection.reserve(count);
994 for (size_t i = 0; i < count; i++)
995 m_collection.push_back(el[i]);
996 }
997
999 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1000 _In_ const std::locale& locale = std::locale()) :
1002 m_collection(std::move(collection))
1003 {}
1004
1005 virtual void invalidate()
1006 {
1007 for (auto& el : m_collection)
1008 el->invalidate();
1010 }
1011
1012 protected:
1013 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
1014 };
1015
1019 template <class T>
1021 {
1022 public:
1024 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1025 _In_ size_t count = 0,
1026 _In_ const std::locale& locale = std::locale()) :
1028 {}
1029
1031 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1032 _In_ const std::locale& locale = std::locale()) :
1034 {}
1035
1036 protected:
1037 virtual bool do_match(
1038 _In_reads_or_z_opt_(end) const T* text,
1039 _In_ size_t start = 0,
1040 _In_ size_t end = SIZE_MAX,
1041 _In_ int flags = match_default)
1042 {
1043 _Assume_(text || start >= end);
1044 this->interval.end = start;
1045 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1046 if (!(*i)->match(text, this->interval.end, end, flags)) {
1047 for (++i; i != this->m_collection.end(); ++i)
1048 (*i)->invalidate();
1049 this->interval.invalidate();
1050 return false;
1051 }
1052 this->interval.end = (*i)->interval.end;
1053 }
1054 this->interval.start = start;
1055 return true;
1056 }
1057 };
1058
1061#ifdef _UNICODE
1062 using tsequence = wsequence;
1063#else
1064 using tsequence = sequence;
1065#endif
1067
1071 template <class T>
1073 {
1074 protected:
1075 basic_branch(_In_ const std::locale& locale) :
1077 hit_offset(SIZE_MAX)
1078 {}
1079
1080 public:
1082 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1083 _In_ size_t count = 0,
1084 _In_ const std::locale& locale = std::locale()) :
1086 hit_offset(SIZE_MAX)
1087 {}
1088
1090 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1091 _In_ const std::locale& locale = std::locale()) :
1093 hit_offset(SIZE_MAX)
1094 {}
1095
1096 virtual void invalidate()
1097 {
1098 hit_offset = SIZE_MAX;
1100 }
1101
1102 size_t hit_offset;
1103
1104 protected:
1105 virtual bool do_match(
1106 _In_reads_or_z_opt_(end) const T* text,
1107 _In_ size_t start = 0,
1108 _In_ size_t end = SIZE_MAX,
1109 _In_ int flags = match_default)
1110 {
1111 _Assume_(text || start >= end);
1112 hit_offset = 0;
1113 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1114 if ((*i)->match(text, start, end, flags)) {
1115 this->interval = (*i)->interval;
1116 for (++i; i != this->m_collection.end(); ++i)
1117 (*i)->invalidate();
1118 return true;
1119 }
1120 }
1121 hit_offset = SIZE_MAX;
1122 this->interval.invalidate();
1123 return false;
1124 }
1125 };
1126
1127 using branch = basic_branch<char>;
1129#ifdef _UNICODE
1130 using tbranch = wbranch;
1131#else
1132 using tbranch = branch;
1133#endif
1135
1139 template <class T, class T_parser = basic_string<T>>
1141 {
1142 public:
1144 _In_reads_(count) const T* str_z = nullptr,
1145 _In_ size_t count = 0,
1146 _In_ const std::locale& locale = std::locale()) :
1148 {
1149 build(str_z, count);
1150 }
1151
1152 basic_string_branch(_In_z_ const T* str, ...) :
1153 basic_branch<T>(std::locale())
1154 {
1155 va_list params;
1156 va_start(params, str);
1157 build(str, params);
1158 va_end(params);
1159 }
1160
1161 basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1163 {
1164 va_list params;
1165 va_start(params, str);
1166 build(str, params);
1167 va_end(params);
1168 }
1169
1170 protected:
1171 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1172 {
1173 _Assume_(str_z || !count);
1174 if (count) {
1175 size_t offset, n;
1176 for (
1177 offset = n = 0;
1178 offset < count && str_z[offset];
1179 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1180 this->m_collection.reserve(n);
1181 for (
1182 offset = 0;
1183 offset < count && str_z[offset];
1184 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1185 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1186 }
1187 }
1188
1189 void build(_In_z_ const T* str, _In_ va_list params)
1190 {
1191 const T* p;
1192 for (
1193 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, SIZE_MAX, this->m_locale)));
1194 (p = va_arg(params, const T*)) != nullptr;
1195 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, SIZE_MAX, this->m_locale))));
1196 }
1197 };
1198
1201#ifdef _UNICODE
1203#else
1205#endif
1207
1211 template <class T>
1213 {
1214 public:
1216 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1217 _In_ size_t count = 0,
1218 _In_ const std::locale& locale = std::locale()) :
1220 {}
1221
1223 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1224 _In_ const std::locale& locale = std::locale()) :
1226 {}
1227
1228 protected:
1229 virtual bool do_match(
1230 _In_reads_or_z_opt_(end) const T* text,
1231 _In_ size_t start = 0,
1232 _In_ size_t end = SIZE_MAX,
1233 _In_ int flags = match_default)
1234 {
1235 _Assume_(text || start >= end);
1236 for (auto& el : this->m_collection)
1237 el->invalidate();
1238 if (match_recursively(text, start, end, flags)) {
1239 this->interval.start = start;
1240 return true;
1241 }
1242 this->interval.invalidate();
1243 return false;
1244 }
1245
1246 bool match_recursively(
1247 _In_reads_or_z_opt_(end) const T* text,
1248 _In_ size_t start = 0,
1249 _In_ size_t end = SIZE_MAX,
1250 _In_ int flags = match_default)
1251 {
1252 bool all_matched = true;
1253 for (auto& el : this->m_collection) {
1254 if (!el->interval) {
1255 // Element was not matched in permutatuion yet.
1256 all_matched = false;
1257 if (el->match(text, start, end, flags)) {
1258 // Element matched for the first time.
1259 if (match_recursively(text, el->interval.end, end, flags)) {
1260 // Rest of the elements matched too.
1261 return true;
1262 }
1263 el->invalidate();
1264 }
1265 }
1266 }
1267 if (all_matched) {
1268 this->interval.end = start;
1269 return true;
1270 }
1271 return false;
1272 }
1273 };
1274
1277#ifdef _UNICODE
1278 using tpermutation = wpermutation;
1279#else
1280 using tpermutation = permutation;
1281#endif
1283
1287 template <class T>
1288 class basic_integer : public basic_parser<T>
1289 {
1290 public:
1291 basic_integer(_In_ const std::locale& locale = std::locale()) :
1293 value(0)
1294 {}
1295
1296 virtual void invalidate()
1297 {
1298 value = 0;
1300 }
1301
1302 public:
1303 size_t value;
1304 };
1305
1309 template <class T>
1311 {
1312 public:
1314 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1315 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1316 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1317 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1318 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1319 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1320 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1321 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1322 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1323 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1324 _In_ const std::locale& locale = std::locale()) :
1326 m_digit_0(digit_0),
1327 m_digit_1(digit_1),
1328 m_digit_2(digit_2),
1329 m_digit_3(digit_3),
1330 m_digit_4(digit_4),
1331 m_digit_5(digit_5),
1332 m_digit_6(digit_6),
1333 m_digit_7(digit_7),
1334 m_digit_8(digit_8),
1335 m_digit_9(digit_9)
1336 {}
1337
1338 protected:
/// Matches a run of decimal digits and accumulates their numeric value.
///
/// Starting at `start`, the ten digit parsers are tried in order 0..9 at the
/// current position. The first one that matches supplies the digit and the
/// position advances to that parser's interval end. Scanning stops at `end`,
/// at a zero terminator, or when no digit parser matches.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` to start matching at
/// \param[in] end    Index in `text` to stop matching at
/// \param[in] flags  Matching flags, forwarded unchanged to the digit parsers
///
/// \returns true with this->interval.start set to `start`; otherwise false with the interval invalidated.
1339 virtual bool do_match(
1340 _In_reads_or_z_opt_(end) const T* text,
1341 _In_ size_t start = 0,
1342 _In_ size_t end = SIZE_MAX,
1343 _In_ int flags = match_default)
1344 {
1345 _Assume_(text || start >= end);
// this->interval.end doubles as the scan cursor; value is restarted from 0 on every call.
1346 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1347 size_t dig;
1348 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1349 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1350 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1351 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1352 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1353 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1354 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1355 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1356 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1357 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1358 else break;
// Positional accumulation in base 10. No overflow check is performed here.
1359 this->value = this->value * 10 + dig;
1360 }
// NOTE(review): listing line 1361 is absent from this extract; the unmatched closing brace at listing line 1364 implies a dropped conditional (presumably "at least one digit consumed") - confirm against the original header.
1362 this->interval.start = start;
1363 return true;
1364 }
1365 this->interval.invalidate();
1366 return false;
1367 }
1368
1369 std::shared_ptr<basic_parser<T>>
1370 m_digit_0,
1371 m_digit_1,
1372 m_digit_2,
1373 m_digit_3,
1374 m_digit_4,
1375 m_digit_5,
1376 m_digit_6,
1377 m_digit_7,
1378 m_digit_8,
1379 m_digit_9;
1380 };
1381
1384#ifdef _UNICODE
1385 using tinteger10 = winteger10;
1386#else
1387 using tinteger10 = integer10;
1388#endif
1390
1394 template <class T>
1396 {
1397 public:
1399 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1400 _In_ const std::shared_ptr<basic_set<T>>& separator,
1401 _In_ const std::locale& locale = std::locale()) :
1403 digit_count(0),
1404 has_separators(false),
1405 m_digits(digits),
1406 m_separator(separator)
1407 {}
1408
/// Clears the digit count and the "separators seen" flag.
// NOTE(review): listing line 1413 is absent from this extract - presumably the call to the base-class invalidate(); confirm against the original header.
1409 virtual void invalidate()
1410 {
1411 digit_count = 0;
1412 has_separators = false;
1414 }
1415
1418
1419 protected:
1420 virtual bool do_match(
1421 _In_reads_or_z_opt_(end) const T* text,
1422 _In_ size_t start = 0,
1423 _In_ size_t end = SIZE_MAX,
1424 _In_ int flags = match_default)
1425 {
1426 _Assume_(text || start >= end);
1427 if (m_digits->match(text, start, end, flags)) {
1428 // Leading part match.
1429 this->value = m_digits->value;
1430 digit_count = m_digits->interval.size();
1431 has_separators = false;
1432 this->interval.start = start;
1433 this->interval.end = m_digits->interval.end;
1434 if (m_digits->interval.size() <= 3) {
1435 // Maybe separated with thousand separators?
1436 size_t hit_offset = SIZE_MAX;
1437 while (m_separator->match(text, this->interval.end, end, flags) &&
1438 (hit_offset == SIZE_MAX || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1439 m_digits->match(text, m_separator->interval.end, end, flags) &&
1440 m_digits->interval.size() == 3)
1441 {
1442 // Thousand separator and three-digit integer followed.
1443 this->value = this->value * 1000 + m_digits->value;
1444 digit_count += 3;
1445 has_separators = true;
1446 this->interval.end = m_digits->interval.end;
1447 hit_offset = m_separator->hit_offset;
1448 }
1449 }
1450
1451 return true;
1452 }
1453 this->value = 0;
1454 this->interval.invalidate();
1455 return false;
1456 }
1457
1458 std::shared_ptr<basic_integer10<T>> m_digits;
1459 std::shared_ptr<basic_set<T>> m_separator;
1460 };
1461
1462 using integer10ts = basic_integer10ts<char>;
1463 using winteger10ts = basic_integer10ts<wchar_t>;
1464#ifdef _UNICODE
1465 using tinteger10ts = winteger10ts;
1466#else
1467 using tinteger10ts = integer10ts;
1468#endif
1469 using sgml_integer10ts = basic_integer10ts<char>;
1470
1474 template <class T>
1476 {
1477 public:
1479 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1480 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1481 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1482 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1483 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1484 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1485 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1486 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1487 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1488 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1489 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1490 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1491 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1492 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1493 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1494 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1495 _In_ const std::locale& locale = std::locale()) :
1497 m_digit_0(digit_0),
1498 m_digit_1(digit_1),
1499 m_digit_2(digit_2),
1500 m_digit_3(digit_3),
1501 m_digit_4(digit_4),
1502 m_digit_5(digit_5),
1503 m_digit_6(digit_6),
1504 m_digit_7(digit_7),
1505 m_digit_8(digit_8),
1506 m_digit_9(digit_9),
1507 m_digit_10(digit_10),
1508 m_digit_11(digit_11),
1509 m_digit_12(digit_12),
1510 m_digit_13(digit_13),
1511 m_digit_14(digit_14),
1512 m_digit_15(digit_15)
1513 {}
1514
1515 protected:
/// Matches a run of hexadecimal digits and accumulates their numeric value.
///
/// Starting at `start`, the sixteen digit parsers are tried in order 0..15 at
/// the current position. The first one that matches supplies the digit and the
/// position advances to that parser's interval end. Scanning stops at `end`,
/// at a zero terminator, or when no digit parser matches.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` to start matching at
/// \param[in] end    Index in `text` to stop matching at
/// \param[in] flags  Matching flags, forwarded unchanged to the digit parsers
///
/// \returns true with this->interval.start set to `start`; otherwise false with the interval invalidated.
1516 virtual bool do_match(
1517 _In_reads_or_z_opt_(end) const T* text,
1518 _In_ size_t start = 0,
1519 _In_ size_t end = SIZE_MAX,
1520 _In_ int flags = match_default)
1521 {
1522 _Assume_(text || start >= end);
// this->interval.end doubles as the scan cursor; value is restarted from 0 on every call.
1523 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1524 size_t dig;
1525 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1526 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1527 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1528 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1529 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1530 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1531 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1532 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1533 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1534 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1535 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1536 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1537 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1538 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1539 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1540 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
1541 else break;
// Positional accumulation in base 16. No overflow check is performed here.
1542 this->value = this->value * 16 + dig;
1543 }
// NOTE(review): listing line 1544 is absent from this extract; the unmatched closing brace at listing line 1547 implies a dropped conditional (presumably "at least one digit consumed") - confirm against the original header.
1545 this->interval.start = start;
1546 return true;
1547 }
1548 this->interval.invalidate();
1549 return false;
1550 }
1551
1552 std::shared_ptr<basic_parser<T>>
1553 m_digit_0,
1554 m_digit_1,
1555 m_digit_2,
1556 m_digit_3,
1557 m_digit_4,
1558 m_digit_5,
1559 m_digit_6,
1560 m_digit_7,
1561 m_digit_8,
1562 m_digit_9,
1563 m_digit_10,
1564 m_digit_11,
1565 m_digit_12,
1566 m_digit_13,
1567 m_digit_14,
1568 m_digit_15;
1569 };
1570
1573#ifdef _UNICODE
1574 using tinteger16 = winteger16;
1575#else
1576 using tinteger16 = integer16;
1577#endif
1579
1583 template <class T>
1585 {
1586 public:
1588 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1589 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1590 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1591 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1592 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1593 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1594 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1595 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1596 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1597 _In_ const std::locale& locale = std::locale()) :
1599 m_digit_1(digit_1),
1600 m_digit_5(digit_5),
1601 m_digit_10(digit_10),
1602 m_digit_50(digit_50),
1603 m_digit_100(digit_100),
1604 m_digit_500(digit_500),
1605 m_digit_1000(digit_1000),
1606 m_digit_5000(digit_5000),
1607 m_digit_10000(digit_10000)
1608 {}
1609
1610 protected:
/// Matches a Roman numeral and computes its value.
///
/// Symbols are recognised by the optional digit parsers for 1, 5, 10, 50, 100,
/// 500, 1000, 5000 and 10000; a null parser is simply skipped. The loop keeps
/// a short history of recent symbols (dig[0] = current, dig[1..3] = previous
/// ones, dig[4] = very first symbol) to decide whether a symbol is added, forms
/// a subtractive pair with its predecessor, or ends the numeral.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` to start matching at
/// \param[in] end    Index in `text` to stop matching at
/// \param[in] flags  Matching flags, forwarded unchanged to the digit parsers
///
/// \returns true when the accumulated value is non-zero; false otherwise, with the interval invalidated.
1611 virtual bool do_match(
1612 _In_reads_or_z_opt_(end) const T* text,
1613 _In_ size_t start = 0,
1614 _In_ size_t end = SIZE_MAX,
1615 _In_ int flags = match_default)
1616 {
1617 _Assume_(text || start >= end);
// NOTE(review): listing line 1619 is absent from this extract - presumably the declaration of the `dig` history array (the dig[4] == SIZE_MAX test below suggests SIZE_MAX-initialised slots); confirm against the original header.
1618 size_t
1620 end2;
1621
// The increment clause shifts the symbol history by one and commits the consumed symbol's end position.
1622 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
1623 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1624 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1625 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1626 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1627 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1628 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1629 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1630 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1631 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1632 else break;
1633
1634 // Store first digit.
1635 if (dig[4] == SIZE_MAX) dig[4] = dig[0];
1636
1637 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1638 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1639 break;
1640 }
1641 if (dig[0] <= dig[1]) {
1642 // Digit is less or equal previous one: add.
1643 this->value += dig[0];
1644 }
1645 else if (
1646 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1647 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1648 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1649 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1650 {
1651 // Digit is up to two orders bigger than previous one: subtract. But...
1652 if (dig[2] < dig[0]) {
1653 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1654 break;
1655 }
// Fold the subtractive pair into a single history entry so later comparisons see the combined value.
1656 this->value -= dig[1]; // Cancel addition in the previous step.
1657 dig[0] -= dig[1]; // Combine last two digits.
1658 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1659 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1660 this->value += dig[0]; // Add combined value.
1661 }
1662 else {
1663 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1664 break;
1665 }
1666 }
// A break above leaves this->interval.end at the last committed symbol; the offending symbol is not consumed.
1667 if (this->value) {
1668 this->interval.start = start;
1669 return true;
1670 }
1671 this->interval.invalidate();
1672 return false;
1673 }
1674
1675 std::shared_ptr<basic_parser<T>>
1676 m_digit_1,
1677 m_digit_5,
1678 m_digit_10,
1679 m_digit_50,
1680 m_digit_100,
1681 m_digit_500,
1682 m_digit_1000,
1683 m_digit_5000,
1684 m_digit_10000;
1685 };
1686
1689#ifdef _UNICODE
1691#else
1693#endif
1695
1699 template <class T>
1701 {
1702 public:
1704 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1705 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1706 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1707 _In_ const std::locale& locale = std::locale()) :
1709 numerator(_numerator),
1710 fraction_line(_fraction_line),
1711 denominator(_denominator)
1712 {}
1713
/// Invalidates the numerator, fraction-line and denominator sub-parsers.
// NOTE(review): listing line 1719 is absent from this extract - presumably the call to the base-class invalidate(); confirm against the original header.
1714 virtual void invalidate()
1715 {
1716 numerator->invalidate();
1717 fraction_line->invalidate();
1718 denominator->invalidate();
1720 }
1721
1722 std::shared_ptr<basic_parser<T>> numerator;
1723 std::shared_ptr<basic_parser<T>> fraction_line;
1724 std::shared_ptr<basic_parser<T>> denominator;
1725
1726 protected:
1727 virtual bool do_match(
1728 _In_reads_or_z_opt_(end) const T* text,
1729 _In_ size_t start = 0,
1730 _In_ size_t end = SIZE_MAX,
1731 _In_ int flags = match_default)
1732 {
1733 _Assume_(text || start >= end);
1734 if (numerator->match(text, start, end, flags) &&
1735 fraction_line->match(text, numerator->interval.end, end, flags) &&
1736 denominator->match(text, fraction_line->interval.end, end, flags))
1737 {
1738 this->interval.start = start;
1739 this->interval.end = denominator->interval.end;
1740 return true;
1741 }
1742 numerator->invalidate();
1743 fraction_line->invalidate();
1744 denominator->invalidate();
1745 this->interval.invalidate();
1746 return false;
1747 }
1748 };
1749
1752#ifdef _UNICODE
1753 using tfraction = wfraction;
1754#else
1755 using tfraction = fraction;
1756#endif
1758
1762 template <class T>
1763 class basic_score : public basic_parser<T>
1764 {
1765 public:
1767 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1768 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1769 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1770 _In_ const std::shared_ptr<basic_parser<T>>& space,
1771 _In_ const std::locale& locale = std::locale()) :
1773 home(_home),
1774 separator(_separator),
1775 guest(_guest),
1776 m_space(space)
1777 {}
1778
/// Invalidates the home, separator and guest sub-parsers.
// NOTE(review): listing line 1784 is absent from this extract - presumably the call to the base-class invalidate(); confirm against the original header.
1779 virtual void invalidate()
1780 {
1781 home->invalidate();
1782 separator->invalidate();
1783 guest->invalidate();
1785 }
1786
1787 std::shared_ptr<basic_parser<T>> home;
1788 std::shared_ptr<basic_parser<T>> separator;
1789 std::shared_ptr<basic_parser<T>> guest;
1790
1791 protected:
1792 virtual bool do_match(
1793 _In_reads_or_z_opt_(end) const T* text,
1794 _In_ size_t start = 0,
1795 _In_ size_t end = SIZE_MAX,
1796 _In_ int flags = match_default)
1797 {
1798 _Assume_(text || start >= end);
1799 this->interval.end = start;
1800
1801 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1802
1803 if (home->match(text, this->interval.end, end, flags))
1804 this->interval.end = home->interval.end;
1805 else
1806 goto end;
1807
1808 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1809
1810 if (separator->match(text, this->interval.end, end, flags))
1811 this->interval.end = separator->interval.end;
1812 else
1813 goto end;
1814
1815 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1816
1817 if (guest->match(text, this->interval.end, end, flags))
1818 this->interval.end = guest->interval.end;
1819 else
1820 goto end;
1821
1822 this->interval.start = start;
1823 return true;
1824
1825 end:
1826 home->invalidate();
1827 separator->invalidate();
1828 guest->invalidate();
1829 this->interval.invalidate();
1830 return false;
1831 }
1832
1833 std::shared_ptr<basic_parser<T>> m_space;
1834 };
1835
1836 using score = basic_score<char>;
1838#ifdef _UNICODE
1839 using tscore = wscore;
1840#else
1841 using tscore = score;
1842#endif
1844
1848 template <class T>
1850 {
1851 public:
1853 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1854 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1855 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1856 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1857 _In_ const std::locale& locale = std::locale()) :
1863 {}
1864
/// Invalidates the sign sub-parsers that are set (each may be null) and the number sub-parser.
// NOTE(review): listing line 1871 is absent from this extract - presumably the call to the base-class invalidate(); confirm against the original header.
1865 virtual void invalidate()
1866 {
1867 if (positive_sign) positive_sign->invalidate();
1868 if (negative_sign) negative_sign->invalidate();
1869 if (special_sign) special_sign->invalidate();
1870 number->invalidate();
1872 }
1873
1874 std::shared_ptr<basic_parser<T>> positive_sign;
1875 std::shared_ptr<basic_parser<T>> negative_sign;
1876 std::shared_ptr<basic_parser<T>> special_sign;
1877 std::shared_ptr<basic_parser<T>> number;
1878
1879 protected:
1880 virtual bool do_match(
1881 _In_reads_or_z_opt_(end) const T* text,
1882 _In_ size_t start = 0,
1883 _In_ size_t end = SIZE_MAX,
1884 _In_ int flags = match_default)
1885 {
1886 _Assume_(text || start >= end);
1887 this->interval.end = start;
1888 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1889 this->interval.end = positive_sign->interval.end;
1890 if (negative_sign) negative_sign->invalidate();
1891 if (special_sign) special_sign->invalidate();
1892 }
1893 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1894 this->interval.end = negative_sign->interval.end;
1895 if (positive_sign) positive_sign->invalidate();
1896 if (special_sign) special_sign->invalidate();
1897 }
1898 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1899 this->interval.end = special_sign->interval.end;
1900 if (positive_sign) positive_sign->invalidate();
1901 if (negative_sign) negative_sign->invalidate();
1902 }
1903 else {
1904 if (positive_sign) positive_sign->invalidate();
1905 if (negative_sign) negative_sign->invalidate();
1906 if (special_sign) special_sign->invalidate();
1907 }
1908 if (number->match(text, this->interval.end, end, flags)) {
1909 this->interval.start = start;
1910 this->interval.end = number->interval.end;
1911 return true;
1912 }
1913 if (positive_sign) positive_sign->invalidate();
1914 if (negative_sign) negative_sign->invalidate();
1915 if (special_sign) special_sign->invalidate();
1916 number->invalidate();
1917 this->interval.invalidate();
1918 return false;
1919 }
1920 };
1921
1922 using signed_numeral = basic_signed_numeral<char>;
1923 using wsigned_numeral = basic_signed_numeral<wchar_t>;
1924#ifdef _UNICODE
1925 using tsigned_numeral = wsigned_numeral;
1926#else
1927 using tsigned_numeral = signed_numeral;
1928#endif
1929 using sgml_signed_numeral = basic_signed_numeral<char>;
1930
1934 template <class T>
1936 {
1937 public:
1939 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1940 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1941 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1942 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1943 _In_ const std::shared_ptr<basic_parser<T>>& space,
1944 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1945 _In_ const std::locale& locale = std::locale()) :
1952 m_space(space)
1953 {}
1954
/// Invalidates the sign sub-parsers that are set (each may be null), plus the integer and fraction sub-parsers.
// NOTE(review): listing line 1962 is absent from this extract - presumably the call to the base-class invalidate(); confirm against the original header.
1955 virtual void invalidate()
1956 {
1957 if (positive_sign) positive_sign->invalidate();
1958 if (negative_sign) negative_sign->invalidate();
1959 if (special_sign) special_sign->invalidate();
1960 integer->invalidate();
1961 fraction->invalidate();
1963 }
1964
1965 std::shared_ptr<basic_parser<T>> positive_sign;
1966 std::shared_ptr<basic_parser<T>> negative_sign;
1967 std::shared_ptr<basic_parser<T>> special_sign;
1968 std::shared_ptr<basic_parser<T>> integer;
1969 std::shared_ptr<basic_parser<T>> fraction;
1970
1971 protected:
1972 virtual bool do_match(
1973 _In_reads_or_z_opt_(end) const T* text,
1974 _In_ size_t start = 0,
1975 _In_ size_t end = SIZE_MAX,
1976 _In_ int flags = match_default)
1977 {
1978 _Assume_(text || start >= end);
1979 this->interval.end = start;
1980
1981 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1982 this->interval.end = positive_sign->interval.end;
1983 if (negative_sign) negative_sign->invalidate();
1984 if (special_sign) special_sign->invalidate();
1985 }
1986 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1987 this->interval.end = negative_sign->interval.end;
1988 if (positive_sign) positive_sign->invalidate();
1989 if (special_sign) special_sign->invalidate();
1990 }
1991 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1992 this->interval.end = special_sign->interval.end;
1993 if (positive_sign) positive_sign->invalidate();
1994 if (negative_sign) negative_sign->invalidate();
1995 }
1996 else {
1997 if (positive_sign) positive_sign->invalidate();
1998 if (negative_sign) negative_sign->invalidate();
1999 if (special_sign) special_sign->invalidate();
2000 }
2001
2002 // Check for <integer> <fraction>
2003 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
2004 if (integer->match(text, this->interval.end, end, flags) &&
2005 m_space->match(text, integer->interval.end, end, space_match_flags))
2006 {
2007 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
2008 if (fraction->match(text, this->interval.end, end, flags)) {
2009 this->interval.start = start;
2010 this->interval.end = fraction->interval.end;
2011 return true;
2012 }
2013 fraction->invalidate();
2014 this->interval.start = start;
2015 this->interval.end = integer->interval.end;
2016 return true;
2017 }
2018
2019 // Check for <fraction>
2020 if (fraction->match(text, this->interval.end, end, flags)) {
2021 integer->invalidate();
2022 this->interval.start = start;
2023 this->interval.end = fraction->interval.end;
2024 return true;
2025 }
2026
2027 // Check for <integer>
2028 if (integer->match(text, this->interval.end, end, flags)) {
2029 fraction->invalidate();
2030 this->interval.start = start;
2031 this->interval.end = integer->interval.end;
2032 return true;
2033 }
2034
2035 if (positive_sign) positive_sign->invalidate();
2036 if (negative_sign) negative_sign->invalidate();
2037 if (special_sign) special_sign->invalidate();
2038 integer->invalidate();
2039 fraction->invalidate();
2040 this->interval.invalidate();
2041 return false;
2042 }
2043
2044 std::shared_ptr<basic_parser<T>> m_space;
2045 };
2046
2047 using mixed_numeral = basic_mixed_numeral<char>;
2048 using wmixed_numeral = basic_mixed_numeral<wchar_t>;
2049#ifdef _UNICODE
2050 using tmixed_numeral = wmixed_numeral;
2051#else
2052 using tmixed_numeral = mixed_numeral;
2053#endif
2054 using sgml_mixed_numeral = basic_mixed_numeral<char>;
2055
2059 template <class T>
2061 {
2062 public:
2064 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2065 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2066 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2067 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2068 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2069 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2070 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2071 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2072 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2073 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2074 _In_ const std::locale& locale = std::locale()) :
2086 value(std::numeric_limits<double>::quiet_NaN())
2087 {}
2088
/// Invalidates every sub-parser (the signs and all exponent parts may be null) and resets value to quiet NaN.
// NOTE(review): listing line 2102 is absent from this extract - presumably the call to the base-class invalidate(); confirm against the original header.
2089 virtual void invalidate()
2090 {
2091 if (positive_sign) positive_sign->invalidate();
2092 if (negative_sign) negative_sign->invalidate();
2093 if (special_sign) special_sign->invalidate();
2094 integer->invalidate();
2095 decimal_separator->invalidate();
2096 decimal->invalidate();
2097 if (exponent_symbol) exponent_symbol->invalidate();
2098 if (positive_exp_sign) positive_exp_sign->invalidate();
2099 if (negative_exp_sign) negative_exp_sign->invalidate();
2100 if (exponent) exponent->invalidate();
2101 value = std::numeric_limits<double>::quiet_NaN();
2103 }
2104
2105 std::shared_ptr<basic_parser<T>> positive_sign;
2106 std::shared_ptr<basic_parser<T>> negative_sign;
2107 std::shared_ptr<basic_parser<T>> special_sign;
2108 std::shared_ptr<basic_integer<T>> integer;
2109 std::shared_ptr<basic_parser<T>> decimal_separator;
2110 std::shared_ptr<basic_integer<T>> decimal;
2111 std::shared_ptr<basic_parser<T>> exponent_symbol;
2112 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2113 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2114 std::shared_ptr<basic_integer<T>> exponent;
2115 double value;
2116
2117 protected:
2118 virtual bool do_match(
2119 _In_reads_or_z_opt_(end) const T* text,
2120 _In_ size_t start = 0,
2121 _In_ size_t end = SIZE_MAX,
2122 _In_ int flags = match_default)
2123 {
2124 _Assume_(text || start >= end);
2125 this->interval.end = start;
2126
2127 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2128 this->interval.end = positive_sign->interval.end;
2129 if (negative_sign) negative_sign->invalidate();
2130 if (special_sign) special_sign->invalidate();
2131 }
2132 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2133 this->interval.end = negative_sign->interval.end;
2134 if (positive_sign) positive_sign->invalidate();
2135 if (special_sign) special_sign->invalidate();
2136 }
2137 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2138 this->interval.end = special_sign->interval.end;
2139 if (positive_sign) positive_sign->invalidate();
2140 if (negative_sign) negative_sign->invalidate();
2141 }
2142 else {
2143 if (positive_sign) positive_sign->invalidate();
2144 if (negative_sign) negative_sign->invalidate();
2145 if (special_sign) special_sign->invalidate();
2146 }
2147
2148 if (integer->match(text, this->interval.end, end, flags))
2149 this->interval.end = integer->interval.end;
2150
2151 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2152 decimal->match(text, decimal_separator->interval.end, end, flags))
2153 this->interval.end = decimal->interval.end;
2154 else {
2155 decimal_separator->invalidate();
2156 decimal->invalidate();
2157 }
2158
2159 if (integer->interval.empty() &&
2160 decimal->interval.empty())
2161 {
2162 // No integer part, no decimal part.
2163 if (positive_sign) positive_sign->invalidate();
2164 if (negative_sign) negative_sign->invalidate();
2165 if (special_sign) special_sign->invalidate();
2166 integer->invalidate();
2167 decimal_separator->invalidate();
2168 decimal->invalidate();
2169 if (exponent_symbol) exponent_symbol->invalidate();
2170 if (positive_exp_sign) positive_exp_sign->invalidate();
2171 if (negative_exp_sign) negative_exp_sign->invalidate();
2172 if (exponent) exponent->invalidate();
2173 this->interval.invalidate();
2174 return false;
2175 }
2176
2177 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2178 ((positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2179 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags)) ||
2180 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2181 {
2182 this->interval.end = exponent->interval.end;
2183 if (negative_exp_sign) negative_exp_sign->invalidate();
2184 }
2185 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2186 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2187 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2188 {
2189 this->interval.end = exponent->interval.end;
2190 if (positive_exp_sign) positive_exp_sign->invalidate();
2191 }
2192 else {
2193 if (exponent_symbol) exponent_symbol->invalidate();
2194 if (positive_exp_sign) positive_exp_sign->invalidate();
2195 if (negative_exp_sign) negative_exp_sign->invalidate();
2196 if (exponent) exponent->invalidate();
2197 }
2198
2199 value = (double)integer->value;
2200 if (decimal->interval)
2201 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2202 if (negative_sign && negative_sign->interval)
2203 value = -value;
2204 if (exponent && exponent->interval) {
2205 double e = (double)exponent->value;
2206 if (negative_exp_sign && negative_exp_sign->interval)
2207 e = -e;
2208 value *= pow(10.0, e);
2209 }
2210
2211 this->interval.start = start;
2212 return true;
2213 }
2214 };
2215
2216 using scientific_numeral = basic_scientific_numeral<char>;
2217 using wscientific_numeral = basic_scientific_numeral<wchar_t>;
2218#ifdef _UNICODE
2219 using tscientific_numeral = wscientific_numeral;
2220#else
2221 using tscientific_numeral = scientific_numeral;
2222#endif
2223 using sgml_scientific_numeral = basic_scientific_numeral<char>;
2224
2228 template <class T>
2230 {
2231 public:
2233 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2234 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2235 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2236 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2237 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2238 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2239 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2240 _In_ const std::locale& locale = std::locale()) :
2249 {}
2250
/// Invalidates the sign sub-parsers that are set (each may be null), plus the currency, integer, decimal-separator and decimal sub-parsers.
// NOTE(review): listing line 2260 is absent from this extract - presumably the call to the base-class invalidate(); confirm against the original header.
2251 virtual void invalidate()
2252 {
2253 if (positive_sign) positive_sign->invalidate();
2254 if (negative_sign) negative_sign->invalidate();
2255 if (special_sign) special_sign->invalidate();
2256 currency->invalidate();
2257 integer->invalidate();
2258 decimal_separator->invalidate();
2259 decimal->invalidate();
2261 }
2262
2263 std::shared_ptr<basic_parser<T>> positive_sign;
2264 std::shared_ptr<basic_parser<T>> negative_sign;
2265 std::shared_ptr<basic_parser<T>> special_sign;
2266 std::shared_ptr<basic_parser<T>> currency;
2267 std::shared_ptr<basic_parser<T>> integer;
2268 std::shared_ptr<basic_parser<T>> decimal_separator;
2269 std::shared_ptr<basic_parser<T>> decimal;
2270
2271 protected:
2272 virtual bool do_match(
2273 _In_reads_or_z_opt_(end) const T* text,
2274 _In_ size_t start = 0,
2275 _In_ size_t end = SIZE_MAX,
2276 _In_ int flags = match_default)
2277 {
2278 _Assume_(text || start >= end);
2279 this->interval.end = start;
2280
2281 if (positive_sign->match(text, this->interval.end, end, flags)) {
2282 this->interval.end = positive_sign->interval.end;
2283 if (negative_sign) negative_sign->invalidate();
2284 if (special_sign) special_sign->invalidate();
2285 }
2286 else if (negative_sign->match(text, this->interval.end, end, flags)) {
2287 this->interval.end = negative_sign->interval.end;
2288 if (positive_sign) positive_sign->invalidate();
2289 if (special_sign) special_sign->invalidate();
2290 }
2291 else if (special_sign->match(text, this->interval.end, end, flags)) {
2292 this->interval.end = special_sign->interval.end;
2293 if (positive_sign) positive_sign->invalidate();
2294 if (negative_sign) negative_sign->invalidate();
2295 }
2296 else {
2297 if (positive_sign) positive_sign->invalidate();
2298 if (negative_sign) negative_sign->invalidate();
2299 if (special_sign) special_sign->invalidate();
2300 }
2301
2302 if (currency->match(text, this->interval.end, end, flags))
2303 this->interval.end = currency->interval.end;
2304 else {
2305 if (positive_sign) positive_sign->invalidate();
2306 if (negative_sign) negative_sign->invalidate();
2307 if (special_sign) special_sign->invalidate();
2308 integer->invalidate();
2309 decimal_separator->invalidate();
2310 decimal->invalidate();
2311 this->interval.invalidate();
2312 return false;
2313 }
2314
2315 if (integer->match(text, this->interval.end, end, flags))
2316 this->interval.end = integer->interval.end;
2317 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2318 decimal->match(text, decimal_separator->interval.end, end, flags))
2319 this->interval.end = decimal->interval.end;
2320 else {
2321 decimal_separator->invalidate();
2322 decimal->invalidate();
2323 }
2324
2325 if (integer->interval.empty() &&
2326 decimal->interval.empty())
2327 {
2328 // No integer part, no decimal part.
2329 if (positive_sign) positive_sign->invalidate();
2330 if (negative_sign) negative_sign->invalidate();
2331 if (special_sign) special_sign->invalidate();
2332 currency->invalidate();
2333 integer->invalidate();
2334 decimal_separator->invalidate();
2335 decimal->invalidate();
2336 this->interval.invalidate();
2337 return false;
2338 }
2339
2340 this->interval.start = start;
2341 return true;
2342 }
2343 };
2344
// Character-type aliases of basic_monetary_numeral; the t-prefixed alias
// selects the wchar_t variant when _UNICODE is defined.
2345 using monetary_numeral = basic_monetary_numeral<char>;
2346 using wmonetary_numeral = basic_monetary_numeral<wchar_t>;
2347#ifdef _UNICODE
2348 using tmonetary_numeral = wmonetary_numeral;
2349#else
2350 using tmonetary_numeral = monetary_numeral;
2351#endif
2352 using sgml_monetary_numeral = basic_monetary_numeral<char>;
2353
2357 template <class T>
2359 {
2360 public:
// IPv4 address parser constructor.
// NOTE(review): this listing skips the inner lines with the constructor name
// (2361) and the first member initializer (2374).
// Stores one parser per decimal digit 0-9 plus the component separator, and
// zeroes `value`.
2362 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2363 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2364 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2365 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2366 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2367 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2368 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2369 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2370 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2371 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2372 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2373 _In_ const std::locale& locale = std::locale()) :
2375 m_digit_0(digit_0),
2376 m_digit_1(digit_1),
2377 m_digit_2(digit_2),
2378 m_digit_3(digit_3),
2379 m_digit_4(digit_4),
2380 m_digit_5(digit_5),
2381 m_digit_6(digit_6),
2382 m_digit_7(digit_7),
2383 m_digit_8(digit_8),
2384 m_digit_9(digit_9),
2385 m_separator(separator)
2386 {
2387 value.s_addr = 0;
2388 }
2389
// Resets the parse result: each of the four component intervals is set to
// start=1/end=0 and `value` is zeroed.
// NOTE(review): this listing skips inner line 2401, just before the closing brace.
2390 virtual void invalidate()
2391 {
2392 components[0].start = 1;
2393 components[0].end = 0;
2394 components[1].start = 1;
2395 components[1].end = 0;
2396 components[2].start = 1;
2397 components[2].end = 0;
2398 components[3].start = 1;
2399 components[3].end = 0;
2400 value.s_addr = 0;
2402 }
2403
2406
2407 protected:
2408 virtual bool do_match(
2409 _In_reads_or_z_opt_(end) const T* text,
2410 _In_ size_t start = 0,
2411 _In_ size_t end = SIZE_MAX,
2412 _In_ int flags = match_default)
2413 {
2414 _Assume_(text || start >= end);
2415 this->interval.end = start;
2416 value.s_addr = 0;
2417
2418 size_t i;
2419 for (i = 0; i < 4; i++) {
2420 if (i) {
2421 if (m_separator->match(text, this->interval.end, end, flags))
2422 this->interval.end = m_separator->interval.end;
2423 else
2424 goto error;
2425 }
2426
2427 components[i].start = this->interval.end;
2428 bool is_empty = true;
2429 size_t x;
2430 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2431 size_t dig, digit_end;
2432 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2433 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2434 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2435 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2436 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2437 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2438 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2439 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2440 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2441 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2442 else break;
2443 size_t x_n = x * 10 + dig;
2444 if (x_n <= 255) {
2445 x = x_n;
2446 this->interval.end = digit_end;
2447 is_empty = false;
2448 }
2449 else
2450 break;
2451 }
2452 if (is_empty)
2453 goto error;
2454 components[i].end = this->interval.end;
2455 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2456 }
2457 if (i < 4)
2458 goto error;
2459
2460 value.s_addr = htonl(value.s_addr);
2461 this->interval.start = start;
2462 return true;
2463
2464 error:
2465 invalidate();
2466 return false;
2467 }
2468
// Parsers recognising the decimal digits 0 through 9 (one parser per digit
// value) and the parser for the separator between address components.
2469 std::shared_ptr<basic_parser<T>>
2470 m_digit_0,
2471 m_digit_1,
2472 m_digit_2,
2473 m_digit_3,
2474 m_digit_4,
2475 m_digit_5,
2476 m_digit_6,
2477 m_digit_7,
2478 m_digit_8,
2479 m_digit_9;
2480 std::shared_ptr<basic_parser<T>> m_separator;
2481 };
2482
// Character-type aliases of basic_ipv4_address; the t-prefixed alias selects
// the wchar_t variant when _UNICODE is defined.
2483 using ipv4_address = basic_ipv4_address<char>;
2484 using wipv4_address = basic_ipv4_address<wchar_t>;
2485#ifdef _UNICODE
2486 using tipv4_address = wipv4_address;
2487#else
2488 using tipv4_address = ipv4_address;
2489#endif
2490 using sgml_ipv4_address = basic_ipv4_address<char>;
2491
2495 template <class T>
2497 {
2498 public:
2499 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2500
2501 protected:
2502 virtual bool do_match(
2503 _In_reads_or_z_opt_(end) const T* text,
2504 _In_ size_t start = 0,
2505 _In_ size_t end = SIZE_MAX,
2506 _In_ int flags = match_default)
2507 {
2508 _Assume_(text || start >= end);
2509 if (start < end && text[start]) {
2510 if (text[start] == '-' ||
2511 text[start] == '_' ||
2512 text[start] == ':' ||
2513 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2514 {
2515 this->interval.end = (this->interval.start = start) + 1;
2516 return true;
2517 }
2518 }
2519 this->interval.invalidate();
2520 return false;
2521 }
2522 };
2523
2526#ifdef _UNICODE
2528#else
2530#endif
2531
2536 {
2537 public:
2538 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2539
2540 protected:
2541 virtual bool do_match(
2542 _In_reads_or_z_(end) const char* text,
2543 _In_ size_t start = 0,
2544 _In_ size_t end = SIZE_MAX,
2545 _In_ int flags = match_default)
2546 {
2547 _Assume_(text || start >= end);
2548 if (start < end && text[start]) {
2549 wchar_t buf[3];
2550 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2551 const wchar_t* chr_end = chr + stdex::strlen(chr);
2552 if (((chr[0] == L'-' ||
2553 chr[0] == L'_' ||
2554 chr[0] == L':') && chr[1] == 0) ||
2555 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2556 {
2557 this->interval.start = start;
2558 return true;
2559 }
2560 }
2561 this->interval.invalidate();
2562 return false;
2563 }
2564 };
2565
2569 template <class T>
2571 {
2572 public:
// IPv6 address parser constructor.
// NOTE(review): this listing skips the inner lines with the constructor name
// (2573), the first member initializer (2594) and the last initializer (2613).
// Stores one parser per hexadecimal digit value 0-15, the component separator
// and the optional scope-ID separator, then zeroes `value`.
2574 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2575 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2576 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2577 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2578 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2579 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2580 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2581 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2582 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2583 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2584 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2585 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2586 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2587 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2588 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2589 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2590 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2591 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2592 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2593 _In_ const std::locale& locale = std::locale()) :
2595 m_digit_0(digit_0),
2596 m_digit_1(digit_1),
2597 m_digit_2(digit_2),
2598 m_digit_3(digit_3),
2599 m_digit_4(digit_4),
2600 m_digit_5(digit_5),
2601 m_digit_6(digit_6),
2602 m_digit_7(digit_7),
2603 m_digit_8(digit_8),
2604 m_digit_9(digit_9),
2605 m_digit_10(digit_10),
2606 m_digit_11(digit_11),
2607 m_digit_12(digit_12),
2608 m_digit_13(digit_13),
2609 m_digit_14(digit_14),
2610 m_digit_15(digit_15),
2611 m_separator(separator),
2612 m_scope_id_separator(scope_id_separator),
2614 {
2615 memset(&value, 0, sizeof(value));
2616 }
2617
// Resets the parse result: each of the eight component intervals is set to
// start=1/end=0, `value` is zeroed, and scope_id (when set) is invalidated.
// NOTE(review): this listing skips inner line 2638, just before the closing brace.
2618 virtual void invalidate()
2619 {
2620 components[0].start = 1;
2621 components[0].end = 0;
2622 components[1].start = 1;
2623 components[1].end = 0;
2624 components[2].start = 1;
2625 components[2].end = 0;
2626 components[3].start = 1;
2627 components[3].end = 0;
2628 components[4].start = 1;
2629 components[4].end = 0;
2630 components[5].start = 1;
2631 components[5].end = 0;
2632 components[6].start = 1;
2633 components[6].end = 0;
2634 components[7].start = 1;
2635 components[7].end = 0;
2636 memset(&value, 0, sizeof(value));
2637 if (scope_id) scope_id->invalidate();
2639 }
2640
// Optional scope-ID parser; null-checked before every use in this class.
2643 std::shared_ptr<basic_parser<T>> scope_id;
2644
2645 protected:
// Matches an IPv6 address: up to eight hexadecimal components (each at most
// 0xffff) separated by m_separator, with one "::" zero compaction allowed,
// optionally followed by m_scope_id_separator and scope_id.
// On success `interval` covers the match and value.s6_words holds the
// components converted with htons(); on failure invalidate() is called and
// false is returned.
// NOTE(review): this listing skips inner lines 2732 and 2737 inside the
// compaction fix-up near the end, so that part is incomplete in this view.
2646 virtual bool do_match(
2647 _In_reads_or_z_opt_(end) const T* text,
2648 _In_ size_t start = 0,
2649 _In_ size_t end = SIZE_MAX,
2650 _In_ int flags = match_default)
2651 {
2652 _Assume_(text || start >= end);
2653 this->interval.end = start;
2654 memset(&value, 0, sizeof(value));
2655
// compaction_i == SIZE_MAX means no "::" has been seen yet.
2656 size_t i, compaction_i = SIZE_MAX, compaction_start = start;
2657 for (i = 0; i < 8; i++) {
2658 bool is_empty = true;
2659
2660 if (m_separator->match(text, this->interval.end, end, flags)) {
2661 // : found
2662 this->interval.end = m_separator->interval.end;
2663 if (m_separator->match(text, this->interval.end, end, flags)) {
2664 // :: found
2665 if (compaction_i == SIZE_MAX) {
2666 // Zero compaction start
2667 compaction_i = i;
2668 compaction_start = m_separator->interval.start;
2669 this->interval.end = m_separator->interval.end;
2670 }
2671 else {
2672 // More than one zero compaction
2673 break;
2674 }
2675 }
2676 else if (!i) {
2677 // Leading : found
2678 goto error;
2679 }
2680 }
2681 else if (i) {
2682 // : missing
2683 break;
2684 }
2685
2686 components[i].start = this->interval.end;
2687 size_t x;
// Accumulate hexadecimal digits; the first parser that matches decides the digit value.
2688 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2689 size_t dig, digit_end;
2690 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2691 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2692 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2693 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2694 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2695 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2696 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2697 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2698 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2699 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2700 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2701 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2702 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2703 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2704 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2705 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2706 else break;
2707 size_t x_n = x * 16 + dig;
// Stop consuming digits once the component would exceed 16 bits.
2708 if (x_n <= 0xffff) {
2709 x = x_n;
2710 this->interval.end = digit_end;
2711 is_empty = false;
2712 }
2713 else
2714 break;
2715 }
2716 if (is_empty) {
2717 if (compaction_i != SIZE_MAX) {
2718 // Zero compaction active: no sweat.
2719 break;
2720 }
2721 goto error;
2722 }
2723 components[i].end = this->interval.end;
2724 this->value.s6_words[i] = htons((uint16_t)x);
2725 }
2726
2727 if (compaction_i != SIZE_MAX) {
2728 // Align components right due to zero compaction.
2729 size_t j, k;
2730 for (j = 8, k = i; k > compaction_i;) {
2731 this->value.s6_words[--j] = this->value.s6_words[--k];
// NOTE(review): inner line 2732 is missing from this listing here.
2733 }
2734 for (; j > compaction_i;) {
2735 this->value.s6_words[--j] = 0;
2736 components[j].start =
// NOTE(review): inner line 2737 is missing from this listing; the assignment above is truncated in this view.
2738 }
2739 }
2740 else if (i < 8)
2741 goto error;
2742
// The scope ID is optional: it extends the match only when both its separator and the ID itself match.
2743 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2744 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2745 this->interval.end = scope_id->interval.end;
2746 else if (scope_id)
2747 scope_id->invalidate();
2748
2749 this->interval.start = start;
2750 return true;
2751
2752 error:
2753 invalidate();
2754 return false;
2755 }
2756
// Parsers recognising the hexadecimal digit values 0 through 15 (one parser
// per value), the component separator and the optional scope-ID separator.
2757 std::shared_ptr<basic_parser<T>>
2758 m_digit_0,
2759 m_digit_1,
2760 m_digit_2,
2761 m_digit_3,
2762 m_digit_4,
2763 m_digit_5,
2764 m_digit_6,
2765 m_digit_7,
2766 m_digit_8,
2767 m_digit_9,
2768 m_digit_10,
2769 m_digit_11,
2770 m_digit_12,
2771 m_digit_13,
2772 m_digit_14,
2773 m_digit_15;
2774 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2775 };
2776
// Character-type aliases of basic_ipv6_address; the t-prefixed alias selects
// the wchar_t variant when _UNICODE is defined.
2777 using ipv6_address = basic_ipv6_address<char>;
2778 using wipv6_address = basic_ipv6_address<wchar_t>;
2779#ifdef _UNICODE
2780 using tipv6_address = wipv6_address;
2781#else
2782 using tipv6_address = ipv6_address;
2783#endif
2784 using sgml_ipv6_address = basic_ipv6_address<char>;
2785
2789 template <class T>
2791 {
2792 public:
// DNS domain character parser constructor.
// NOTE(review): this listing skips the inner lines with the constructor name
// (2793) and the first member initializer (2796).
// Stores the allow_idn switch and starts with allow_on_edge set to true.
2794 _In_ bool allow_idn,
2795 _In_ const std::locale& locale = std::locale()) :
2797 m_allow_idn(allow_idn),
2798 allow_on_edge(true)
2799 {}
2800
2802
2803 protected:
2804 virtual bool do_match(
2805 _In_reads_or_z_opt_(end) const T* text,
2806 _In_ size_t start = 0,
2807 _In_ size_t end = SIZE_MAX,
2808 _In_ int flags = match_default)
2809 {
2810 _Assume_(text || start >= end);
2811 if (start < end && text[start]) {
2812 if (('A' <= text[start] && text[start] <= 'Z') ||
2813 ('a' <= text[start] && text[start] <= 'z') ||
2814 ('0' <= text[start] && text[start] <= '9'))
2815 allow_on_edge = true;
2816 else if (text[start] == '-')
2817 allow_on_edge = false;
2818 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2819 allow_on_edge = true;
2820 else {
2821 this->interval.invalidate();
2822 return false;
2823 }
2824 this->interval.end = (this->interval.start = start) + 1;
2825 return true;
2826 }
2827 this->interval.invalidate();
2828 return false;
2829 }
2830
// When true, do_match also accepts non-ASCII characters that the locale classifies as alphanumeric.
2831 bool m_allow_idn;
2832 };
2833
// Character-type aliases of basic_dns_domain_char; the t-prefixed alias
// selects the wchar_t variant when _UNICODE is defined.
2834 using dns_domain_char = basic_dns_domain_char<char>;
2835 using wdns_domain_char = basic_dns_domain_char<wchar_t>;
2836#ifdef _UNICODE
2837 using tdns_domain_char = wdns_domain_char;
2838#else
2839 using tdns_domain_char = dns_domain_char;
2840#endif
2841
2846 {
2847 public:
// SGML DNS domain character parser constructor.
// NOTE(review): this listing skips the inner lines with the constructor name
// (2848) and the initializer list (2851), so only the parameters and the
// empty body are visible.
2849 _In_ bool allow_idn,
2850 _In_ const std::locale& locale = std::locale()) :
2852 {}
2853
2854 protected:
2855 virtual bool do_match(
2856 _In_reads_or_z_(end) const char* text,
2857 _In_ size_t start = 0,
2858 _In_ size_t end = SIZE_MAX,
2859 _In_ int flags = match_default)
2860 {
2861 _Assume_(text || start >= end);
2862 if (start < end && text[start]) {
2863 wchar_t buf[3];
2864 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2865 const wchar_t* chr_end = chr + stdex::strlen(chr);
2866 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2867 ('a' <= chr[0] && chr[0] <= 'z') ||
2868 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2869 allow_on_edge = true;
2870 else if (chr[0] == '-' && chr[1] == 0)
2871 allow_on_edge = false;
2872 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2873 allow_on_edge = true;
2874 else {
2875 this->interval.invalidate();
2876 return false;
2877 }
2878 this->interval.start = start;
2879 return true;
2880 }
2881 this->interval.invalidate();
2882 return false;
2883 }
2884 };
2885
2889 template <class T>
2891 {
2892 public:
// DNS name parser constructor.
// NOTE(review): this listing skips the inner lines with the constructor name
// (2893) and the first member initializers (2898-2899).
// Stores the domain-character parser and the label separator parser.
2894 _In_ bool allow_absolute,
2895 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2896 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2897 _In_ const std::locale& locale = std::locale()) :
2900 m_domain_char(domain_char),
2901 m_separator(separator)
2902 {}
2903
2904 protected:
2905 virtual bool do_match(
2906 _In_reads_or_z_opt_(end) const T* text,
2907 _In_ size_t start = 0,
2908 _In_ size_t end = SIZE_MAX,
2909 _In_ int flags = match_default)
2910 {
2911 _Assume_(text || start >= end);
2912 size_t i = start, count;
2913 for (count = 0; i < end && text[i] && count < 127; count++) {
2914 if (m_domain_char->match(text, i, end, flags) &&
2915 m_domain_char->allow_on_edge)
2916 {
2917 // Domain start
2918 this->interval.end = i = m_domain_char->interval.end;
2919 while (i < end && text[i]) {
2920 if (m_domain_char->allow_on_edge &&
2921 m_separator->match(text, i, end, flags))
2922 {
2923 // Domain end
2924 if (m_allow_absolute)
2925 this->interval.end = i = m_separator->interval.end;
2926 else {
2927 this->interval.end = i;
2928 i = m_separator->interval.end;
2929 }
2930 break;
2931 }
2932 if (m_domain_char->match(text, i, end, flags)) {
2933 if (m_domain_char->allow_on_edge)
2934 this->interval.end = i = m_domain_char->interval.end;
2935 else
2936 i = m_domain_char->interval.end;
2937 }
2938 else {
2939 this->interval.start = start;
2940 return true;
2941 }
2942 }
2943 }
2944 else
2945 break;
2946 }
2947 if (count) {
2948 this->interval.start = start;
2949 return true;
2950 }
2951 this->interval.invalidate();
2952 return false;
2953 }
2954
// Parser for one domain character (also reports allow_on_edge) and parser for the label separator.
2956 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2957 std::shared_ptr<basic_parser<T>> m_separator;
2958 };
2959
// tdns_name selects the wchar_t variant when _UNICODE is defined.
// NOTE(review): the lines declaring dns_name and wdns_name are not in this listing.
2962#ifdef _UNICODE
2963 using tdns_name = wdns_name;
2964#else
2965 using tdns_name = dns_name;
2966#endif
2968
2972 template <class T>
2974 {
2975 public:
2976 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2977
2978 protected:
2979 virtual bool do_match(
2980 _In_reads_or_z_opt_(end) const T* text,
2981 _In_ size_t start = 0,
2982 _In_ size_t end = SIZE_MAX,
2983 _In_ int flags = match_default)
2984 {
2985 _Assume_(text || start >= end);
2986 if (start < end && text[start]) {
2987 if (text[start] == '-' ||
2988 text[start] == '.' ||
2989 text[start] == '_' ||
2990 text[start] == '~' ||
2991 text[start] == '%' ||
2992 text[start] == '!' ||
2993 text[start] == '$' ||
2994 text[start] == '&' ||
2995 text[start] == '\'' ||
2996 //text[start] == '(' ||
2997 //text[start] == ')' ||
2998 text[start] == '*' ||
2999 text[start] == '+' ||
3000 text[start] == ',' ||
3001 text[start] == ';' ||
3002 text[start] == '=' ||
3003 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3004 {
3005 this->interval.end = (this->interval.start = start) + 1;
3006 return true;
3007 }
3008 }
3009 this->interval.invalidate();
3010 return false;
3011 }
3012 };
3013
3016#ifdef _UNICODE
3018#else
3020#endif
3021
3026 {
3027 public:
3028 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3029
3030 protected:
3031 virtual bool do_match(
3032 _In_reads_or_z_(end) const char* text,
3033 _In_ size_t start = 0,
3034 _In_ size_t end = SIZE_MAX,
3035 _In_ int flags = match_default)
3036 {
3037 _Assume_(text || start >= end);
3038 if (start < end && text[start]) {
3039 wchar_t buf[3];
3040 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3041 const wchar_t* chr_end = chr + stdex::strlen(chr);
3042 if (((chr[0] == L'-' ||
3043 chr[0] == L'.' ||
3044 chr[0] == L'_' ||
3045 chr[0] == L'~' ||
3046 chr[0] == L'%' ||
3047 chr[0] == L'!' ||
3048 chr[0] == L'$' ||
3049 chr[0] == L'&' ||
3050 chr[0] == L'\'' ||
3051 //chr[0] == L'(' ||
3052 //chr[0] == L')' ||
3053 chr[0] == L'*' ||
3054 chr[0] == L'+' ||
3055 chr[0] == L',' ||
3056 chr[0] == L';' ||
3057 chr[0] == L'=') && chr[1] == 0) ||
3058 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3059 {
3060 this->interval.start = start;
3061 return true;
3062 }
3063 }
3064
3065 this->interval.invalidate();
3066 return false;
3067 }
3068 };
3069
3073 template <class T>
3075 {
3076 public:
3077 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3078
3079 protected:
3080 virtual bool do_match(
3081 _In_reads_or_z_opt_(end) const T* text,
3082 _In_ size_t start = 0,
3083 _In_ size_t end = SIZE_MAX,
3084 _In_ int flags = match_default)
3085 {
3086 _Assume_(text || start >= end);
3087 if (start < end && text[start]) {
3088 if (text[start] == '-' ||
3089 text[start] == '.' ||
3090 text[start] == '_' ||
3091 text[start] == '~' ||
3092 text[start] == '%' ||
3093 text[start] == '!' ||
3094 text[start] == '$' ||
3095 text[start] == '&' ||
3096 text[start] == '\'' ||
3097 text[start] == '(' ||
3098 text[start] == ')' ||
3099 text[start] == '*' ||
3100 text[start] == '+' ||
3101 text[start] == ',' ||
3102 text[start] == ';' ||
3103 text[start] == '=' ||
3104 text[start] == ':' ||
3105 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3106 {
3107 this->interval.end = (this->interval.start = start) + 1;
3108 return true;
3109 }
3110 }
3111 this->interval.invalidate();
3112 return false;
3113 }
3114 };
3115
3118#ifdef _UNICODE
3120#else
3122#endif
3123
3128 {
3129 public:
3130 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3131
3132 protected:
3133 virtual bool do_match(
3134 _In_reads_or_z_(end) const char* text,
3135 _In_ size_t start = 0,
3136 _In_ size_t end = SIZE_MAX,
3137 _In_ int flags = match_default)
3138 {
3139 _Assume_(text || start >= end);
3140 if (start < end && text[start]) {
3141 wchar_t buf[3];
3142 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3143 const wchar_t* chr_end = chr + stdex::strlen(chr);
3144 if (((chr[0] == L'-' ||
3145 chr[0] == L'.' ||
3146 chr[0] == L'_' ||
3147 chr[0] == L'~' ||
3148 chr[0] == L'%' ||
3149 chr[0] == L'!' ||
3150 chr[0] == L'$' ||
3151 chr[0] == L'&' ||
3152 chr[0] == L'\'' ||
3153 chr[0] == L'(' ||
3154 chr[0] == L')' ||
3155 chr[0] == L'*' ||
3156 chr[0] == L'+' ||
3157 chr[0] == L',' ||
3158 chr[0] == L';' ||
3159 chr[0] == L'=' ||
3160 chr[0] == L':') && chr[1] == 0) ||
3161 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3162 {
3163 this->interval.start = start;
3164 return true;
3165 }
3166 }
3167 this->interval.invalidate();
3168 return false;
3169 }
3170 };
3171
3175 template <class T>
3177 {
3178 public:
3179 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3180
3181 protected:
3182 virtual bool do_match(
3183 _In_reads_or_z_opt_(end) const T* text,
3184 _In_ size_t start = 0,
3185 _In_ size_t end = SIZE_MAX,
3186 _In_ int flags = match_default)
3187 {
3188 _Assume_(text || start >= end);
3189 if (start < end && text[start]) {
3190 if (text[start] == '/' ||
3191 text[start] == '-' ||
3192 text[start] == '.' ||
3193 text[start] == '_' ||
3194 text[start] == '~' ||
3195 text[start] == '%' ||
3196 text[start] == '!' ||
3197 text[start] == '$' ||
3198 text[start] == '&' ||
3199 text[start] == '\'' ||
3200 text[start] == '(' ||
3201 text[start] == ')' ||
3202 text[start] == '*' ||
3203 text[start] == '+' ||
3204 text[start] == ',' ||
3205 text[start] == ';' ||
3206 text[start] == '=' ||
3207 text[start] == ':' ||
3208 text[start] == '@' ||
3209 text[start] == '?' ||
3210 text[start] == '#' ||
3211 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3212 {
3213 this->interval.end = (this->interval.start = start) + 1;
3214 return true;
3215 }
3216 }
3217 this->interval.invalidate();
3218 return false;
3219 }
3220 };
3221
3224#ifdef _UNICODE
3226#else
3228#endif
3229
3234 {
3235 public:
3236 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3237
3238 protected:
3239 virtual bool do_match(
3240 _In_reads_or_z_(end) const char* text,
3241 _In_ size_t start = 0,
3242 _In_ size_t end = SIZE_MAX,
3243 _In_ int flags = match_default)
3244 {
3245 _Assume_(text || start >= end);
3246 if (start < end && text[start]) {
3247 wchar_t buf[3];
3248 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3249 const wchar_t* chr_end = chr + stdex::strlen(chr);
3250 if (((chr[0] == L'/' ||
3251 chr[0] == L'-' ||
3252 chr[0] == L'.' ||
3253 chr[0] == L'_' ||
3254 chr[0] == L'~' ||
3255 chr[0] == L'%' ||
3256 chr[0] == L'!' ||
3257 chr[0] == L'$' ||
3258 chr[0] == L'&' ||
3259 chr[0] == L'\'' ||
3260 chr[0] == L'(' ||
3261 chr[0] == L')' ||
3262 chr[0] == L'*' ||
3263 chr[0] == L'+' ||
3264 chr[0] == L',' ||
3265 chr[0] == L';' ||
3266 chr[0] == L'=' ||
3267 chr[0] == L':' ||
3268 chr[0] == L'@' ||
3269 chr[0] == L'?' ||
3270 chr[0] == L'#') && chr[1] == 0) ||
3271 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3272 {
3273 this->interval.start = start;
3274 return true;
3275 }
3276 }
3277 this->interval.invalidate();
3278 return false;
3279 }
3280 };
3281
3285 template <class T>
3287 {
3288 public:
// URL path parser constructor.
// NOTE(review): this listing skips the inner lines with the constructor name
// (3289) and the first member initializer (3294).
// Stores the path-character parser and the parsers that recognise the start
// of the query and of the bookmark.
3290 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3291 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3292 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3293 _In_ const std::locale& locale = std::locale()) :
3295 m_path_char(path_char),
3296 m_query_start(query_start),
3297 m_bookmark_start(bookmark_start)
3298 {}
3299
// Resets the path, query and bookmark intervals to start=1/end=0.
// NOTE(review): this listing skips inner line 3308, just before the closing brace.
3300 virtual void invalidate()
3301 {
3302 path.start = 1;
3303 path.end = 0;
3304 query.start = 1;
3305 query.end = 0;
3306 bookmark.start = 1;
3307 bookmark.end = 0;
3309 }
3310
// Interval of the bookmark part, i.e. the text after a m_bookmark_start match; set by do_match.
3313 stdex::interval<size_t> bookmark;
3314
3315 protected:
// Matches a URL path with an optional query and an optional bookmark.
// Path characters are consumed with m_path_char until m_query_start or
// m_bookmark_start matches; a bookmark may follow either the path or the
// query. The path, query and bookmark intervals are recorded as each part
// ends, and `interval` covers everything consumed.
// NOTE(review): this listing skips inner line 3394 (between the main loop and
// `path.end = ...`); it presumably opens the block closed by the brace at
// inner line 3398.
3316 virtual bool do_match(
3317 _In_reads_or_z_opt_(end) const T* text,
3318 _In_ size_t start = 0,
3319 _In_ size_t end = SIZE_MAX,
3320 _In_ int flags = match_default)
3321 {
3322 _Assume_(text || start >= end);
3323
3324 this->interval.end = start;
3325 path.start = start;
// Query and bookmark start out invalid (start=1, end=0) and are only set when found.
3326 query.start = 1;
3327 query.end = 0;
3328 bookmark.start = 1;
3329 bookmark.end = 0;
3330
3331 for (;;) {
3332 if (this->interval.end >= end || !text[this->interval.end])
3333 break;
// Query start found: the path ends here and the query begins after the marker.
3334 if (m_query_start->match(text, this->interval.end, end, flags)) {
3335 path.end = this->interval.end;
3336 query.start = this->interval.end = m_query_start->interval.end;
3337 for (;;) {
3338 if (this->interval.end >= end || !text[this->interval.end]) {
3339 query.end = this->interval.end;
3340 break;
3341 }
// Bookmark start inside the query: the query ends here and the bookmark runs to the last path character.
3342 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3343 query.end = this->interval.end;
3344 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3345 for (;;) {
3346 if (this->interval.end >= end || !text[this->interval.end]) {
3347 bookmark.end = this->interval.end;
3348 break;
3349 }
3350 if (m_path_char->match(text, this->interval.end, end, flags))
3351 this->interval.end = m_path_char->interval.end;
3352 else {
3353 bookmark.end = this->interval.end;
3354 break;
3355 }
3356 }
3357 this->interval.start = start;
3358 return true;
3359 }
3360 if (m_path_char->match(text, this->interval.end, end, flags))
3361 this->interval.end = m_path_char->interval.end;
3362 else {
3363 query.end = this->interval.end;
3364 break;
3365 }
3366 }
3367 this->interval.start = start;
3368 return true;
3369 }
// Bookmark start directly after the path (no query).
3370 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3371 path.end = this->interval.end;
3372 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3373 for (;;) {
3374 if (this->interval.end >= end || !text[this->interval.end]) {
3375 bookmark.end = this->interval.end;
3376 break;
3377 }
3378 if (m_path_char->match(text, this->interval.end, end, flags))
3379 this->interval.end = m_path_char->interval.end;
3380 else {
3381 bookmark.end = this->interval.end;
3382 break;
3383 }
3384 }
3385 this->interval.start = start;
3386 return true;
3387 }
3388 if (m_path_char->match(text, this->interval.end, end, flags))
3389 this->interval.end = m_path_char->interval.end;
3390 else
3391 break;
3392 }
3393
// NOTE(review): inner line 3394 is missing from this listing here.
3395 path.end = this->interval.end;
3396 this->interval.start = start;
3397 return true;
3398 }
3399
// Failure path: path and bookmark are reset here; query still holds the invalid value set at the top.
3400 path.start = 1;
3401 path.end = 0;
3402 bookmark.start = 1;
3403 bookmark.end = 0;
3404 this->interval.invalidate();
3405 return false;
3406 }
3407
// Parsers for one path character, the start of the query and the start of the bookmark.
3408 std::shared_ptr<basic_parser<T>> m_path_char;
3409 std::shared_ptr<basic_parser<T>> m_query_start;
3410 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3411 };
3412
// turl_path selects the wchar_t variant when _UNICODE is defined.
// NOTE(review): the lines declaring url_path and wurl_path are not in this listing.
3415#ifdef _UNICODE
3416 using turl_path = wurl_path;
3417#else
3418 using turl_path = url_path;
3419#endif
3421
3425 template <class T>
3426 class basic_url : public basic_parser<T>
3427 {
3428 public:
// Builds a URL parser from its component parsers: the four scheme parsers,
// the colon/slash/at/bracket punctuation parsers, the username and password
// parsers, the three host parsers (IPv4, IPv6, DNS), the port parser and the
// path parser. Each argument is stored in the member of the matching name.
// NOTE(review): this listing skips inner line 3447, the first member initializer.
3429 basic_url(
3430 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3431 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3432 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3433 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3434 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3435 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3436 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3437 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3438 _In_ const std::shared_ptr<basic_parser<T>>& at,
3439 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3440 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3441 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3442 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3443 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3444 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3445 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3446 _In_ const std::locale& locale = std::locale()) :
3448 http_scheme(_http_scheme),
3449 ftp_scheme(_ftp_scheme),
3450 mailto_scheme(_mailto_scheme),
3451 file_scheme(_file_scheme),
3452 m_colon(colon),
3453 m_slash(slash),
3454 username(_username),
3455 password(_password),
3456 m_at(at),
3457 m_ip_lbracket(ip_lbracket),
3458 m_ip_rbracket(ip_rbracket),
3459 ipv4_host(_ipv4_host),
3460 ipv6_host(_ipv6_host),
3461 dns_host(_dns_host),
3462 port(_port),
3463 path(_path)
3464 {}
3465
3466 virtual void invalidate()
3467 {
3468 http_scheme->invalidate();
3469 ftp_scheme->invalidate();
3470 mailto_scheme->invalidate();
3471 file_scheme->invalidate();
3472 username->invalidate();
3473 password->invalidate();
3474 ipv4_host->invalidate();
3475 ipv6_host->invalidate();
3476 dns_host->invalidate();
3477 port->invalidate();
3478 path->invalidate();
3480 }
3481
3482 std::shared_ptr<basic_parser<T>> http_scheme;
3483 std::shared_ptr<basic_parser<T>> ftp_scheme;
3484 std::shared_ptr<basic_parser<T>> mailto_scheme;
3485 std::shared_ptr<basic_parser<T>> file_scheme;
3486 std::shared_ptr<basic_parser<T>> username;
3487 std::shared_ptr<basic_parser<T>> password;
3488 std::shared_ptr<basic_parser<T>> ipv4_host;
3489 std::shared_ptr<basic_parser<T>> ipv6_host;
3490 std::shared_ptr<basic_parser<T>> dns_host;
3491 std::shared_ptr<basic_parser<T>> port;
3492 std::shared_ptr<basic_parser<T>> path;
3493
3494 protected:
3495 virtual bool do_match(
3496 _In_reads_or_z_opt_(end) const T* text,
3497 _In_ size_t start = 0,
3498 _In_ size_t end = SIZE_MAX,
3499 _In_ int flags = match_default)
3500 {
3501 _Assume_(text || start >= end);
3502
3503 this->interval.end = start;
3504
3505 if (http_scheme->match(text, this->interval.end, end, flags) &&
3506 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3507 m_slash->match(text, m_colon->interval.end, end, flags) &&
3508 m_slash->match(text, m_slash->interval.end, end, flags))
3509 {
3510 // http://
3511 this->interval.end = m_slash->interval.end;
3512 ftp_scheme->invalidate();
3513 mailto_scheme->invalidate();
3514 file_scheme->invalidate();
3515 }
3516 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3517 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3518 m_slash->match(text, m_colon->interval.end, end, flags) &&
3519 m_slash->match(text, m_slash->interval.end, end, flags))
3520 {
3521 // ftp://
3522 this->interval.end = m_slash->interval.end;
3523 http_scheme->invalidate();
3524 mailto_scheme->invalidate();
3525 file_scheme->invalidate();
3526 }
3527 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3528 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3529 {
3530 // mailto:
3531 this->interval.end = m_colon->interval.end;
3532 http_scheme->invalidate();
3533 ftp_scheme->invalidate();
3534 file_scheme->invalidate();
3535 }
3536 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3537 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3538 m_slash->match(text, m_colon->interval.end, end, flags) &&
3539 m_slash->match(text, m_slash->interval.end, end, flags))
3540 {
3541 // file://
3542 this->interval.end = m_slash->interval.end;
3543 http_scheme->invalidate();
3544 ftp_scheme->invalidate();
3545 mailto_scheme->invalidate();
3546 }
3547 else {
3548 // Default to http:
3549 http_scheme->invalidate();
3550 ftp_scheme->invalidate();
3551 mailto_scheme->invalidate();
3552 file_scheme->invalidate();
3553 }
3554
3555 if (ftp_scheme->interval) {
3556 if (username->match(text, this->interval.end, end, flags)) {
3557 if (m_colon->match(text, username->interval.end, end, flags) &&
3558 password->match(text, m_colon->interval.end, end, flags) &&
3559 m_at->match(text, password->interval.end, end, flags))
3560 {
3561 // Username and password
3562 this->interval.end = m_at->interval.end;
3563 }
3564 else if (m_at->match(text, this->interval.end, end, flags)) {
3565 // Username only
3566 this->interval.end = m_at->interval.end;
3567 password->invalidate();
3568 }
3569 else {
3570 username->invalidate();
3571 password->invalidate();
3572 }
3573 }
3574 else {
3575 username->invalidate();
3576 password->invalidate();
3577 }
3578
3579 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3580 // Host is IPv4
3581 this->interval.end = ipv4_host->interval.end;
3582 ipv6_host->invalidate();
3583 dns_host->invalidate();
3584 }
3585 else if (
3586 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3587 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3588 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3589 {
3590 // Host is IPv6
3591 this->interval.end = m_ip_rbracket->interval.end;
3592 ipv4_host->invalidate();
3593 dns_host->invalidate();
3594 }
3595 else if (dns_host->match(text, this->interval.end, end, flags)) {
3596 // Host is hostname
3597 this->interval.end = dns_host->interval.end;
3598 ipv4_host->invalidate();
3599 ipv6_host->invalidate();
3600 }
3601 else {
3602 invalidate();
3603 return false;
3604 }
3605
3606 if (m_colon->match(text, this->interval.end, end, flags) &&
3607 port->match(text, m_colon->interval.end, end, flags))
3608 {
3609 // Port
3610 this->interval.end = port->interval.end;
3611 }
3612 else
3613 port->invalidate();
3614
3615 if (path->match(text, this->interval.end, end, flags)) {
3616 // Path
3617 this->interval.end = path->interval.end;
3618 }
3619
3620 this->interval.start = start;
3621 return true;
3622 }
3623
3624 if (mailto_scheme->interval) {
3625 if (username->match(text, this->interval.end, end, flags) &&
3626 m_at->match(text, username->interval.end, end, flags))
3627 {
3628 // Username
3629 this->interval.end = m_at->interval.end;
3630 }
3631 else {
3632 invalidate();
3633 return false;
3634 }
3635
3636 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3637 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3638 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3639 {
3640 // Host is IPv4
3641 this->interval.end = m_ip_rbracket->interval.end;
3642 ipv6_host->invalidate();
3643 dns_host->invalidate();
3644 }
3645 else if (
3646 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3647 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3648 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3649 {
3650 // Host is IPv6
3651 this->interval.end = m_ip_rbracket->interval.end;
3652 ipv4_host->invalidate();
3653 dns_host->invalidate();
3654 }
3655 else if (dns_host->match(text, this->interval.end, end, flags)) {
3656 // Host is hostname
3657 this->interval.end = dns_host->interval.end;
3658 ipv4_host->invalidate();
3659 ipv6_host->invalidate();
3660 }
3661 else {
3662 invalidate();
3663 return false;
3664 }
3665
3666 password->invalidate();
3667 port->invalidate();
3668 path->invalidate();
3669 this->interval.start = start;
3670 return true;
3671 }
3672
3673 if (file_scheme->interval) {
3674 if (path->match(text, this->interval.end, end, flags)) {
3675 // Path
3676 this->interval.end = path->interval.end;
3677 }
3678
3679 username->invalidate();
3680 password->invalidate();
3681 ipv4_host->invalidate();
3682 ipv6_host->invalidate();
3683 dns_host->invalidate();
3684 port->invalidate();
3685 this->interval.start = start;
3686 return true;
3687 }
3688
3689 // "http://" found or defaulted to
3690
3691 // If "http://" explicit, test for username&password.
3692 if (http_scheme->interval &&
3693 username->match(text, this->interval.end, end, flags))
3694 {
3695 if (m_colon->match(text, username->interval.end, end, flags) &&
3696 password->match(text, m_colon->interval.end, end, flags) &&
3697 m_at->match(text, password->interval.end, end, flags))
3698 {
3699 // Username and password
3700 this->interval.end = m_at->interval.end;
3701 }
3702 else if (m_at->match(text, username->interval.end, end, flags)) {
3703 // Username only
3704 this->interval.end = m_at->interval.end;
3705 password->invalidate();
3706 }
3707 else {
3708 username->invalidate();
3709 password->invalidate();
3710 }
3711 }
3712 else {
3713 username->invalidate();
3714 password->invalidate();
3715 }
3716
3717 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3718 // Host is IPv4
3719 this->interval.end = ipv4_host->interval.end;
3720 ipv6_host->invalidate();
3721 dns_host->invalidate();
3722 }
3723 else if (
3724 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3725 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3726 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3727 {
3728 // Host is IPv6
3729 this->interval.end = m_ip_rbracket->interval.end;
3730 ipv4_host->invalidate();
3731 dns_host->invalidate();
3732 }
3733 else if (dns_host->match(text, this->interval.end, end, flags)) {
3734 // Host is hostname
3735 this->interval.end = dns_host->interval.end;
3736 ipv4_host->invalidate();
3737 ipv6_host->invalidate();
3738 }
3739 else {
3740 invalidate();
3741 return false;
3742 }
3743
3744 if (m_colon->match(text, this->interval.end, end, flags) &&
3745 port->match(text, m_colon->interval.end, end, flags))
3746 {
3747 // Port
3748 this->interval.end = port->interval.end;
3749 }
3750 else
3751 port->invalidate();
3752
3753 if (path->match(text, this->interval.end, end, flags)) {
3754 // Path
3755 this->interval.end = path->interval.end;
3756 }
3757
3758 this->interval.start = start;
3759 return true;
3760 }
3761
3762 std::shared_ptr<basic_parser<T>> m_colon;
3763 std::shared_ptr<basic_parser<T>> m_slash;
3764 std::shared_ptr<basic_parser<T>> m_at;
3765 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3766 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3767 };
3768
// Ready-made instantiations of basic_url: `url` for char, `wurl` for wchar_t,
// and `turl`, which resolves to `wurl` when _UNICODE is defined and to `url`
// otherwise.
3769 using url = basic_url<char>;
3770 using wurl = basic_url<wchar_t>;
3771#ifdef _UNICODE
3772 using turl = wurl;
3773#else
3774 using turl = url;
3775#endif
// `sgml_url` is the same char instantiation as `url`.
3776 using sgml_url = basic_url<char>;
3777
3781 template <class T>
3783 {
3784 public:
3786 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3787 _In_ const std::shared_ptr<basic_parser<T>>& at,
3788 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3789 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3790 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3791 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3792 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3793 _In_ const std::locale& locale = std::locale()) :
3795 username(_username),
3796 m_at(at),
3797 m_ip_lbracket(ip_lbracket),
3798 m_ip_rbracket(ip_rbracket),
3799 ipv4_host(_ipv4_host),
3800 ipv6_host(_ipv6_host),
3801 dns_host(_dns_host)
3802 {}
3803
3804 virtual void invalidate()
3805 {
3806 username->invalidate();
3807 ipv4_host->invalidate();
3808 ipv6_host->invalidate();
3809 dns_host->invalidate();
3811 }
3812
3813 std::shared_ptr<basic_parser<T>> username;
3814 std::shared_ptr<basic_parser<T>> ipv4_host;
3815 std::shared_ptr<basic_parser<T>> ipv6_host;
3816 std::shared_ptr<basic_parser<T>> dns_host;
3817
3818 protected:
3819 virtual bool do_match(
3820 _In_reads_or_z_opt_(end) const T* text,
3821 _In_ size_t start = 0,
3822 _In_ size_t end = SIZE_MAX,
3823 _In_ int flags = match_default)
3824 {
3825 _Assume_(text || start >= end);
3826
3827 if (username->match(text, start, end, flags) &&
3828 m_at->match(text, username->interval.end, end, flags))
3829 {
3830 // Username@
3831 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3832 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3833 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3834 {
3835 // Host is IPv4
3836 this->interval.end = m_ip_rbracket->interval.end;
3837 ipv6_host->invalidate();
3838 dns_host->invalidate();
3839 }
3840 else if (
3841 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3842 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3843 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3844 {
3845 // Host is IPv6
3846 this->interval.end = m_ip_rbracket->interval.end;
3847 ipv4_host->invalidate();
3848 dns_host->invalidate();
3849 }
3850 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3851 // Host is hostname
3852 this->interval.end = dns_host->interval.end;
3853 ipv4_host->invalidate();
3854 ipv6_host->invalidate();
3855 }
3856 else
3857 goto error;
3858 this->interval.start = start;
3859 return true;
3860 }
3861
3862 error:
3863 invalidate();
3864 return false;
3865 }
3866
3867 std::shared_ptr<basic_parser<T>> m_at;
3868 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3869 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3870 };
3871
3874#ifdef _UNICODE
3876#else
3878#endif
3880
3884 template <class T>
3886 {
3887 public:
3889 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3890 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3891 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3892 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3893 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3894 _In_ const std::locale& locale = std::locale()) :
3897 apex(_apex),
3898 eyes(_eyes),
3899 nose(_nose),
3900 mouth(_mouth)
3901 {}
3902
3903 virtual void invalidate()
3904 {
3905 if (emoticon) emoticon->invalidate();
3906 if (apex) apex->invalidate();
3907 eyes->invalidate();
3908 if (nose) nose->invalidate();
3909 mouth->invalidate();
3911 }
3912
3913 std::shared_ptr<basic_parser<T>> emoticon;
3914 std::shared_ptr<basic_parser<T>> apex;
3915 std::shared_ptr<basic_parser<T>> eyes;
3916 std::shared_ptr<basic_parser<T>> nose;
3917 std::shared_ptr<basic_set<T>> mouth;
3918
3919 protected:
3920 virtual bool do_match(
3921 _In_reads_or_z_opt_(end) const T* text,
3922 _In_ size_t start = 0,
3923 _In_ size_t end = SIZE_MAX,
3924 _In_ int flags = match_default)
3925 {
3926 _Assume_(text || start >= end);
3927
3928 if (emoticon && emoticon->match(text, start, end, flags)) {
3929 if (apex) apex->invalidate();
3930 eyes->invalidate();
3931 if (nose) nose->invalidate();
3932 mouth->invalidate();
3933 this->interval.start = start;
3934 this->interval.end = emoticon->interval.end;
3935 return true;
3936 }
3937
3938 this->interval.end = start;
3939
3940 if (apex && apex->match(text, this->interval.end, end, flags))
3941 this->interval.end = apex->interval.end;
3942
3943 if (eyes->match(text, this->interval.end, end, flags)) {
3944 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3945 mouth->match(text, nose->interval.end, end, flags))
3946 {
3947 size_t
3949 hit_offset = mouth->hit_offset;
3950 // Mouth may repeat :-)))))))
3951 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3952 mouth->interval.start = start_mouth;
3953 mouth->interval.end = this->interval.end;
3954 this->interval.start = start;
3955 return true;
3956 }
3957 if (mouth->match(text, eyes->interval.end, end, flags)) {
3958 size_t
3960 hit_offset = mouth->hit_offset;
3961 // Mouth may repeat :-)))))))
3962 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3963 if (nose) nose->invalidate();
3964 mouth->interval.start = start_mouth;
3965 mouth->interval.end = this->interval.end;
3966 this->interval.start = start;
3967 return true;
3968 }
3969 }
3970
3971 if (emoticon) emoticon->invalidate();
3972 if (apex) apex->invalidate();
3973 eyes->invalidate();
3974 if (nose) nose->invalidate();
3975 mouth->invalidate();
3976 this->interval.invalidate();
3977 return false;
3978 }
3979 };
3980
// Ready-made instantiations of basic_emoticon: `emoticon` for char,
// `wemoticon` for wchar_t, and `temoticon`, which resolves to `wemoticon`
// when _UNICODE is defined and to `emoticon` otherwise.
3981 using emoticon = basic_emoticon<char>;
3982 using wemoticon = basic_emoticon<wchar_t>;
3983#ifdef _UNICODE
3984 using temoticon = wemoticon;
3985#else
3986 using temoticon = emoticon;
3987#endif
// `sgml_emoticon` is the same char instantiation as `emoticon`.
3988 using sgml_emoticon = basic_emoticon<char>;
3989
/// Date component orders, one bit each so that several can be OR-ed into a mask.
/// The letters give the order of the components: d = day, m = month, y = year.
enum date_format_t {
    date_format_none = 0,      ///< No format
    date_format_dmy  = 1 << 0, ///< day, month, year
    date_format_mdy  = 1 << 1, ///< month, day, year
    date_format_ymd  = 1 << 2, ///< year, month, day
    date_format_ym   = 1 << 3, ///< year, month
    date_format_my   = 1 << 4, ///< month, year
    date_format_dm   = 1 << 5, ///< day, month
    date_format_md   = 1 << 6, ///< month, day
};
4003
4007 template <class T>
4008 class basic_date : public basic_parser<T>
4009 {
4010 public:
4011 basic_date(
4012 _In_ int format_mask,
4013 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4014 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4015 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4016 _In_ const std::shared_ptr<basic_set<T>>& separator,
4017 _In_ const std::shared_ptr<basic_parser<T>>& space,
4018 _In_ const std::locale& locale = std::locale()) :
4020 format(date_format_none),
4021 m_format_mask(format_mask),
4022 day(_day),
4023 month(_month),
4024 year(_year),
4025 m_separator(separator),
4026 m_space(space)
4027 {}
4028
4029 virtual void invalidate()
4030 {
4031 if (day) day->invalidate();
4032 if (month) month->invalidate();
4033 if (year) year->invalidate();
4034 format = date_format_none;
4036 }
4037
4038 date_format_t format;
4039 std::shared_ptr<basic_integer<T>> day;
4040 std::shared_ptr<basic_integer<T>> month;
4041 std::shared_ptr<basic_integer<T>> year;
4042
4043 protected:
4044 virtual bool do_match(
4045 _In_reads_or_z_opt_(end) const T* text,
4046 _In_ size_t start = 0,
4047 _In_ size_t end = SIZE_MAX,
4048 _In_ int flags = match_default)
4049 {
4050 _Assume_(text || start >= end);
4051
4052 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4053 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4054 if (day->match(text, start, end, flags)) {
4055 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4056 if (m_separator->match(text, this->interval.end, end, flags)) {
4057 size_t hit_offset = m_separator->hit_offset;
4058 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4059 if (month->match(text, this->interval.end, end, flags)) {
4060 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4061 if (m_separator->match(text, this->interval.end, end, flags) &&
4062 m_separator->hit_offset == hit_offset) // Both separators must match.
4063 {
4064 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4065 if (year->match(text, this->interval.end, end, flags) &&
4066 is_valid(day->value, month->value))
4067 {
4068 this->interval.start = start;
4069 this->interval.end = year->interval.end;
4070 format = date_format_dmy;
4071 return true;
4072 }
4073 }
4074 }
4075 }
4076 }
4077 }
4078
4079 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4080 if (month->match(text, start, end, flags)) {
4081 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4082 if (m_separator->match(text, this->interval.end, end, flags)) {
4083 size_t hit_offset = m_separator->hit_offset;
4084 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4085 if (day->match(text, this->interval.end, end, flags)) {
4086 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4087 if (m_separator->match(text, this->interval.end, end, flags) &&
4088 m_separator->hit_offset == hit_offset) // Both separators must match.
4089 {
4090 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4091 if (year->match(text, this->interval.end, end, flags) &&
4092 is_valid(day->value, month->value))
4093 {
4094 this->interval.start = start;
4095 this->interval.end = year->interval.end;
4096 format = date_format_mdy;
4097 return true;
4098 }
4099 }
4100 }
4101 }
4102 }
4103 }
4104
4105 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4106 if (year->match(text, start, end, flags)) {
4107 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4108 if (m_separator->match(text, this->interval.end, end, flags)) {
4109 size_t hit_offset = m_separator->hit_offset;
4110 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4111 if (month->match(text, this->interval.end, end, flags)) {
4112 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4113 if (m_separator->match(text, this->interval.end, end, flags) &&
4114 m_separator->hit_offset == hit_offset) // Both separators must match.
4115 {
4116 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4117 if (day->match(text, this->interval.end, end, flags) &&
4118 is_valid(day->value, month->value))
4119 {
4120 this->interval.start = start;
4121 this->interval.end = day->interval.end;
4122 format = date_format_ymd;
4123 return true;
4124 }
4125 }
4126 }
4127 }
4128 }
4129 }
4130
4131 if ((m_format_mask & date_format_ym) == date_format_ym) {
4132 if (year->match(text, start, end, flags)) {
4133 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4134 if (m_separator->match(text, this->interval.end, end, flags)) {
4135 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4136 if (month->match(text, this->interval.end, end, flags) &&
4137 is_valid(SIZE_MAX, month->value))
4138 {
4139 if (day) day->invalidate();
4140 this->interval.start = start;
4141 this->interval.end = month->interval.end;
4142 format = date_format_ym;
4143 return true;
4144 }
4145 }
4146 }
4147 }
4148
4149 if ((m_format_mask & date_format_my) == date_format_my) {
4150 if (month->match(text, start, end, flags)) {
4151 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4152 if (m_separator->match(text, this->interval.end, end, flags)) {
4153 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4154 if (year->match(text, this->interval.end, end, flags) &&
4155 is_valid(SIZE_MAX, month->value))
4156 {
4157 if (day) day->invalidate();
4158 this->interval.start = start;
4159 this->interval.end = year->interval.end;
4160 format = date_format_my;
4161 return true;
4162 }
4163 }
4164 }
4165 }
4166
4167 if ((m_format_mask & date_format_dm) == date_format_dm) {
4168 if (day->match(text, start, end, flags)) {
4169 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4170 if (m_separator->match(text, this->interval.end, end, flags)) {
4171 size_t hit_offset = m_separator->hit_offset;
4172 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4173 if (month->match(text, this->interval.end, end, flags) &&
4174 is_valid(day->value, month->value))
4175 {
4176 if (year) year->invalidate();
4177 this->interval.start = start;
4178 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4179 if (m_separator->match(text, this->interval.end, end, flags) &&
4180 m_separator->hit_offset == hit_offset) // Both separators must match.
4181 this->interval.end = m_separator->interval.end;
4182 else
4183 this->interval.end = month->interval.end;
4184 format = date_format_dm;
4185 return true;
4186 }
4187 }
4188 }
4189 }
4190
4191 if ((m_format_mask & date_format_md) == date_format_md) {
4192 if (month->match(text, start, end, flags)) {
4193 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4194 if (m_separator->match(text, this->interval.end, end, flags)) {
4195 size_t hit_offset = m_separator->hit_offset;
4196 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4197 if (day->match(text, this->interval.end, end, flags) &&
4198 is_valid(day->value, month->value))
4199 {
4200 if (year) year->invalidate();
4201 this->interval.start = start;
4202 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4203 if (m_separator->match(text, this->interval.end, end, flags) &&
4204 m_separator->hit_offset == hit_offset) // Both separators must match.
4205 this->interval.end = m_separator->interval.end;
4206 else
4207 this->interval.end = day->interval.end;
4208 format = date_format_md;
4209 return true;
4210 }
4211 }
4212 }
4213 }
4214
4215 if (day) day->invalidate();
4216 if (month) month->invalidate();
4217 if (year) year->invalidate();
4218 format = date_format_none;
4219 this->interval.invalidate();
4220 return false;
4221 }
4222
4223 static bool is_valid(size_t day, size_t month)
4224 {
4225 if (month == SIZE_MAX) {
4226 // Default to January. This allows validating day only, as January has all 31 days.
4227 month = 1;
4228 }
4229 if (day == SIZE_MAX) {
4230 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4231 day = 1;
4232 }
4233
4234 switch (month) {
4235 case 1:
4236 case 3:
4237 case 5:
4238 case 7:
4239 case 8:
4240 case 10:
4241 case 12:
4242 return 1 <= day && day <= 31;
4243 case 2:
4244 return 1 <= day && day <= 29;
4245 case 4:
4246 case 6:
4247 case 9:
4248 case 11:
4249 return 1 <= day && day <= 30;
4250 default:
4251 return false;
4252 }
4253 }
4254
4255 int m_format_mask;
4256 std::shared_ptr<basic_set<T>> m_separator;
4257 std::shared_ptr<basic_parser<T>> m_space;
4258 };
4259
// Ready-made instantiations of basic_date: `date` for char, `wdate` for
// wchar_t, and `tdate`, which resolves to `wdate` when _UNICODE is defined
// and to `date` otherwise.
4260 using date = basic_date<char>;
4261 using wdate = basic_date<wchar_t>;
4262#ifdef _UNICODE
4263 using tdate = wdate;
4264#else
4265 using tdate = date;
4266#endif
4268
4272 template <class T>
4273 class basic_time : public basic_parser<T>
4274 {
4275 public:
4276 basic_time(
4277 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4278 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4279 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4280 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4281 _In_ const std::shared_ptr<basic_set<T>>& separator,
4282 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4283 _In_ const std::locale& locale = std::locale()) :
4285 hour(_hour),
4286 minute(_minute),
4287 second(_second),
4288 millisecond(_millisecond),
4289 m_separator(separator),
4290 m_millisecond_separator(millisecond_separator)
4291 {}
4292
4293 virtual void invalidate()
4294 {
4295 hour->invalidate();
4296 minute->invalidate();
4297 if (second) second->invalidate();
4298 if (millisecond) millisecond->invalidate();
4300 }
4301
4302 std::shared_ptr<basic_integer10<T>> hour;
4303 std::shared_ptr<basic_integer10<T>> minute;
4304 std::shared_ptr<basic_integer10<T>> second;
4305 std::shared_ptr<basic_integer10<T>> millisecond;
4306
4307 protected:
4308 virtual bool do_match(
4309 _In_reads_or_z_opt_(end) const T* text,
4310 _In_ size_t start = 0,
4311 _In_ size_t end = SIZE_MAX,
4312 _In_ int flags = match_default)
4313 {
4314 _Assume_(text || start >= end);
4315
4316 if (hour->match(text, start, end, flags) &&
4317 m_separator->match(text, hour->interval.end, end, flags) &&
4318 minute->match(text, m_separator->interval.end, end, flags) &&
4319 minute->value < 60)
4320 {
4321 // hh::mm
4322 size_t hit_offset = m_separator->hit_offset;
4323 if (m_separator->match(text, minute->interval.end, end, flags) &&
4324 m_separator->hit_offset == hit_offset && // Both separators must match.
4325 second && second->match(text, m_separator->interval.end, end, flags) &&
4326 second->value < 60)
4327 {
4328 // hh::mm:ss
4329 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4330 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4331 millisecond->value < 1000)
4332 {
4333 // hh::mm:ss.mmmm
4334 this->interval.end = millisecond->interval.end;
4335 }
4336 else {
4337 if (millisecond) millisecond->invalidate();
4338 this->interval.end = second->interval.end;
4339 }
4340 }
4341 else {
4342 if (second) second->invalidate();
4343 if (millisecond) millisecond->invalidate();
4344 this->interval.end = minute->interval.end;
4345 }
4346 this->interval.start = start;
4347 return true;
4348 }
4349
4350 hour->invalidate();
4351 minute->invalidate();
4352 if (second) second->invalidate();
4353 if (millisecond) millisecond->invalidate();
4354 this->interval.invalidate();
4355 return false;
4356 }
4357
4358 std::shared_ptr<basic_set<T>> m_separator;
4359 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4360 };
4361
// Ready-made instantiations of basic_time: `time` for char, `wtime` for
// wchar_t, and `ttime`, which resolves to `wtime` when _UNICODE is defined
// and to `time` otherwise.
4362 using time = basic_time<char>;
4363 using wtime = basic_time<wchar_t>;
4364#ifdef _UNICODE
4365 using ttime = wtime;
4366#else
4367 using ttime = time;
4368#endif
4370
4374 template <class T>
4375 class basic_angle : public basic_parser<T>
4376 {
4377 public:
4379 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4380 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4381 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4382 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4383 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4384 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4385 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4386 _In_ const std::locale& locale = std::locale()) :
4388 degree(_degree),
4389 degree_separator(_degree_separator),
4390 minute(_minute),
4391 minute_separator(_minute_separator),
4392 second(_second),
4393 second_separator(_second_separator),
4394 decimal(_decimal)
4395 {}
4396
4397 virtual void invalidate()
4398 {
4399 degree->invalidate();
4400 degree_separator->invalidate();
4401 minute->invalidate();
4402 minute_separator->invalidate();
4403 if (second) second->invalidate();
4404 if (second_separator) second_separator->invalidate();
4405 if (decimal) decimal->invalidate();
4407 }
4408
4409 std::shared_ptr<basic_integer10<T>> degree;
4410 std::shared_ptr<basic_parser<T>> degree_separator;
4411 std::shared_ptr<basic_integer10<T>> minute;
4412 std::shared_ptr<basic_parser<T>> minute_separator;
4413 std::shared_ptr<basic_integer10<T>> second;
4414 std::shared_ptr<basic_parser<T>> second_separator;
4415 std::shared_ptr<basic_parser<T>> decimal;
4416
4417 protected:
// Tries to match an angle at text[start]: degrees, minutes and seconds in that order, then an optional decimal part.
//
// text  - text to parse; may be null only when the range [start, end) is empty (see _Assume_ below)
// start - offset at which matching begins
// end   - offset at which matching must stop
// flags - match flags, forwarded unchanged to every sub-parser
//
// Returns true when at least one of degree, minute or second produced a non-empty match;
// this->interval then spans from start to the end of the last consumed component.
// Returns false (and invalidates this->interval) otherwise.
4418 virtual bool do_match(
4419 _In_reads_or_z_opt_(end) const T* text,
4420 _In_ size_t start = 0,
4421 _In_ size_t end = SIZE_MAX,
4422 _In_ int flags = match_default)
4423 {
4424 _Assume_(text || start >= end);
4425
// this->interval.end serves as the running parse position.
4426 this->interval.end = start;
4427
// Degrees count only when immediately followed by the degree separator.
4428 if (degree->match(text, this->interval.end, end, flags) &&
4429 degree_separator->match(text, degree->interval.end, end, flags))
4430 {
4431 // Degrees
4432 this->interval.end = degree_separator->interval.end;
4433 }
4434 else {
// Discard a partial match (number without separator) so later checks do not see it.
4435 degree->invalidate();
4436 degree_separator->invalidate();
4437 }
4438
// Minutes must be below 60 and must be followed by the minute separator.
4439 if (minute->match(text, this->interval.end, end, flags) &&
4440 minute->value < 60 &&
4441 minute_separator->match(text, minute->interval.end, end, flags))
4442 {
4443 // Minutes
4444 this->interval.end = minute_separator->interval.end;
4445 }
4446 else {
4447 minute->invalidate();
4448 minute_separator->invalidate();
4449 }
4450
// Seconds must be below 60; unlike degrees and minutes, their separator is optional.
4451 if (second && second->match(text, this->interval.end, end, flags) &&
4452 second->value < 60)
4453 {
4454 // Seconds
4455 this->interval.end = second->interval.end;
4456 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4457 this->interval.end = second_separator->interval.end;
4458 else
4459 if (second_separator) second_separator->invalidate();
4460 }
4461 else {
4462 if (second) second->invalidate();
4463 if (second_separator) second_separator->invalidate();
4464 }
4465
// A component counts as present when its interval is non-empty (start < end).
4466 if (degree->interval.start < degree->interval.end ||
4467 minute->interval.start < minute->interval.end ||
4468 (second && second->interval.start < second->interval.end))
4469 {
// The decimal part is attempted only after at least one component matched.
4470 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4471 // Decimals
4472 this->interval.end = decimal->interval.end;
4473 }
4474 else if (decimal)
4475 decimal->invalidate();
4476 this->interval.start = start;
4477 return true;
4478 }
// Nothing matched: leave the decimal parser and our own interval invalidated.
4479 if (decimal) decimal->invalidate();
4480 this->interval.invalidate();
4481 return false;
4482 }
4483 };
4484
// Convenience aliases for basic_angle.
// NOTE(review): `wangle` is referenced below but its declaration is not visible in this listing (line 4486 is absent); confirm against the original header.
4485 using angle = basic_angle<char>;
4487#ifdef _UNICODE
4488 using RRegElKot = wangle;
4489#else
4490 using RRegElKot = angle;
4491#endif
4493
// Phone number parser: an optional plus sign followed by digits, which may be grouped with
// one pair of parentheses and interleaved with separators and spaces.
// The matched plus sign, digits and parentheses are collected in `value`;
// separators and spaces are consumed but not stored.
// NOTE(review): this listing has numbering gaps (e.g. 4498, 4501, 4509, 4521, 4605), so the class head,
// the constructor name line and a few statements are not visible here; consult the original header.
4497 template <class T>
4499 {
4500 public:
// Constructor parameters: the sub-parsers used by do_match(). Each is stored in the m_* member of the same name.
4502 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4503 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4504 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4505 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4506 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4507 _In_ const std::shared_ptr<basic_parser<T>>& space,
4508 _In_ const std::locale& locale = std::locale()) :
4510 m_digit(digit),
4511 m_plus_sign(plus_sign),
4512 m_lparenthesis(lparenthesis),
4513 m_rparenthesis(rparenthesis),
4514 m_separator(separator),
4515 m_space(space)
4516 {}
4517
// Clears the collected phone number text.
4518 virtual void invalidate()
4519 {
4520 value.clear();
4522 }
4523
// Text collected by the last successful match: plus sign, digits and parentheses only.
4524 std::basic_string<T> value;
4525
4526 protected:
// Matches a phone number starting at text[start].
//
// text  - text to parse; may be null only when the range [start, end) is empty
// start - offset at which matching begins
// end   - offset at which matching must stop
// flags - match flags; forwarded to the sub-parsers (spaces are matched with match_multiline cleared)
//
// Returns true when at least one digit outside parentheses was found. this->interval then runs
// from start to the last "safe" position (after a digit outside parentheses or after a closing
// parenthesis), so trailing separators, spaces or an unclosed parenthesis group are not included.
// Returns false (value cleared, interval invalidated) otherwise.
4527 virtual bool do_match(
4528 _In_reads_or_z_opt_(end) const T* text,
4529 _In_ size_t start = 0,
4530 _In_ size_t end = SIZE_MAX,
4531 _In_ int flags = match_default)
4532 {
4533 _Assume_(text || start >= end);
4534
// safe_digit_end / safe_value_size remember the parse position and the length of `value`
// at the last point where the number could validly end.
4535 size_t safe_digit_end = start, safe_value_size = 0;
4536 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4537 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4538
4539 this->interval.end = start;
4540 value.clear();
// NOTE(review): both parenthesis parsers are dereferenced here without a null check, while the
// loop below guards them with `m_lparenthesis &&` / `m_rparenthesis &&`. If null parenthesis
// parsers are meant to be allowed, these two calls would dereference null - confirm.
4541 m_lparenthesis->invalidate();
4542 m_rparenthesis->invalidate();
4543
// Optional leading plus sign. It is kept in `value`, but does not count as a digit.
4544 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4545 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4546 safe_value_size = value.size();
4547 this->interval.end = m_plus_sign->interval.end;
4548 }
4549
4550 for (;;) {
4551 _Assume_(text || this->interval.end >= end);
// Stop at the end of the range or at a zero terminator.
4552 if (this->interval.end >= end || !text[this->interval.end])
4553 break;
4554 if (m_digit->match(text, this->interval.end, end, flags)) {
4555 // Digit
4556 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4557 this->interval.end = m_digit->interval.end;
// Digits inside parentheses become "safe" only once the closing parenthesis is seen.
4558 if (!in_parentheses) {
4559 safe_digit_end = this->interval.end;
4560 safe_value_size = value.size();
4561 has_digits = true;
4562 }
4563 after_digit = true;
4564 after_parentheses = false;
4565 }
4566 else if (
4567 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4568 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4569 m_lparenthesis->match(text, this->interval.end, end, flags))
4570 {
4571 // Left parenthesis
4572 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4573 this->interval.end = m_lparenthesis->interval.end;
4574 in_parentheses = true;
4575 after_digit = false;
4576 after_parentheses = false;
4577 }
4578 else if (
4579 in_parentheses && // After left parenthesis
4580 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4581 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4582 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4583 {
4584 // Right parenthesis
4585 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4586 this->interval.end = m_rparenthesis->interval.end;
// A closed parenthesis group is a valid place for the number to end.
4587 safe_digit_end = this->interval.end;
4588 safe_value_size = value.size();
4589 in_parentheses = false;
4590 after_digit = false;
4591 after_parentheses = true;
4592 }
4593 else if (
4594 after_digit &&
4595 !in_parentheses && // No separators inside parentheses
4596 !after_parentheses && // No separators following right parenthesis
4597 m_separator && m_separator->match(text, this->interval.end, end, flags))
4598 {
4599 // Separator
// Consumed, but not copied to `value`.
4600 this->interval.end = m_separator->interval.end;
4601 after_digit = false;
4602 after_parentheses = false;
4603 }
4604 else if (
// NOTE(review): listing line 4605 is absent here; the condition may have one more term in the original header.
4606 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4607 {
4608 // Space
// Consumed, but not copied to `value`.
4609 this->interval.end = m_space->interval.end;
4610 after_digit = false;
4611 after_parentheses = false;
4612 }
4613 else
4614 break;
4615 }
4616 if (has_digits) {
// Drop anything collected past the last safe point (e.g. content of an unclosed parenthesis).
4617 value.erase(safe_value_size);
4618 this->interval.start = start;
4619 this->interval.end = safe_digit_end;
4620 return true;
4621 }
4622 value.clear();
4623 this->interval.invalidate();
4624 return false;
4625 }
4626
// Sub-parsers supplied to the constructor.
4627 std::shared_ptr<basic_parser<T>> m_digit;
4628 std::shared_ptr<basic_parser<T>> m_plus_sign;
4629 std::shared_ptr<basic_set<T>> m_lparenthesis;
4630 std::shared_ptr<basic_set<T>> m_rparenthesis;
4631 std::shared_ptr<basic_parser<T>> m_separator;
4632 std::shared_ptr<basic_parser<T>> m_space;
4633 };
4634
// Convenience aliases for basic_phone_number; tphone_number follows the _UNICODE build setting.
4635 using phone_number = basic_phone_number<char>;
4636 using wphone_number = basic_phone_number<wchar_t>;
4637#ifdef _UNICODE
4638 using tphone_number = wphone_number;
4639#else
4640 using tphone_number = phone_number;
4641#endif
// The SGML variant uses the same char instantiation as phone_number.
4642 using sgml_phone_number = basic_phone_number<char>;
4643
// IBAN parser: two-letter country code, two check digits and a country-specific number of
// alphanumeric BBAN characters (optionally interleaved with spaces).
// A match succeeds on structure alone; `is_valid` reports whether the modulo-97 checksum holds.
// NOTE(review): this listing has numbering gaps (e.g. 4656, 4671, 4675, 4677, 4804), so the declarations
// of `check_digits`, `is_valid` and `nominator` are not visible here; consult the original header.
4649 template <class T>
4650 class basic_iban : public basic_parser<T>
4651 {
4652 public:
// space  - optional parser for spaces allowed inside the BBAN part (null-checked before use)
// locale - locale; defaults to the global locale
4653 basic_iban(
4654 _In_ const std::shared_ptr<basic_parser<T>>& space,
4655 _In_ const std::locale& locale = std::locale()) :
4657 m_space(space)
4658 {
// Start with empty strings and an invalid state.
4659 this->country[0] = 0;
4660 this->check_digits[0] = 0;
4661 this->bban[0] = 0;
4662 this->is_valid = false;
4663 }
4664
// Resets the parsed fields to empty and marks the IBAN as not valid.
4665 virtual void invalidate()
4666 {
4667 this->country[0] = 0;
4668 this->check_digits[0] = 0;
4669 this->bban[0] = 0;
4670 this->is_valid = false;
4672 }
4673
// Zero-terminated two-letter country code (upper-cased when matching case-insensitively).
4674 T country[3];
// Zero-terminated BBAN with spaces removed; room for 30 characters.
4676 T bban[31];
4678
4679 protected:
// Matches an IBAN starting at text[start].
//
// text  - text to parse; may be null only when the range [start, end) is empty
// start - offset at which matching begins
// end   - offset at which matching must stop
// flags - match flags; match_case_insensitive upper-cases letters before they are checked
//
// Returns true when country code, check digits and a BBAN of the expected length were read;
// country, check_digits, bban and is_valid are then set and this->interval covers the IBAN.
// Returns false after calling invalidate() otherwise.
4680 virtual bool do_match(
4681 _In_reads_or_z_opt_(end) const T* text,
4682 _In_ size_t start = 0,
4683 _In_ size_t end = SIZE_MAX,
4684 _In_ int flags = match_default)
4685 {
4686 _Assume_(text || start >= end);
4687 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4688 const bool case_insensitive = flags & match_case_insensitive ? true : false;
// Per-country description:
//   country      - two-letter country code
//   check_digits - when non-zero, the only check digits accepted for this country
//   length       - total IBAN length, including country code and check digits
4689 struct country_t {
4690 T country[2];
4691 T check_digits[2];
4692 size_t length;
4693 };
// The table is binary-searched below, so it must stay sorted by country code.
4694 static const country_t s_countries[] = {
4695 { { 'A', 'D' }, {}, 24 }, // Andorra
4696 { { 'A', 'E' }, {}, 23 }, // United Arab Emirates
4697 { { 'A', 'L' }, {}, 28 }, // Albania
4698 { { 'A', 'O' }, {}, 25 }, // Angola
4699 { { 'A', 'T' }, {}, 20 }, // Austria
4700 { { 'A', 'Z' }, {}, 28 }, // Azerbaijan
4701 { { 'B', 'A' }, { '3', '9' }, 20}, // Bosnia and Herzegovina
4702 { { 'B', 'E' }, {}, 16 }, // Belgium
4703 { { 'B', 'F' }, {}, 28 }, // Burkina Faso
4704 { { 'B', 'G' }, {}, 22 }, // Bulgaria
4705 { { 'B', 'H' }, {}, 22 }, // Bahrain
4706 { { 'B', 'I' }, {}, 27 }, // Burundi
4707 { { 'B', 'J' }, {}, 28 }, // Benin
4708 { { 'B', 'R' }, {}, 29 }, // Brazil
4709 { { 'B', 'Y' }, {}, 28 }, // Belarus
4710 { { 'C', 'F' }, {}, 27 }, // Central African Republic
4711 { { 'C', 'G' }, {}, 27 }, // Congo, Republic of the
4712 { { 'C', 'H' }, {}, 21 }, // Switzerland
4713 { { 'C', 'I' }, {}, 28 }, // Côte d'Ivoire
4714 { { 'C', 'M' }, {}, 27 }, // Cameroon
4715 { { 'C', 'R' }, {}, 22 }, // Costa Rica
4716 { { 'C', 'V' }, {}, 25 }, // Cabo Verde
4717 { { 'C', 'Y' }, {}, 28 }, // Cyprus
4718 { { 'C', 'Z' }, {}, 24 }, // Czech Republic
4719 { { 'D', 'E' }, {}, 22 }, // Germany
4720 { { 'D', 'J' }, {}, 27 }, // Djibouti
4721 { { 'D', 'K' }, {}, 18 }, // Denmark
4722 { { 'D', 'O' }, {}, 28 }, // Dominican Republic
4723 { { 'D', 'Z' }, {}, 26 }, // Algeria
4724 { { 'E', 'E' }, {}, 20 }, // Estonia
4725 { { 'E', 'G' }, {}, 29 }, // Egypt
4726 { { 'E', 'S' }, {}, 24 }, // Spain
4727 { { 'F', 'I' }, {}, 18 }, // Finland
4728 { { 'F', 'O' }, {}, 18 }, // Faroe Islands
4729 { { 'F', 'R' }, {}, 27 }, // France
4730 { { 'G', 'A' }, {}, 27 }, // Gabon
4731 { { 'G', 'B' }, {}, 22 }, // United Kingdom
4732 { { 'G', 'E' }, {}, 22 }, // Georgia
4733 { { 'G', 'I' }, {}, 23 }, // Gibraltar
4734 { { 'G', 'L' }, {}, 18 }, // Greenland
4735 { { 'G', 'Q' }, {}, 27 }, // Equatorial Guinea
4736 { { 'G', 'R' }, {}, 27 }, // Greece
4737 { { 'G', 'T' }, {}, 28 }, // Guatemala
4738 { { 'G', 'W' }, {}, 25 }, // Guinea-Bissau
4739 { { 'H', 'N' }, {}, 28 }, // Honduras
4740 { { 'H', 'R' }, {}, 21 }, // Croatia
4741 { { 'H', 'U' }, {}, 28 }, // Hungary
4742 { { 'I', 'E' }, {}, 22 }, // Ireland
4743 { { 'I', 'L' }, {}, 23 }, // Israel
4744 { { 'I', 'Q' }, {}, 23 }, // Iraq
4745 { { 'I', 'R' }, {}, 26 }, // Iran
4746 { { 'I', 'S' }, {}, 26 }, // Iceland
4747 { { 'I', 'T' }, {}, 27 }, // Italy
4748 { { 'J', 'O' }, {}, 30 }, // Jordan
4749 { { 'K', 'M' }, {}, 27 }, // Comoros
4750 { { 'K', 'W' }, {}, 30 }, // Kuwait
4751 { { 'K', 'Z' }, {}, 20 }, // Kazakhstan
4752 { { 'L', 'B' }, {}, 28 }, // Lebanon
4753 { { 'L', 'C' }, {}, 32 }, // Saint Lucia
4754 { { 'L', 'I' }, {}, 21 }, // Liechtenstein
4755 { { 'L', 'T' }, {}, 20 }, // Lithuania
4756 { { 'L', 'U' }, {}, 20 }, // Luxembourg
4757 { { 'L', 'V' }, {}, 21 }, // Latvia
4758 { { 'L', 'Y' }, {}, 25 }, // Libya
4759 { { 'M', 'A' }, {}, 28 }, // Morocco
4760 { { 'M', 'C' }, {}, 27 }, // Monaco
4761 { { 'M', 'D' }, {}, 24 }, // Moldova
4762 { { 'M', 'E' }, { '2', '5' }, 22 }, // Montenegro
4763 { { 'M', 'G' }, {}, 27 }, // Madagascar
4764 { { 'M', 'K' }, { '0', '7' }, 19 }, // North Macedonia
4765 { { 'M', 'L' }, {}, 28 }, // Mali
4766 { { 'M', 'R' }, { '1', '3' }, 27}, // Mauritania
4767 { { 'M', 'T' }, {}, 31 }, // Malta
4768 { { 'M', 'U' }, {}, 30 }, // Mauritius
4769 { { 'M', 'Z' }, {}, 25 }, // Mozambique
4770 { { 'N', 'E' }, {}, 28 }, // Niger
4771 { { 'N', 'I' }, {}, 32 }, // Nicaragua
4772 { { 'N', 'L' }, {}, 18 }, // Netherlands
4773 { { 'N', 'O' }, {}, 15 }, // Norway
4774 { { 'P', 'K' }, {}, 24 }, // Pakistan
4775 { { 'P', 'L' }, {}, 28 }, // Poland
4776 { { 'P', 'S' }, {}, 29 }, // Palestinian territories
4777 { { 'P', 'T' }, { '5', '0' }, 25 }, // Portugal
4778 { { 'Q', 'A' }, {}, 29 }, // Qatar
4779 { { 'R', 'O' }, {}, 24 }, // Romania
4780 { { 'R', 'S' }, { '3', '5' }, 22 }, // Serbia
4781 { { 'R', 'U' }, {}, 33 }, // Russia
4782 { { 'S', 'A' }, {}, 24 }, // Saudi Arabia
4783 { { 'S', 'C' }, {}, 31 }, // Seychelles
4784 { { 'S', 'D' }, {}, 18 }, // Sudan
4785 { { 'S', 'E' }, {}, 24 }, // Sweden
4786 { { 'S', 'I' }, { '5', '6' }, 19 }, // Slovenia
4787 { { 'S', 'K' }, {}, 24 }, // Slovakia
4788 { { 'S', 'M' }, {}, 27 }, // San Marino
4789 { { 'S', 'N' }, {}, 28 }, // Senegal
4790 { { 'S', 'T' }, {}, 25 }, // São Tomé and Príncipe
4791 { { 'S', 'V' }, {}, 28 }, // El Salvador
4792 { { 'T', 'D' }, {}, 27 }, // Chad
4793 { { 'T', 'G' }, {}, 28 }, // Togo
4794 { { 'T', 'L' }, { '3', '8' }, 23}, // East Timor
4795 { { 'T', 'N' }, { '5', '9' }, 24 }, // Tunisia
4796 { { 'T', 'R' }, {}, 26 }, // Turkey
4797 { { 'U', 'A' }, {}, 29 }, // Ukraine
4798 { { 'V', 'A' }, {}, 22 }, // Vatican City
4799 { { 'V', 'G' }, {}, 24 }, // Virgin Islands, British
4800 { { 'X', 'K' }, {}, 20 }, // Kosovo
4801 };
4802 const country_t* country_desc = nullptr;
4803 size_t n, available, next, bban_length;
// NOTE(review): `nominator` used below is not declared in the visible listing (line 4804 is absent).
4805
// Step 1: read the two-letter country code.
4806 this->interval.end = start;
4807 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4808 if (this->interval.end >= end || !text[this->interval.end])
4809 goto error; // incomplete country code
4810 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4811 if (chr < 'A' || 'Z' < chr)
4812 goto error; // invalid country code
4813 this->country[i] = chr;
4814 }
// Step 2: binary search of the country table over the half-open range [l, r).
4815 for (size_t l = 0, r = _countof(s_countries);;) {
4816 if (l >= r)
4817 goto error; // unknown country
4818 size_t m = (l + r) / 2;
4819 const country_t& c = s_countries[m];
4820 if (c.country[0] < this->country[0] || (c.country[0] == this->country[0] && c.country[1] < this->country[1]))
4821 l = m + 1;
4822 else if (this->country[0] < c.country[0] || (this->country[0] == c.country[0] && this->country[1] < c.country[1]))
4823 r = m;
4824 else {
4825 country_desc = &c;
4826 break;
4827 }
4828 }
4829 this->country[2] = 0;
4830
// Step 3: read the two check digits. A zero terminator fails the '0'..'9' range test, so it ends the match as well.
4831 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4832 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4833 goto error; // incomplete or invalid check digits
4834 this->check_digits[i] = text[this->interval.end];
4835 }
4836 this->check_digits[2] = 0;
4837
// Countries with fixed check digits in the table accept only those digits.
4838 if ((country_desc->check_digits[0] && this->check_digits[0] != country_desc->check_digits[0]) ||
4839 (country_desc->check_digits[1] && this->check_digits[1] != country_desc->check_digits[1]))
4840 goto error; // unexpected check digits
4841
// Step 4: read the BBAN. The 4 characters already consumed (country code + check digits) are not part of it.
4842 bban_length = country_desc->length - 4;
4843 for (n = 0; n < bban_length;) {
4844 if (this->interval.end >= end || !text[this->interval.end])
4845 goto error; // bban too short
// Spaces are skipped and do not count towards the BBAN length.
4846 if (m_space && m_space->match(text, this->interval.end, end, flags)) {
4847 this->interval.end = m_space->interval.end;
4848 continue;
4849 }
4850 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4851 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4852 this->bban[n++] = chr;
4853 this->interval.end++;
4854 }
4855 else
4856 goto error; // invalid bban
4857 }
4858 this->bban[n] = 0;
4859
4860 // Normalize IBAN.
// The digit string is built as BBAN + country code + check digits, with each letter replaced by
// two digits (A=10 ... Z=35). Worst case: 30 BBAN letters * 2 + 4 + 2 + terminator = 67 elements.
4861 T normalized[69];
4862 available = 0;
4863 for (size_t i = 0; ; ++i) {
4864 if (!this->bban[i]) {
// End of BBAN: append the country code (as digits) and the check digits, then terminate.
4865 for (i = 0; i < 2; ++i) {
4866 if ('A' <= this->country[i] && this->country[i] <= 'J') {
4867 normalized[available++] = '1';
4868 normalized[available++] = '0' + this->country[i] - 'A';
4869 }
4870 else if ('K' <= this->country[i] && this->country[i] <= 'T') {
4871 normalized[available++] = '2';
4872 normalized[available++] = '0' + this->country[i] - 'K';
4873 }
4874 else if ('U' <= this->country[i] && this->country[i] <= 'Z') {
4875 normalized[available++] = '3';
4876 normalized[available++] = '0' + this->country[i] - 'U';
4877 }
4878 }
4879 normalized[available++] = this->check_digits[0];
4880 normalized[available++] = this->check_digits[1];
4881 normalized[available] = 0;
4882 break;
4883 }
4884 if ('0' <= this->bban[i] && this->bban[i] <= '9')
4885 normalized[available++] = this->bban[i];
4886 else if ('A' <= this->bban[i] && this->bban[i] <= 'J') {
4887 normalized[available++] = '1';
4888 normalized[available++] = '0' + this->bban[i] - 'A';
4889 }
4890 else if ('K' <= this->bban[i] && this->bban[i] <= 'T') {
4891 normalized[available++] = '2';
4892 normalized[available++] = '0' + this->bban[i] - 'K';
4893 }
4894 else if ('U' <= this->bban[i] && this->bban[i] <= 'Z') {
4895 normalized[available++] = '3';
4896 normalized[available++] = '0' + this->bban[i] - 'U';
4897 }
4898 }
4899
4900 // Calculate modulo 97.
// The long digit string is reduced piecewise: parse up to 9 digits, take the remainder mod 97,
// then extend the remainder (0, 1 or 2 digits) with further digits up to 9 digits in total, and repeat.
// The IBAN is valid when the final remainder is 1.
4901 nominator = stdex::strtou32(normalized, 9, &next, 10);
4902 for (;;) {
4903 nominator %= 97;
4904 if (!normalized[next]) {
4905 this->is_valid = nominator == 1;
4906 break;
4907 }
4908 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
4909 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
4910 nominator = nominator * 10 + (normalized[next] - '0');
4911 }
4912
// The match succeeds even when the checksum fails; callers inspect is_valid.
4913 this->interval.start = start;
4914 return true;
4915
4916 error:
4917 invalidate();
4918 return false;
4919 }
4920
// Optional parser for spaces inside the BBAN; may be null.
4921 std::shared_ptr<basic_parser<T>> m_space;
4922 };
4923
// Convenience aliases for basic_iban; tiban follows the _UNICODE build setting.
4924 using iban = basic_iban<char>;
4925 using wiban = basic_iban<wchar_t>;
4926#ifdef _UNICODE
4927 using tiban = wiban;
4928#else
4929 using tiban = iban;
4930#endif
// The SGML variant uses the same char instantiation as iban.
4931 using sgml_iban = basic_iban<char>;
4932
// Creditor reference parser: the letters "RF", two check digits and an alphanumeric reference
// read in groups of up to four characters, each group optionally preceded by a space.
// A match succeeds on structure alone; `is_valid` reports whether the modulo-97 checksum holds.
// NOTE(review): this listing has numbering gaps (e.g. 4939, 4942, 4945, 4958, 4961-4963, 4976), so the class head,
// the constructor name line and the declarations of `check_digits`, `reference`, `is_valid` and
// `nominator` are not visible here; consult the original header.
4938 template <class T>
4940 {
4941 public:
// space  - optional parser for spaces between reference groups (null-checked before use)
// locale - locale; defaults to the global locale
4943 _In_ const std::shared_ptr<basic_parser<T>>& space,
4944 _In_ const std::locale& locale = std::locale()) :
4946 m_space(space)
4947 {
// Start with empty strings and an invalid state.
4948 this->check_digits[0] = 0;
4949 this->reference[0] = 0;
4950 this->is_valid = false;
4951 }
4952
// Resets the parsed fields to empty and marks the reference as not valid.
4953 virtual void invalidate()
4954 {
4955 this->check_digits[0] = 0;
4956 this->reference[0] = 0;
4957 this->is_valid = false;
4959 }
4960
4964
4965 protected:
// Matches a creditor reference starting at text[start].
//
// text  - text to parse; may be null only when the range [start, end) is empty
// start - offset at which matching begins
// end   - offset at which matching must stop
// flags - match flags; match_case_insensitive upper-cases letters before they are checked
//
// Returns true when "RF", two check digits and at least one reference character were read;
// check_digits, reference (left-padded with '0') and is_valid are then set and
// this->interval covers the consumed text. Returns false after calling invalidate() otherwise.
4966 virtual bool do_match(
4967 _In_reads_or_z_opt_(end) const T* text,
4968 _In_ size_t start = 0,
4969 _In_ size_t end = SIZE_MAX,
4970 _In_ int flags = match_default)
4971 {
4972 _Assume_(text || start >= end);
4973 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4974 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4975 size_t n, available, next;
// NOTE(review): `nominator` used below is not declared in the visible listing (line 4976 is absent).
4977
// Step 1: the "RF" prefix. Both characters must lie inside [start, end).
4978 this->interval.end = start;
4979 if (this->interval.end + 1 >= end ||
4980 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'R' ||
4981 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'F')
4982 goto error; // incomplete or wrong reference ID
4983 this->interval.end += 2;
4984
// Step 2: two check digits. A zero terminator fails the '0'..'9' range test, so it ends the match as well.
4985 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4986 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4987 goto error; // incomplete or invalid check digits
4988 this->check_digits[i] = text[this->interval.end];
4989 }
4990 this->check_digits[2] = 0;
4991
// Step 3: the reference itself. One optional space is accepted before each group of up to four
// characters; the first non-alphanumeric character (or the end of text) ends the reference.
4992 for (n = 0;;) {
4993 if (m_space && m_space->match(text, this->interval.end, end, flags))
4994 this->interval.end = m_space->interval.end;
4995 for (size_t j = 0; j < 4; ++j) {
4996 if (this->interval.end >= end || !text[this->interval.end])
4997 goto out;
4998 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4999 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
// One element of `reference` is reserved for the zero terminator.
5000 if (n >= _countof(reference) - 1)
5001 goto error; // reference overflow
5002 this->reference[n++] = chr;
5003 this->interval.end++;
5004 }
5005 else
5006 goto out;
5007 }
5008 }
5009 out:
5010 if (!n)
5011 goto error; // reference too short
// Right-align the n characters read in the buffer (copying backwards, so the overlapping move is safe)
// and fill the freed leading positions with '0'.
5012 this->reference[_countof(this->reference) - 1] = 0;
5013 for (size_t i = n, j = _countof(this->reference) - 1; i;)
5014 this->reference[--j] = this->reference[--i];
5015 for (size_t j = _countof(this->reference) - 1 - n; j;)
5016 this->reference[--j] = '0';
5017
5018 // Normalize creditor reference.
// The digit string is built as reference + "2715" ("RF" as digits) + check digits, with each letter
// replaced by two digits (A=10 ... Z=35).
// NOTE(review): the loop can write up to two elements per reference character, then 6 more and a
// terminator. Confirm that 47 elements cover the worst case for the declared size of `reference`
// (its declaration is not visible in this listing).
5019 T normalized[47];
5020 available = 0;
5021 for (size_t i = 0; ; ++i) {
5022 if (!this->reference[i]) {
5023 normalized[available++] = '2'; // R
5024 normalized[available++] = '7';
5025 normalized[available++] = '1'; // F
5026 normalized[available++] = '5';
5027 normalized[available++] = this->check_digits[0];
5028 normalized[available++] = this->check_digits[1];
5029 normalized[available] = 0;
5030 break;
5031 }
5032 if ('0' <= this->reference[i] && this->reference[i] <= '9')
5033 normalized[available++] = this->reference[i];
5034 else if ('A' <= this->reference[i] && this->reference[i] <= 'J') {
5035 normalized[available++] = '1';
5036 normalized[available++] = '0' + this->reference[i] - 'A';
5037 }
5038 else if ('K' <= this->reference[i] && this->reference[i] <= 'T') {
5039 normalized[available++] = '2';
5040 normalized[available++] = '0' + this->reference[i] - 'K';
5041 }
5042 else if ('U' <= this->reference[i] && this->reference[i] <= 'Z') {
5043 normalized[available++] = '3';
5044 normalized[available++] = '0' + this->reference[i] - 'U';
5045 }
5046 }
5047
5048 // Calculate modulo 97.
// The long digit string is reduced piecewise: parse up to 9 digits, take the remainder mod 97,
// then extend the remainder (0, 1 or 2 digits) with further digits up to 9 digits in total, and repeat.
// The reference is valid when the final remainder is 1.
5049 nominator = stdex::strtou32(normalized, 9, &next, 10);
5050 for (;;) {
5051 nominator %= 97;
5052 if (!normalized[next]) {
5053 this->is_valid = nominator == 1;
5054 break;
5055 }
5056 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
5057 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
5058 nominator = nominator * 10 + (normalized[next] - '0');
5059 }
5060
// The match succeeds even when the checksum fails; callers inspect is_valid.
5061 this->interval.start = start;
5062 return true;
5063
5064 error:
5065 invalidate();
5066 return false;
5067 }
5068
// Optional parser for spaces between reference groups; may be null.
5069 std::shared_ptr<basic_parser<T>> m_space;
5070 };
5071
// Convenience aliases for basic_creditor_reference; tcreditor_reference follows the _UNICODE build setting.
5072 using creditor_reference = basic_creditor_reference<char>;
5073 using wcreditor_reference = basic_creditor_reference<wchar_t>;
5074#ifdef _UNICODE
5075 using tcreditor_reference = wcreditor_reference;
5076#else
5077 using tcreditor_reference = creditor_reference;
5078#endif
// The SGML variant uses the same char instantiation as creditor_reference.
5079 using sgml_creditor_reference = basic_creditor_reference<char>;
5080
// Parser for one part of an SI reference: a run of ASCII digits '0'..'9'.
// NOTE(review): this listing has numbering gaps (5087, 5109), so the class head and one statement
// of do_match() are not visible here; consult the original header.
5086 template <class T>
5088 {
5089 public:
// locale - forwarded to the basic_parser<T> base; defaults to the global locale
5090 basic_si_reference_part(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5091
5092 protected:
// Scans digits starting at text[start].
//
// text  - text to parse; may be null only when the range [start, end) is empty
// start - offset at which scanning begins
// end   - offset at which scanning must stop
// flags - match flags; not used by the visible code
//
// On success this->interval spans from start to the first non-digit; on failure it is invalidated.
5093 virtual bool do_match(
5094 _In_reads_or_z_opt_(end) const T* text,
5095 _In_ size_t start = 0,
5096 _In_ size_t end = SIZE_MAX,
5097 _In_ int flags = match_default)
5098 {
5099 _Assume_(text || start >= end);
5100 this->interval.end = start;
// Advance over consecutive digits; stop at the end of the range, a zero terminator or any other character.
5101 for (;;) {
5102 if (this->interval.end >= end || !text[this->interval.end])
5103 break;
5104 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9')
5105 this->interval.end++;
5106 else
5107 break;
5108 }
// NOTE(review): listing line 5109 is absent here - it opens the block closed at 5112, presumably a
// check that at least one digit was consumed; confirm against the original header.
5110 this->interval.start = start;
5111 return true;
5112 }
5113 this->interval.invalidate();
5114 return false;
5115 }
5116 };
5117
5120#ifdef _UNICODE
5122#else
5124#endif
5126
// Parser for the delimiter between SI reference parts: exactly one '-' character.
// NOTE(review): listing line 5133 (the class head) is not visible here; consult the original header.
5132 template <class T>
5134 {
5135 public:
// locale - forwarded to the basic_parser<T> base; defaults to the global locale
5136 basic_si_reference_delimiter(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5137
5138 protected:
// Matches a single '-' at text[start].
//
// text  - text to parse; may be null only when the range [start, end) is empty
// start - offset of the character to test
// end   - offset at which matching must stop
// flags - match flags; not used here
//
// On success this->interval is [start, start + 1); otherwise it is invalidated.
5139 virtual bool do_match(
5140 _In_reads_or_z_opt_(end) const T* text,
5141 _In_ size_t start = 0,
5142 _In_ size_t end = SIZE_MAX,
5143 _In_ int flags = match_default)
5144 {
5145 _Assume_(text || start >= end);
5146 if (start < end && text[start] == '-') {
// Sets interval.start first, then derives interval.end from it.
5147 this->interval.end = (this->interval.start = start) + 1;
5148 return true;
5149 }
5150 this->interval.invalidate();
5151 return false;
5152 }
5153 };
5154
5157#ifdef _UNICODE
5159#else
5161#endif
5163
5171 template <class T>
5173 {
5174 public:
// Constructor parameters and initialisation of the SI reference parser.
// NOTE(review): the constructor name line (5175) and line 5178 are absent from this listing.
// space  - optional parser for a space after the model digits (do_match() null-checks it)
// locale - passed on to the three part parsers and to the delimiter parser
5176 _In_ const std::shared_ptr<basic_parser<T>>& space,
5177 _In_ const std::locale& locale = std::locale()) :
5179 part1(locale),
5180 part2(locale),
5181 part3(locale),
5182 is_valid(false),
5183 m_space(space),
5184 m_delimiter(locale)
5185 {
// Start with an empty model string.
5186 this->model[0] = 0;
5187 }
5188
// Resets the SI reference parser: empties the model string, invalidates the three part parsers
// and marks the reference as not valid.
// NOTE(review): listing line 5196 is absent here - presumably the base-class invalidate() call; confirm against the original header.
5189 virtual void invalidate()
5190 {
5191 this->model[0] = 0;
5192 this->part1.invalidate();
5193 this->part2.invalidate();
5194 this->part3.invalidate();
5195 this->is_valid = false;
5197 }
5198
// Zero-terminated reference model: the two digits do_match() reads right after the "SI" prefix.
5199 T model[3];
5204
5205 protected:
5206 virtual bool do_match(
5207 _In_reads_or_z_opt_(end) const T* text,
5208 _In_ size_t start = 0,
5209 _In_ size_t end = SIZE_MAX,
5210 _In_ int flags = match_default)
5211 {
5212 _Assume_(text || start >= end);
5213 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
5214 const bool case_insensitive = flags & match_case_insensitive ? true : false;
5215
5216 this->interval.end = start;
5217 if (this->interval.end + 1 >= end ||
5218 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'S' ||
5219 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'I')
5220 goto error; // incomplete or wrong reference ID
5221 this->interval.end += 2;
5222
5223 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5224 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5225 goto error; // incomplete or invalid model
5226 this->model[i] = text[this->interval.end];
5227 }
5228 this->model[2] = 0;
5229
5230 this->part1.invalidate();
5231 this->part2.invalidate();
5232 this->part3.invalidate();
5233 if (this->model[0] == '9' && this->model[1] == '9') {
5234 is_valid = true;
5235 this->interval.start = start;
5236 return true;
5237 }
5238
5239 if (m_space && m_space->match(text, this->interval.end, end, flags))
5240 this->interval.end = m_space->interval.end;
5241
5242 this->part1.match(text, this->interval.end, end, flags) &&
5243 this->m_delimiter.match(text, this->part1.interval.end, end, flags) &&
5244 this->part2.match(text, this->m_delimiter.interval.end, end, flags) &&
5245 this->m_delimiter.match(text, this->part2.interval.end, end, flags) &&
5246 this->part3.match(text, this->m_delimiter.interval.end, end, flags);
5247
5248 this->interval.start = start;
5249 if (this->part3.interval)
5250 this->interval.end = this->part3.interval.end;
5251 else if (this->part2.interval)
5252 this->interval.end = this->part2.interval.end;
5253 else if (this->part1.interval)
5254 this->interval.end = this->part1.interval.end;
5255 else
5256 this->interval.end = start + 4;
5257
5258 if (this->model[0] == '0' && this->model[1] == '0')
5259 is_valid =
5260 this->part3.interval ?
5261 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5262 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 :
5263 this->part2.interval ?
5264 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5265 this->part1.interval.size() + this->part2.interval.size() <= 20 :
5266 this->part1.interval ?
5267 this->part1.interval.size() <= 12 :
5268 false;
5269 else if (this->model[0] == '0' && this->model[1] == '1')
5270 is_valid =
5271 this->part3.interval ?
5272 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5273 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5274 check11(
5275 text + this->part1.interval.start, this->part1.interval.size(),
5276 text + this->part2.interval.start, this->part2.interval.size(),
5277 text + this->part3.interval.start, this->part3.interval.size()) :
5278 this->part2.interval ?
5279 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5280 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5281 check11(
5282 text + this->part1.interval.start, this->part1.interval.size(),
5283 text + this->part2.interval.start, this->part2.interval.size()) :
5284 this->part1.interval ?
5285 this->part1.interval.size() <= 12 &&
5286 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5287 false;
5288 else if (this->model[0] == '0' && this->model[1] == '2')
5289 is_valid =
5290 this->part3.interval ?
5291 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5292 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5293 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5294 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5295 false;
5296 else if (this->model[0] == '0' && this->model[1] == '3')
5297 is_valid =
5298 this->part3.interval ?
5299 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5300 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5301 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5302 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5303 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5304 false;
5305 else if (this->model[0] == '0' && this->model[1] == '4')
5306 is_valid =
5307 this->part3.interval ?
5308 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5309 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5310 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5311 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5312 false;
5313 else if ((this->model[0] == '0' || this->model[0] == '5') && this->model[1] == '5')
5314 is_valid =
5315 this->part3.interval ?
5316 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5317 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5318 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5319 this->part2.interval ?
5320 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5321 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5322 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5323 this->part1.interval ?
5324 this->part1.interval.size() <= 12 &&
5325 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5326 false;
5327 else if (this->model[0] == '0' && this->model[1] == '6')
5328 is_valid =
5329 this->part3.interval ?
5330 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5331 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5332 check11(
5333 text + this->part2.interval.start, this->part2.interval.size(),
5334 text + this->part3.interval.start, this->part3.interval.size()) :
5335 this->part2.interval ?
5336 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5337 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5338 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5339 false;
5340 else if (this->model[0] == '0' && this->model[1] == '7')
5341 is_valid =
5342 this->part3.interval ?
5343 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5344 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5345 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5346 this->part2.interval ?
5347 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5348 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5349 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5350 false;
5351 else if (this->model[0] == '0' && this->model[1] == '8')
5352 is_valid =
5353 this->part3.interval ?
5354 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5355 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5356 check11(
5357 text + this->part1.interval.start, this->part1.interval.size(),
5358 text + this->part2.interval.start, this->part2.interval.size()) &&
5359 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5360 false;
5361 else if (this->model[0] == '0' && this->model[1] == '9')
5362 is_valid =
5363 this->part3.interval ?
5364 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5365 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5366 check11(
5367 text + this->part1.interval.start, this->part1.interval.size(),
5368 text + this->part2.interval.start, this->part2.interval.size()) :
5369 this->part2.interval ?
5370 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5371 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5372 check11(
5373 text + this->part1.interval.start, this->part1.interval.size(),
5374 text + this->part2.interval.start, this->part2.interval.size()) :
5375 this->part1.interval ?
5376 this->part1.interval.size() <= 12 &&
5377 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5378 false;
5379 else if (this->model[0] == '1' && this->model[1] == '0')
5380 is_valid =
5381 this->part3.interval ?
5382 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5383 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5384 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5385 check11(
5386 text + this->part2.interval.start, this->part2.interval.size(),
5387 text + this->part3.interval.start, this->part3.interval.size()) :
5388 this->part2.interval ?
5389 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5390 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5391 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5392 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5393 false;
5394 else if (
5395 (this->model[0] == '1' && (this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5396 ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '8') ||
5397 (this->model[0] == '4' && (this->model[1] == '0' || this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5398 (this->model[0] == '5' && (this->model[1] == '1' || this->model[1] == '8')))
5399 is_valid =
5400 this->part3.interval ?
5401 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5402 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5403 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5404 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5405 this->part2.interval ?
5406 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5407 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5408 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5409 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5410 false;
5411 else if (this->model[0] == '1' && this->model[1] == '2')
5412 is_valid =
5413 this->part3.interval ? false :
5414 this->part2.interval ? false :
5415 this->part1.interval ?
5416 this->part1.interval.size() <= 13 &&
5417 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5418 false;
5419 else if ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '1')
5420 is_valid =
5421 this->part3.interval ? false :
5422 this->part2.interval ?
5423 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5424 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5425 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5426 false;
5427 else
5428 is_valid = true; // Assume models we don't handle as valid
5429 return true;
5430
5431 error:
5432 invalidate();
5433 return false;
5434 }
5435
5436 static bool check11(
5437 _In_count_(num_part1) const T* part1, _In_ size_t num_part1)
5438 {
5439 _Assume_(part1 && num_part1 >= 1);
5440 uint32_t nominator = 0, ponder = 2;
5441 for (size_t i = num_part1 - 1; i--; ++ponder)
5442 nominator += (part1[i] - '0') * ponder;
5443 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5444 if (control >= 10)
5445 control = 0;
5446 return control == part1[num_part1 - 1] - '0';
5447 }
5448
5449 static bool check11(
5450 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5451 _In_count_(num_part2) const T* part2, _In_ size_t num_part2)
5452 {
5453 _Assume_(part1 || !num_part1);
5454 _Assume_(part2 && num_part2 >= 1);
5455 uint32_t nominator = 0, ponder = 2;
5456 for (size_t i = num_part2 - 1; i--; ++ponder)
5457 nominator += (part2[i] - '0') * ponder;
5458 for (size_t i = num_part1; i--; ++ponder)
5459 nominator += (part1[i] - '0') * ponder;
5460 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5461 if (control == 10)
5462 control = 0;
5463 return control == part2[num_part2 - 1] - '0';
5464 }
5465
5466 static bool check11(
5467 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5468 _In_count_(num_part2) const T* part2, _In_ size_t num_part2,
5469 _In_count_(num_part3) const T* part3, _In_ size_t num_part3)
5470 {
5471 _Assume_(part1 || !num_part1);
5472 _Assume_(part2 || !num_part2);
5473 _Assume_(part3 && num_part3 >= 1);
5474 uint32_t nominator = 0, ponder = 2;
5475 for (size_t i = num_part3 - 1; i--; ++ponder)
5476 nominator += (part3[i] - '0') * ponder;
5477 for (size_t i = num_part2; i--; ++ponder)
5478 nominator += (part2[i] - '0') * ponder;
5479 for (size_t i = num_part1; i--; ++ponder)
5480 nominator += (part1[i] - '0') * ponder;
5481 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5482 if (control == 10)
5483 control = 0;
5484 return control == part2[num_part3 - 1] - '0';
5485 }
5486
5487 std::shared_ptr<basic_parser<T>> m_space;
5488 basic_si_reference_delimiter<T> m_delimiter;
5489 };
5490
/// basic_si_reference instantiated for char text
5491 using si_reference = basic_si_reference<char>;
/// basic_si_reference instantiated for wchar_t text
5492 using wsi_reference = basic_si_reference<wchar_t>;
// tsi_reference follows the build's character mode: the wide alias when
// _UNICODE is defined, the narrow alias otherwise.
5493#ifdef _UNICODE
5494 using tsi_reference = wsi_reference;
5495#else
5496 using tsi_reference = si_reference;
5497#endif
/// Same instantiation as si_reference (char text), published under the sgml_ prefix
5498 using sgml_si_reference = basic_si_reference<char>;
5499
5503 template <class T>
5505 {
5506 public:
5508 _In_ const std::shared_ptr<basic_parser<T>>& element,
5509 _In_ const std::shared_ptr<basic_parser<T>>& digit,
5510 _In_ const std::shared_ptr<basic_parser<T>>& sign,
5511 _In_ const std::locale& locale = std::locale()) :
5513 m_element(element),
5514 m_digit(digit),
5515 m_sign(sign),
5516 has_digits(false),
5517 has_charge(false)
5518 {}
5519
/// Clears the has_digits and has_charge flags.
// NOTE(review): the listing skips a line between the flag resets and the
// closing brace (presumably the base-class invalidate() call) - confirm
// against the original header.
5520 virtual void invalidate()
5521 {
5522 has_digits = false;
5523 has_charge = false;
5525 }
5526
5527 bool has_digits;
5528 bool has_charge;
5529
5530 protected:
5531 virtual bool do_match(
5532 _In_reads_or_z_opt_(end) const T* text,
5533 _In_ size_t start = 0,
5534 _In_ size_t end = SIZE_MAX,
5535 _In_ int flags = match_default)
5536 {
5537 _Assume_(text || start >= end);
5538
5539 has_digits = false;
5540 has_charge = false;
5541 this->interval.end = start;
5542
5543 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
5544 for (;;) {
5545 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
5546 this->interval.end = m_element->interval.end;
5547 while (m_digit->match(text, this->interval.end, end, flags)) {
5548 this->interval.end = m_digit->interval.end;
5549 has_digits = true;
5550 }
5551 }
5552 else if (start < this->interval.end) {
5553 if (m_sign->match(text, this->interval.end, end, flags)) {
5554 this->interval.end = m_sign->interval.end;
5555 has_charge = true;
5556 }
5557 this->interval.start = start;
5558 return true;
5559 }
5560 else {
5561 this->interval.invalidate();
5562 return false;
5563 }
5564 }
5565 }
5566
5567 std::shared_ptr<basic_parser<T>> m_element;
5568 std::shared_ptr<basic_parser<T>> m_digit;
5569 std::shared_ptr<basic_parser<T>> m_sign;
5570 };
5571
5574#ifdef _UNICODE
5576#else
5578#endif
5580
5585 {
5586 protected:
5587 virtual bool do_match(
5588 _In_reads_or_z_(end) const char* text,
5589 _In_ size_t start = 0,
5590 _In_ size_t end = SIZE_MAX,
5591 _In_ int flags = match_default)
5592 {
5593 _Assume_(text || start >= end);
5594 this->interval.end = start;
5595
5596 _Assume_(text || this->interval.end >= end);
5597 if (this->interval.end < end && text[this->interval.end]) {
5598 if (text[this->interval.end] == '\r') {
5599 this->interval.end++;
5600 if (this->interval.end < end && text[this->interval.end] == '\n') {
5601 this->interval.start = start;
5602 this->interval.end++;
5603 return true;
5604 }
5605 }
5606 else if (text[this->interval.end] == '\n') {
5607 this->interval.start = start;
5608 this->interval.end++;
5609 return true;
5610 }
5611 }
5612 this->interval.invalidate();
5613 return false;
5614 }
5615 };
5616
5620 class http_space : public parser
5621 {
5622 protected:
5623 virtual bool do_match(
5624 _In_reads_or_z_(end) const char* text,
5625 _In_ size_t start = 0,
5626 _In_ size_t end = SIZE_MAX,
5627 _In_ int flags = match_default)
5628 {
5629 _Assume_(text || start >= end);
5630 this->interval.end = start;
5631 if (m_line_break.match(text, this->interval.end, end, flags)) {
5632 this->interval.end = m_line_break.interval.end;
5633 if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5634 this->interval.start = start;
5635 this->interval.end++;
5636 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5637 return true;
5638 }
5639 }
5640 else if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5641 this->interval.start = start;
5642 this->interval.end++;
5643 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5644 return true;
5645 }
5646 this->interval.invalidate();
5647 return false;
5648 }
5649
5650 http_line_break m_line_break;
5651 };
5652
5656 class http_text_char : public parser
5657 {
5658 protected:
5659 virtual bool do_match(
5660 _In_reads_or_z_(end) const char* text,
5661 _In_ size_t start = 0,
5662 _In_ size_t end = SIZE_MAX,
5663 _In_ int flags = match_default)
5664 {
5665 _Assume_(text || start >= end);
5666 this->interval.end = start;
5667
5668 _Assume_(text || this->interval.end >= end);
5669 if (m_space.match(text, this->interval.end, end, flags)) {
5670 this->interval.start = start;
5671 this->interval.end = m_space.interval.end;
5672 return true;
5673 }
5674 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
5675 this->interval.start = start;
5676 this->interval.end++;
5677 return true;
5678 }
5679 this->interval.invalidate();
5680 return false;
5681 }
5682
5683 http_space m_space;
5684 };
5685
5689 class http_token : public parser
5690 {
5691 protected:
5692 virtual bool do_match(
5693 _In_reads_or_z_(end) const char* text,
5694 _In_ size_t start = 0,
5695 _In_ size_t end = SIZE_MAX,
5696 _In_ int flags = match_default)
5697 {
5698 _Assume_(text || start >= end);
5699 this->interval.end = start;
5700 for (;;) {
5701 if (this->interval.end < end && text[this->interval.end]) {
5702 if ((unsigned int)text[this->interval.end] < 0x20 ||
5703 (unsigned int)text[this->interval.end] == 0x7f ||
5704 text[this->interval.end] == '(' ||
5705 text[this->interval.end] == ')' ||
5706 text[this->interval.end] == '<' ||
5707 text[this->interval.end] == '>' ||
5708 text[this->interval.end] == '@' ||
5709 text[this->interval.end] == ',' ||
5710 text[this->interval.end] == ';' ||
5711 text[this->interval.end] == ':' ||
5712 text[this->interval.end] == '\\' ||
5713 text[this->interval.end] == '\"' ||
5714 text[this->interval.end] == '/' ||
5715 text[this->interval.end] == '[' ||
5716 text[this->interval.end] == ']' ||
5717 text[this->interval.end] == '?' ||
5718 text[this->interval.end] == '=' ||
5719 text[this->interval.end] == '{' ||
5720 text[this->interval.end] == '}' ||
5721 stdex::isspace(text[this->interval.end]))
5722 break;
5723 else
5724 this->interval.end++;
5725 }
5726 else
5727 break;
5728 }
5730 this->interval.start = start;
5731 return true;
5732 }
5733 else {
5734 this->interval.invalidate();
5735 return false;
5736 }
5737 }
5738 };
5739
5744 {
5745 public:
5746 virtual void invalidate()
5747 {
5748 content.start = 1;
5749 content.end = 0;
5750 parser::invalidate();
5751 }
5752
5754
5755 protected:
5756 virtual bool do_match(
5757 _In_reads_or_z_(end) const char* text,
5758 _In_ size_t start = 0,
5759 _In_ size_t end = SIZE_MAX,
5760 _In_ int flags = match_default)
5761 {
5762 _Assume_(text || start >= end);
5763 this->interval.end = start;
5764 if (this->interval.end < end && text[this->interval.end] != '"')
5765 goto error;
5766 this->interval.end++;
5767 content.start = this->interval.end;
5768 for (;;) {
5769 _Assume_(text || this->interval.end >= end);
5770 if (this->interval.end < end && text[this->interval.end]) {
5771 if (text[this->interval.end] == '"') {
5772 content.end = this->interval.end;
5773 this->interval.end++;
5774 break;
5775 }
5776 else if (text[this->interval.end] == '\\') {
5777 this->interval.end++;
5778 if (this->interval.end < end && text[this->interval.end]) {
5779 this->interval.end++;
5780 }
5781 else
5782 goto error;
5783 }
5784 else if (m_chr.match(text, this->interval.end, end, flags))
5785 this->interval.end++;
5786 else
5787 goto error;
5788 }
5789 else
5790 goto error;
5791 }
5792 this->interval.start = start;
5793 return true;
5794
5795 error:
5796 invalidate();
5797 return false;
5798 }
5799
5800 http_text_char m_chr;
5801 };
5802
5806 class http_value : public parser
5807 {
5808 public:
5809 virtual void invalidate()
5810 {
5811 string.invalidate();
5812 token.invalidate();
5813 parser::invalidate();
5814 }
5815
5818
5819 protected:
5820 virtual bool do_match(
5821 _In_reads_or_z_(end) const char* text,
5822 _In_ size_t start = 0,
5823 _In_ size_t end = SIZE_MAX,
5824 _In_ int flags = match_default)
5825 {
5826 _Assume_(text || start >= end);
5827 this->interval.end = start;
5828 if (string.match(text, this->interval.end, end, flags)) {
5829 token.invalidate();
5830 this->interval.end = string.interval.end;
5831 this->interval.start = start;
5832 return true;
5833 }
5834 else if (token.match(text, this->interval.end, end, flags)) {
5835 string.invalidate();
5836 this->interval.end = token.interval.end;
5837 this->interval.start = start;
5838 return true;
5839 }
5840 else {
5841 this->interval.invalidate();
5842 return false;
5843 }
5844 }
5845 };
5846
5850 class http_parameter : public parser
5851 {
5852 public:
5853 virtual void invalidate()
5854 {
5855 name.invalidate();
5856 value.invalidate();
5857 parser::invalidate();
5858 }
5859
5862
5863 protected:
5864 virtual bool do_match(
5865 _In_reads_or_z_(end) const char* text,
5866 _In_ size_t start = 0,
5867 _In_ size_t end = SIZE_MAX,
5868 _In_ int flags = match_default)
5869 {
5870 _Assume_(text || start >= end);
5871 this->interval.end = start;
5872 if (name.match(text, this->interval.end, end, flags))
5873 this->interval.end = name.interval.end;
5874 else
5875 goto error;
5876 while (m_space.match(text, this->interval.end, end, flags))
5877 this->interval.end = m_space.interval.end;
5878 _Assume_(text || this->interval.end >= end);
5879 if (this->interval.end < end && text[this->interval.end] == '=')
5880 this->interval.end++;
5881 else
5882 while (m_space.match(text, this->interval.end, end, flags))
5883 this->interval.end = m_space.interval.end;
5884 if (value.match(text, this->interval.end, end, flags))
5885 this->interval.end = value.interval.end;
5886 else
5887 goto error;
5888 this->interval.start = start;
5889 return true;
5890
5891 error:
5892 invalidate();
5893 return false;
5894 }
5895
5896 http_space m_space;
5897 };
5898
5902 class http_any_type : public parser
5903 {
5904 protected:
5905 virtual bool do_match(
5906 _In_reads_or_z_(end) const char* text,
5907 _In_ size_t start = 0,
5908 _In_ size_t end = SIZE_MAX,
5909 _In_ int flags = match_default)
5910 {
5911 _Assume_(text || start >= end);
5912 if (start + 2 < end &&
5913 text[start] == '*' &&
5914 text[start + 1] == '/' &&
5915 text[start + 2] == '*')
5916 {
5917 this->interval.end = (this->interval.start = start) + 3;
5918 return true;
5919 }
5920 else if (start < end && text[start] == '*') {
5921 this->interval.end = (this->interval.start = start) + 1;
5922 return true;
5923 }
5924 else {
5925 this->interval.invalidate();
5926 return false;
5927 }
5928 }
5929 };
5930
5935 {
5936 public:
5937 virtual void invalidate()
5938 {
5939 type.invalidate();
5940 subtype.invalidate();
5941 parser::invalidate();
5942 }
5943
5944 http_token type;
5945 http_token subtype;
5946
5947 protected:
5948 virtual bool do_match(
5949 _In_reads_or_z_(end) const char* text,
5950 _In_ size_t start = 0,
5951 _In_ size_t end = SIZE_MAX,
5952 _In_ int flags = match_default)
5953 {
5954 _Assume_(text || start >= end);
5955 this->interval.end = start;
5956 if (type.match(text, this->interval.end, end, flags))
5957 this->interval.end = type.interval.end;
5958 else
5959 goto error;
5960 while (m_space.match(text, this->interval.end, end, flags))
5961 this->interval.end = m_space.interval.end;
5962 if (this->interval.end < end && text[this->interval.end] == '/')
5963 this->interval.end++;
5964 else
5965 goto error;
5966 while (m_space.match(text, this->interval.end, end, flags))
5967 this->interval.end = m_space.interval.end;
5968 if (subtype.match(text, this->interval.end, end, flags))
5969 this->interval.end = subtype.interval.end;
5970 else
5971 goto error;
5972 this->interval.start = start;
5973 return true;
5974
5975 error:
5976 invalidate();
5977 return false;
5978 }
5979
5980 http_space m_space;
5981 };
5982
5987 {
5988 public:
5989 virtual void invalidate()
5990 {
5991 params.clear();
5992 http_media_range::invalidate();
5993 }
5994
5995 std::list<http_parameter> params;
5996
5997 protected:
5998 virtual bool do_match(
5999 _In_reads_or_z_(end) const char* text,
6000 _In_ size_t start = 0,
6001 _In_ size_t end = SIZE_MAX,
6002 _In_ int flags = match_default)
6003 {
6004 _Assume_(text || start >= end);
6005 if (!http_media_range::do_match(text, start, end, flags))
6006 goto error;
6007 params.clear();
6008 for (;;) {
6009 if (this->interval.end < end && text[this->interval.end]) {
6010 if (m_space.match(text, this->interval.end, end, flags))
6011 this->interval.end = m_space.interval.end;
6012 else if (text[this->interval.end] == ';') {
6013 this->interval.end++;
6014 while (m_space.match(text, this->interval.end, end, flags))
6015 this->interval.end = m_space.interval.end;
6016 http_parameter param;
6017 if (param.match(text, this->interval.end, end, flags)) {
6018 this->interval.end = param.interval.end;
6019 params.push_back(std::move(param));
6020 }
6021 else
6022 break;
6023 }
6024 else
6025 break;
6026 }
6027 else
6028 break;
6029 }
6030 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
6031 return true;
6032
6033 error:
6034 invalidate();
6035 return false;
6036 }
6037 };
6038
6043 {
6044 protected:
6045 virtual bool do_match(
6046 _In_reads_or_z_(end) const char* text,
6047 _In_ size_t start = 0,
6048 _In_ size_t end = SIZE_MAX,
6049 _In_ int flags = match_default)
6050 {
6051 _Assume_(text || start >= end);
6052 this->interval.end = start;
6053 for (;;) {
6054 if (this->interval.end < end && text[this->interval.end]) {
6055 if ((unsigned int)text[this->interval.end] < 0x20 ||
6056 (unsigned int)text[this->interval.end] == 0x7f ||
6057 text[this->interval.end] == ':' ||
6058 text[this->interval.end] == '/' ||
6059 stdex::isspace(text[this->interval.end]))
6060 break;
6061 else
6062 this->interval.end++;
6063 }
6064 else
6065 break;
6066 }
6068 this->interval.start = start;
6069 return true;
6070 }
6071 this->interval.invalidate();
6072 return false;
6073 }
6074 };
6075
6079 class http_url_port : public parser
6080 {
6081 public:
6082 http_url_port(_In_ const std::locale& locale = std::locale()) :
6083 parser(locale),
6084 value(0)
6085 {}
6086
6087 virtual void invalidate()
6088 {
6089 value = 0;
6090 parser::invalidate();
6091 }
6092
6093 uint16_t value;
6094
6095 protected:
6096 virtual bool do_match(
6097 _In_reads_or_z_(end) const char* text,
6098 _In_ size_t start = 0,
6099 _In_ size_t end = SIZE_MAX,
6100 _In_ int flags = match_default)
6101 {
6102 _Assume_(text || start >= end);
6103 value = 0;
6104 this->interval.end = start;
6105 for (;;) {
6106 if (this->interval.end < end && text[this->interval.end]) {
6107 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6108 size_t _value = (size_t)value * 10 + text[this->interval.end] - '0';
6109 if (_value > (uint16_t)-1) {
6110 value = 0;
6111 this->interval.invalidate();
6112 return false;
6113 }
6114 value = (uint16_t)_value;
6115 this->interval.end++;
6116 }
6117 else
6118 break;
6119 }
6120 else
6121 break;
6122 }
6124 this->interval.start = start;
6125 return true;
6126 }
6127 this->interval.invalidate();
6128 return false;
6129 }
6130 };
6131
6136 {
6137 protected:
6138 virtual bool do_match(
6139 _In_reads_or_z_(end) const char* text,
6140 _In_ size_t start = 0,
6141 _In_ size_t end = SIZE_MAX,
6142 _In_ int flags = match_default)
6143 {
6144 _Assume_(text || start >= end);
6145 this->interval.end = start;
6146 for (;;) {
6147 if (this->interval.end < end && text[this->interval.end]) {
6148 if ((unsigned int)text[this->interval.end] < 0x20 ||
6149 (unsigned int)text[this->interval.end] == 0x7f ||
6150 text[this->interval.end] == '?' ||
6151 text[this->interval.end] == '/' ||
6152 stdex::isspace(text[this->interval.end]))
6153 break;
6154 else
6155 this->interval.end++;
6156 }
6157 else
6158 break;
6159 }
6160 this->interval.start = start;
6161 return true;
6162 }
6163 };
6164
6168 class http_url_path : public parser
6169 {
6170 public:
6171 virtual void invalidate()
6172 {
6173 segments.clear();
6174 parser::invalidate();
6175 }
6176
6177 std::vector<http_url_path_segment> segments;
6178
6179 protected:
6180 virtual bool do_match(
6181 _In_reads_or_z_(end) const char* text,
6182 _In_ size_t start = 0,
6183 _In_ size_t end = SIZE_MAX,
6184 _In_ int flags = match_default)
6185 {
6186 _Assume_(text || start >= end);
6188 this->interval.end = start;
6189 segments.clear();
6190 _Assume_(text || this->interval.end >= end);
6191 if (this->interval.end < end && text[this->interval.end] != '/')
6192 goto error;
6193 this->interval.end++;
6194 s.match(text, this->interval.end, end, flags);
6195 segments.push_back(s);
6196 this->interval.end = s.interval.end;
6197 for (;;) {
6198 if (this->interval.end < end && text[this->interval.end]) {
6199 if (text[this->interval.end] == '/') {
6200 this->interval.end++;
6201 s.match(text, this->interval.end, end, flags);
6202 segments.push_back(s);
6203 this->interval.end = s.interval.end;
6204 }
6205 else
6206 break;
6207 }
6208 else
6209 break;
6210 }
6211 this->interval.start = start;
6212 return true;
6213
6214 error:
6215 invalidate();
6216 return false;
6217 }
6218 };
6219
6224 {
6225 public:
6226 virtual void invalidate()
6227 {
6228 name.start = 1;
6229 name.end = 0;
6230 value.start = 1;
6231 value.end = 0;
6232 parser::invalidate();
6233 }
6234
6237
6238 protected:
6239 virtual bool do_match(
6240 _In_reads_or_z_(end) const char* text,
6241 _In_ size_t start = 0,
6242 _In_ size_t end = SIZE_MAX,
6243 _In_ int flags = match_default)
6244 {
6245 _Assume_(text || start >= end);
6246 this->interval.end = start;
6247 name.start = this->interval.end;
6248 for (;;) {
6249 if (this->interval.end < end && text[this->interval.end]) {
6250 if ((unsigned int)text[this->interval.end] < 0x20 ||
6251 (unsigned int)text[this->interval.end] == 0x7f ||
6252 text[this->interval.end] == '&' ||
6253 text[this->interval.end] == '=' ||
6254 stdex::isspace(text[this->interval.end]))
6255 break;
6256 else
6257 this->interval.end++;
6258 }
6259 else
6260 break;
6261 }
6263 name.end = this->interval.end;
6264 else
6265 goto error;
6266 if (text[this->interval.end] == '=') {
6267 this->interval.end++;
6268 value.start = this->interval.end;
6269 for (;;) {
6270 if (this->interval.end < end && text[this->interval.end]) {
6271 if ((unsigned int)text[this->interval.end] < 0x20 ||
6272 (unsigned int)text[this->interval.end] == 0x7f ||
6273 text[this->interval.end] == '&' ||
6274 stdex::isspace(text[this->interval.end]))
6275 break;
6276 else
6277 this->interval.end++;
6278 }
6279 else
6280 break;
6281 }
6282 value.end = this->interval.end;
6283 }
6284 else {
6285 value.start = 1;
6286 value.end = 0;
6287 }
6288 this->interval.start = start;
6289 return true;
6290
6291 error:
6292 invalidate();
6293 return false;
6294 }
6295 };
6296
6300 class http_url : public parser
6301 {
6302 public:
6303 http_url(_In_ const std::locale& locale = std::locale()) :
6304 parser(locale),
6305 port(locale)
6306 {}
6307
6308 virtual void invalidate()
6309 {
6310 server.invalidate();
6311 port.invalidate();
6312 path.invalidate();
6313 params.clear();
6314 parser::invalidate();
6315 }
6316
6317 http_url_server server;
6318 http_url_port port;
6319 http_url_path path;
6320 std::list<http_url_parameter> params;
6321
6322 protected:
6323 virtual bool do_match(
6324 _In_reads_or_z_(end) const char* text,
6325 _In_ size_t start = 0,
6326 _In_ size_t end = SIZE_MAX,
6327 _In_ int flags = match_default)
6328 {
6329 _Assume_(text || start >= end);
6330 this->interval.end = start;
6331
6332 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", SIZE_MAX, m_locale) == 0) {
6333 this->interval.end += 7;
6334 if (server.match(text, this->interval.end, end, flags))
6335 this->interval.end = server.interval.end;
6336 else
6337 goto error;
6338 if (this->interval.end < end && text[this->interval.end] == ':') {
6339 this->interval.end++;
6340 if (port.match(text, this->interval.end, end, flags))
6341 this->interval.end = port.interval.end;
6342 }
6343 else {
6344 port.invalidate();
6345 port.value = 80;
6346 }
6347 }
6348 else {
6349 server.invalidate();
6350 port.invalidate();
6351 port.value = 80;
6352 }
6353
6354 if (path.match(text, this->interval.end, end, flags))
6355 this->interval.end = path.interval.end;
6356 else
6357 goto error;
6358
6359 params.clear();
6360
6361 if (this->interval.end < end && text[this->interval.end] == '?') {
6362 this->interval.end++;
6363 for (;;) {
6364 if (this->interval.end < end && text[this->interval.end]) {
6365 if ((unsigned int)text[this->interval.end] < 0x20 ||
6366 (unsigned int)text[this->interval.end] == 0x7f ||
6367 stdex::isspace(text[this->interval.end]))
6368 break;
6369 else if (text[this->interval.end] == '&')
6370 this->interval.end++;
6371 else {
6372 http_url_parameter param;
6373 if (param.match(text, this->interval.end, end, flags)) {
6374 this->interval.end = param.interval.end;
6375 params.push_back(std::move(param));
6376 }
6377 else
6378 break;
6379 }
6380 }
6381 else
6382 break;
6383 }
6384 }
6385
6386 this->interval.start = start;
6387 return true;
6388
6389 error:
6390 invalidate();
6391 return false;
6392 }
6393 };
6394
6398 class http_language : public parser
6399 {
6400 public:
6401 virtual void invalidate()
6402 {
6403 components.clear();
6404 parser::invalidate();
6405 }
6406
6407 std::vector<stdex::interval<size_t>> components;
6408
6409 protected:
6410 virtual bool do_match(
6411 _In_reads_or_z_(end) const char* text,
6412 _In_ size_t start = 0,
6413 _In_ size_t end = SIZE_MAX,
6414 _In_ int flags = match_default)
6415 {
6416 _Assume_(text || start >= end);
6417 this->interval.end = start;
6418 components.clear();
6419 for (;;) {
6420 if (this->interval.end < end && text[this->interval.end]) {
6422 k.end = this->interval.end;
6423 for (;;) {
6424 if (k.end < end && text[k.end]) {
6425 if (stdex::isalpha(text[k.end]))
6426 k.end++;
6427 else
6428 break;
6429 }
6430 else
6431 break;
6432 }
6433 if (this->interval.end < k.end) {
6434 k.start = this->interval.end;
6435 this->interval.end = k.end;
6436 components.push_back(k);
6437 }
6438 else
6439 break;
6440 if (this->interval.end < end && text[this->interval.end] == '-')
6441 this->interval.end++;
6442 else
6443 break;
6444 }
6445 else
6446 break;
6447 }
6448 if (!components.empty()) {
6449 this->interval.start = start;
6450 this->interval.end = components.back().end;
6451 return true;
6452 }
6453 this->interval.invalidate();
6454 return false;
6455 }
6456 };
6457
6461 class http_weight : public parser
6462 {
6463 public:
6464 http_weight(_In_ const std::locale& locale = std::locale()) :
6465 parser(locale),
6466 value(1.0f)
6467 {}
6468
6469 virtual void invalidate()
6470 {
6471 value = 1.0f;
6472 parser::invalidate();
6473 }
6474
6475 float value;
6476
6477 protected:
6478 virtual bool do_match(
6479 _In_reads_or_z_(end) const char* text,
6480 _In_ size_t start = 0,
6481 _In_ size_t end = SIZE_MAX,
6482 _In_ int flags = match_default)
6483 {
6484 _Assume_(text || start >= end);
6485 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
6486 this->interval.end = start;
6487 for (;;) {
6488 if (this->interval.end < end && text[this->interval.end]) {
6489 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6490 celi_del = celi_del * 10 + text[this->interval.end] - '0';
6491 this->interval.end++;
6492 }
6493 else if (text[this->interval.end] == '.') {
6494 this->interval.end++;
6495 for (;;) {
6496 if (this->interval.end < end && text[this->interval.end]) {
6497 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6498 decimalni_del = decimalni_del * 10 + text[this->interval.end] - '0';
6499 decimalni_del_n *= 10;
6500 this->interval.end++;
6501 }
6502 else
6503 break;
6504 }
6505 else
6506 break;
6507 }
6508 break;
6509 }
6510 else
6511 break;
6512 }
6513 else
6514 break;
6515 }
6518 this->interval.start = start;
6519 return true;
6520 }
6521 value = 1.0f;
6522 this->interval.invalidate();
6523 return false;
6524 }
6525 };
6526
6530 class http_asterisk : public parser
6531 {
6532 protected:
6533 virtual bool do_match(
6534 _In_reads_or_z_(end) const char* text,
6535 _In_ size_t start = 0,
6536 _In_ size_t end = SIZE_MAX,
6537 _In_ int flags = match_default)
6538 {
6539 _Assume_(text || end <= start);
6540 if (start < end && text[start] == '*') {
6541 this->interval.end = (this->interval.start = start) + 1;
6542 return true;
6543 }
6544 this->interval.invalidate();
6545 return false;
6546 }
6547 };
6548
6552 template <class T, class T_asterisk = http_asterisk>
6554 {
6555 public:
/// Constructs the parser, passing the locale on to the base parser and to the weight (factor) parser.
/// The asterisk and value members are default-constructed.
6556 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
6557 parser(locale),
6558 factor(locale)
6559 {}
6560
6561 virtual void invalidate()
6562 {
6563 asterisk.invalidate();
6564 value.invalidate();
6565 factor.invalidate();
6566 parser::invalidate();
6567 }
6568
6569 T_asterisk asterisk;
6570 T value;
6571 http_weight factor;
6572
6573 protected:
6574 virtual bool do_match(
6575 _In_reads_or_z_(end) const char* text,
6576 _In_ size_t start = 0,
6577 _In_ size_t end = SIZE_MAX,
6578 _In_ int flags = match_default)
6579 {
6580 _Assume_(text || start >= end);
6581 size_t konec_vrednosti;
6582 this->interval.end = start;
6583 if (asterisk.match(text, this->interval.end, end, flags)) {
6584 this->interval.end = konec_vrednosti = asterisk.interval.end;
6585 value.invalidate();
6586 }
6587 else if (value.match(text, this->interval.end, end, flags)) {
6588 this->interval.end = konec_vrednosti = value.interval.end;
6589 asterisk.invalidate();
6590 }
6591 else {
6592 asterisk.invalidate();
6593 value.invalidate();
6594 this->interval.invalidate();
6595 return false;
6596 }
6597
6598 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6599 if (this->interval.end < end && text[this->interval.end] == ';') {
6600 this->interval.end++;
6601 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6602 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
6603 this->interval.end++;
6604 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6605 if (this->interval.end < end && text[this->interval.end] == '=') {
6606 this->interval.end++;
6607 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6608 if (factor.match(text, this->interval.end, end, flags))
6609 this->interval.end = factor.interval.end;
6610 }
6611 }
6612 }
6613 if (!factor.interval) {
6614 factor.invalidate();
6615 this->interval.end = konec_vrednosti;
6616 }
6617 this->interval.start = start;
6618 return true;
6619 }
6620 };
6621
6626 {
     // Parser for a `$`-prefixed attribute of the form `$name = value`.
6627 public:
     // Invalidates both sub-parsers and this parser's own interval.
6628 virtual void invalidate()
6629 {
6630 name.invalidate();
6631 value.invalidate();
6632 parser::invalidate();
6633 }
6634
6635 http_token name; // Attribute name; matched right after the leading `$`
6636 http_value value; // Attribute value; matched after the `=`
6637
6638 protected:
     // Matches, in order: `$`, name, optional whitespace, `=`, optional whitespace, value.
     // Returns true with interval = [start, end of value) on success. If any piece is
     // missing, the whole parser (name and value included) is invalidated and false is returned.
6639 virtual bool do_match(
6640 _In_reads_or_z_(end) const char* text,
6641 _In_ size_t start = 0,
6642 _In_ size_t end = SIZE_MAX,
6643 _In_ int flags = match_default)
6644 {
6645 _Assume_(text || start >= end);
     // interval.end doubles as the running cursor while matching.
6646 this->interval.end = start;
6647 if (this->interval.end < end && text[this->interval.end] == '$')
6648 this->interval.end++;
6649 else
6650 goto error;
6651 if (name.match(text, this->interval.end, end, flags))
6652 this->interval.end = name.interval.end;
6653 else
6654 goto error;
     // Whitespace before `=` is optional and may repeat.
6655 while (m_space.match(text, this->interval.end, end, flags))
6656 this->interval.end = m_space.interval.end;
6657 if (this->interval.end < end && text[this->interval.end] == '=')
6658 this->interval.end++;
6659 else
6660 goto error;
     // Whitespace after `=` is optional and may repeat.
6661 while (m_space.match(text, this->interval.end, end, flags))
6662 this->interval.end = m_space.interval.end;
6663 if (value.match(text, this->interval.end, end, flags))
6664 this->interval.end = value.interval.end;
6665 else
6666 goto error;
6667 this->interval.start = start;
6668 return true;
6669
6670 error:
6671 invalidate();
6672 return false;
6673 }
6674
6675 http_space m_space; // Helper used to skip whitespace around `=`
6676 };
6677
// Parser for `name = value` followed by zero or more `; parameter` items.
6681 class http_cookie : public parser
6682 {
6683 public:
     // Invalidates name and value, drops all collected parameters and resets the interval.
6684 virtual void invalidate()
6685 {
6686 name.invalidate();
6687 value.invalidate();
6688 params.clear();
6689 parser::invalidate();
6690 }
6691
6694 std::list<http_cookie_parameter> params; // Parameters collected after the value, in order of appearance
6695
6696 protected:
     // Matches name, optional whitespace, `=`, optional whitespace, value; then repeatedly
     // whitespace or `;` + parameter. A missing name, `=` or value fails the match and
     // invalidates the parser. A `;` not followed by a valid parameter merely stops the loop.
     // On success the interval ends at the last parameter, or at the value when there is none,
     // so trailing whitespace and a dangling `;` are not part of the match.
6697 virtual bool do_match(
6698 _In_reads_or_z_(end) const char* text,
6699 _In_ size_t start = 0,
6700 _In_ size_t end = SIZE_MAX,
6701 _In_ int flags = match_default)
6702 {
6703 _Assume_(text || start >= end);
     // interval.end doubles as the running cursor while matching.
6704 this->interval.end = start;
6705 if (name.match(text, this->interval.end, end, flags))
6706 this->interval.end = name.interval.end;
6707 else
6708 goto error;
6709 while (m_space.match(text, this->interval.end, end, flags))
6710 this->interval.end = m_space.interval.end;
6711 if (this->interval.end < end && text[this->interval.end] == '=')
6712 this->interval.end++;
6713 else
6714 goto error;
6715 while (m_space.match(text, this->interval.end, end, flags))
6716 this->interval.end = m_space.interval.end;
6717 if (value.match(text, this->interval.end, end, flags))
6718 this->interval.end = value.interval.end;
6719 else
6720 goto error;
     // Start from an empty list so parameters of an earlier match do not leak in.
6721 params.clear();
6722 for (;;) {
6723 if (this->interval.end < end && text[this->interval.end]) {
6724 if (m_space.match(text, this->interval.end, end, flags))
6725 this->interval.end = m_space.interval.end;
6726 else if (text[this->interval.end] == ';') {
6727 this->interval.end++;
6728 while (m_space.match(text, this->interval.end, end, flags))
6729 this->interval.end = m_space.interval.end;
     // NOTE(review): the declaration of `param` is not visible in this listing - presumably a local http_cookie_parameter; confirm.
6731 if (param.match(text, this->interval.end, end, flags)) {
6732 this->interval.end = param.interval.end;
6733 params.push_back(std::move(param));
6734 }
6735 else
6736 break;
6737 }
6738 else
6739 break;
6740 }
6741 else
6742 break;
6743 }
6744 this->interval.start = start;
     // Trim the cursor back to the end of the last successfully matched element.
6745 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
6746 return true;
6747
6748 error:
6749 invalidate();
6750 return false;
6751 }
6752
6753 http_space m_space; // Helper used to skip whitespace
6754 };
6755
// Parser for a `type[/version]` token: `type` runs up to `/`, whitespace or end of input;
// `version`, when a `/` is present, runs up to whitespace or end of input.
6759 class http_agent : public parser
6760 {
6761 public:
     // Marks `type` and `version` as empty (start > end) and resets the interval.
6762 virtual void invalidate()
6763 {
6764 type.start = 1;
6765 type.end = 0;
6766 version.start = 1;
6767 version.end = 0;
6768 parser::invalidate();
6769 }
6770
6773
6774 protected:
     // Scans `type` and the optional `/version` starting at `start`.
     // Success is decided by a condition that is not visible in this listing (see note below);
     // on failure `type`, `version` and the interval are all invalidated.
6775 virtual bool do_match(
6776 _In_reads_or_z_(end) const char* text,
6777 _In_ size_t start = 0,
6778 _In_ size_t end = SIZE_MAX,
6779 _In_ int flags = match_default)
6780 {
6781 _Assume_(text || start >= end);
     // interval.end doubles as the running cursor while scanning.
6782 this->interval.end = start;
6783 type.start = this->interval.end;
6784 for (;;) {
6785 if (this->interval.end < end && text[this->interval.end]) {
6786 if (text[this->interval.end] == '/') {
     // `/` ends the type and starts the version.
6787 type.end = this->interval.end;
6788 this->interval.end++;
6789 version.start = this->interval.end;
6790 for (;;) {
6791 if (this->interval.end < end && text[this->interval.end]) {
6792 if (stdex::isspace(text[this->interval.end])) {
6793 version.end = this->interval.end;
6794 break;
6795 }
6796 else
6797 this->interval.end++;
6798 }
6799 else {
     // End of input also terminates the version.
6800 version.end = this->interval.end;
6801 break;
6802 }
6803 }
6804 break;
6805 }
6806 else if (stdex::isspace(text[this->interval.end])) {
     // Whitespace before any `/`: type only; `version` is not touched on this path.
6807 type.end = this->interval.end;
6808 break;
6809 }
6810 else
6811 this->interval.end++;
6812 }
6813 else {
     // End of input before any `/`: type only; `version` is not touched on this path.
6814 type.end = this->interval.end;
6815 break;
6816 }
6817 }
     // NOTE(review): the condition opening this block is not visible in this listing - presumably a non-empty check on the scanned text; confirm.
6819 this->interval.start = start;
6820 return true;
6821 }
6822 type.start = 1;
6823 type.end = 0;
6824 version.start = 1;
6825 version.end = 0;
6826 this->interval.invalidate();
6827 return false;
6828 }
6829 };
6830
// Parser for `type/major[.minor]` terminated by whitespace.
// `version` packs the numbers as major * 0x100 + minor; it is 0x009 while nothing is matched.
6834 class http_protocol : public parser
6835 {
6836 public:
6837 http_protocol(_In_ const std::locale& locale = std::locale()) :
6838 parser(locale),
6839 version(0x009)
6840 {}
6841
     // Marks all sub-intervals as empty (start > end), restores version to 0x009 and resets the interval.
6842 virtual void invalidate()
6843 {
6844 type.start = 1;
6845 type.end = 0;
6846 version_maj.start = 1;
6847 version_maj.end = 0;
6848 version_min.start = 1;
6849 version_min.end = 0;
6850 version = 0x009;
6851 parser::invalidate();
6852 }
6853
6855 stdex::interval<size_t> version_maj; // Position of the major version digits
6856 stdex::interval<size_t> version_min; // Position of the minor version digits; empty when there is no `.minor`
6858
6859 protected:
     // Scans the type up to `/`, the major version up to `.` or whitespace and, after a `.`,
     // the minor version up to whitespace. The terminating whitespace is required but not
     // consumed: reaching end of input (or whitespace before the `/`) fails the match and
     // invalidates the parser.
6860 virtual bool do_match(
6861 _In_reads_or_z_(end) const char* text,
6862 _In_ size_t start = 0,
6863 _In_ size_t end = SIZE_MAX,
6864 _In_ int flags = match_default)
6865 {
6866 _Assume_(text || start >= end);
     // interval.end doubles as the running cursor while scanning.
6867 this->interval.end = start;
6868 type.start = this->interval.end;
6869 for (;;) {
6870 if (this->interval.end < end && text[this->interval.end]) {
6871 if (text[this->interval.end] == '/') {
6872 type.end = this->interval.end;
6873 this->interval.end++;
6874 break;
6875 }
6876 else if (stdex::isspace(text[this->interval.end]))
6877 goto error;
6878 else
6879 this->interval.end++;
6880 }
6881 else {
6882 type.end = this->interval.end;
6883 goto error;
6884 }
6885 }
6886 version_maj.start = this->interval.end;
6887 for (;;) {
6888 if (this->interval.end < end && text[this->interval.end]) {
6889 if (text[this->interval.end] == '.') {
6890 version_maj.end = this->interval.end;
6891 this->interval.end++;
6892 version_min.start = this->interval.end;
6893 for (;;) {
6894 if (this->interval.end < end && text[this->interval.end]) {
6895 if (stdex::isspace(text[this->interval.end])) {
6896 version_min.end = this->interval.end;
     // Both parts are parsed as base-10 numbers; major goes to the high byte.
6897 version =
6898 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6899 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6900 break;
6901 }
6902 else
6903 this->interval.end++;
6904 }
6905 else
6906 goto error;
6907 }
6908 break;
6909 }
6910 else if (stdex::isspace(text[this->interval.end])) {
     // No `.minor` part: mark version_min as empty and use minor = 0.
6911 version_maj.end = this->interval.end;
6912 version_min.start = 1;
6913 version_min.end = 0;
6914 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6915 break;
6916 }
6917 else
6918 this->interval.end++;
6919 }
6920 else
6921 goto error;
6922 }
6923 this->interval.start = start;
6924 return true;
6925
6926 error:
6927 invalidate();
6928 return false;
6929 }
6930 };
6931
// Parser for a request line: verb, URL and an optional protocol, up to and including the line break.
6935 class http_request : public parser
6936 {
6937 public:
6938 http_request(_In_ const std::locale& locale = std::locale()) :
6939 parser(locale),
6940 url(locale),
6941 protocol(locale)
6942 {}
6943
     // Marks `verb` as empty (start > end), invalidates url and protocol and resets the interval.
6944 virtual void invalidate()
6945 {
6946 verb.start = 1;
6947 verb.end = 0;
6948 url.invalidate();
6949 protocol.invalidate();
6950 parser::invalidate();
6951 }
6952
6954 http_url url; // URL part of the request line
6955 http_protocol protocol; // Protocol part; left invalid when the line has none
6956
6957 protected:
     // Steps: skip leading whitespace, read the verb up to whitespace, skip whitespace,
     // match the URL, then optionally match the protocol and consume the rest of the line.
     // A line break or end of input before the URL fails the match and invalidates the
     // parser; after the URL the same conditions simply finish the match successfully.
6958 virtual bool do_match(
6959 _In_reads_or_z_(end) const char* text,
6960 _In_ size_t start = 0,
6961 _In_ size_t end = SIZE_MAX,
6962 _In_ int flags = match_default)
6963 {
6964 _Assume_(text || start >= end);
     // interval.end doubles as the running cursor while scanning.
6965 this->interval.end = start;
6966
     // Skip whitespace in front of the verb.
6967 for (;;) {
6968 if (m_line_break.match(text, this->interval.end, end, flags))
6969 goto error;
6970 else if (this->interval.end < end && text[this->interval.end]) {
6971 if (stdex::isspace(text[this->interval.end]))
6972 this->interval.end++;
6973 else
6974 break;
6975 }
6976 else
6977 goto error;
6978 }
     // The verb runs up to the next whitespace character.
6979 verb.start = this->interval.end;
6980 for (;;) {
6981 if (m_line_break.match(text, this->interval.end, end, flags))
6982 goto error;
6983 else if (this->interval.end < end && text[this->interval.end]) {
6984 if (stdex::isspace(text[this->interval.end])) {
6985 verb.end = this->interval.end;
6986 this->interval.end++;
6987 break;
6988 }
6989 else
6990 this->interval.end++;
6991 }
6992 else
6993 goto error;
6994 }
6995
     // Skip whitespace between the verb and the URL.
6996 for (;;) {
6997 if (m_line_break.match(text, this->interval.end, end, flags))
6998 goto error;
6999 else if (this->interval.end < end && text[this->interval.end]) {
7000 if (stdex::isspace(text[this->interval.end]))
7001 this->interval.end++;
7002 else
7003 break;
7004 }
7005 else
7006 goto error;
7007 }
7008 if (url.match(text, this->interval.end, end, flags))
7009 this->interval.end = url.interval.end;
7010 else
7011 goto error;
7012
     // The protocol is optional; start from an invalid state so a stale match is not reported.
7013 protocol.invalidate();
     // Skip whitespace after the URL; a line break or end of input here ends the request line.
7014 for (;;) {
7015 if (m_line_break.match(text, this->interval.end, end, flags)) {
7016 this->interval.end = m_line_break.interval.end;
7017 goto end;
7018 }
7019 else if (this->interval.end < end && text[this->interval.end]) {
7020 if (stdex::isspace(text[this->interval.end]))
7021 this->interval.end++;
7022 else
7023 break;
7024 }
7025 else
7026 goto end;
7027 }
     // Every branch below leaves the loop, so it runs at most once.
7028 for (;;) {
7029 if (m_line_break.match(text, this->interval.end, end, flags)) {
7030 this->interval.end = m_line_break.interval.end;
7031 goto end;
7032 }
7033 else if (protocol.match(text, this->interval.end, end, flags)) {
7034 this->interval.end = protocol.interval.end;
7035 break;
7036 }
7037 else
7038 goto end;
7039 }
7040
     // Consume whatever follows the protocol, up to and including the line break.
7041 for (;;) {
7042 if (m_line_break.match(text, this->interval.end, end, flags)) {
7043 this->interval.end = m_line_break.interval.end;
7044 break;
7045 }
7046 else if (this->interval.end < end && text[this->interval.end])
7047 this->interval.end++;
7048 else
7049 goto end;
7050 }
7051
7052 end:
7053 this->interval.start = start;
7054 return true;
7055
7056 error:
7057 invalidate();
7058 return false;
7059 }
7060
7061 http_line_break m_line_break; // Helper used to detect line breaks
7062 };
7063
// Parser for a single `name: value` header, including continuation lines.
7067 class http_header : public parser
7068 {
7069 public:
     // Marks `name` and `value` as empty (start > end) and resets the interval.
7070 virtual void invalidate()
7071 {
7072 name.start = 1;
7073 name.end = 0;
7074 value.start = 1;
7075 value.end = 0;
7076 parser::invalidate();
7077 }
7078
7081
7082 protected:
     // The name runs from `start` up to `:`; whitespace between the name and the `:` is
     // allowed. The value is everything after the `:` with leading and trailing whitespace
     // trimmed. A line break ends the header unless the next line begins with whitespace
     // (and is not itself a line break), in which case it is treated as a continuation.
     // A header that starts with a line break or whitespace, or that has no `:` before the
     // line break or end of input, fails the match and invalidates the parser.
7083 virtual bool do_match(
7084 _In_reads_or_z_(end) const char* text,
7085 _In_ size_t start = 0,
7086 _In_ size_t end = SIZE_MAX,
7087 _In_ int flags = match_default)
7088 {
7089 _Assume_(text || start >= end);
     // interval.end doubles as the running cursor while scanning.
7090 this->interval.end = start;
7091
7092 if (m_line_break.match(text, this->interval.end, end, flags) ||
7093 (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])))
7094 goto error;
7095 name.start = this->interval.end;
7096 for (;;) {
7097 if (m_line_break.match(text, this->interval.end, end, flags))
7098 goto error;
7099 else if (this->interval.end < end && text[this->interval.end]) {
7100 if (stdex::isspace(text[this->interval.end])) {
     // Whitespace ends the name; only more whitespace and then `:` may follow.
7101 name.end = this->interval.end;
7102 this->interval.end++;
7103 for (;;) {
7104 if (m_line_break.match(text, this->interval.end, end, flags))
7105 goto error;
7106 else if (this->interval.end < end && text[this->interval.end]) {
7107 if (stdex::isspace(text[this->interval.end]))
7108 this->interval.end++;
7109 else
7110 break;
7111 }
7112 else
7113 goto error;
7114 }
7115 if (this->interval.end < end && text[this->interval.end] == ':') {
7116 this->interval.end++;
7117 break;
7118 }
7119 else
7120 goto error;
     // Unreachable: both branches above leave this point.
7121 break;
7122 }
7123 else if (text[this->interval.end] == ':') {
7124 name.end = this->interval.end;
7125 this->interval.end++;
7126 break;
7127 }
7128 else
7129 this->interval.end++;
7130 }
7131 else
7132 goto error;
7133 }
     // SIZE_MAX marks "no value character seen yet"; if none is found, value stays empty (start > end).
7134 value.start = SIZE_MAX;
7135 value.end = 0;
7136 for (;;) {
7137 if (m_line_break.match(text, this->interval.end, end, flags)) {
7138 this->interval.end = m_line_break.interval.end;
     // Continuation line: the next line is not a line break and starts with whitespace.
7139 if (!m_line_break.match(text, this->interval.end, end, flags) &&
7140 this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end]))
7141 this->interval.end++;
7142 else
7143 break;
7144 }
7145 else if (this->interval.end < end && text[this->interval.end]) {
7146 if (stdex::isspace(text[this->interval.end]))
7147 this->interval.end++;
7148 else {
     // Non-space character: the first one fixes value.start, every one extends value.end.
7149 if (value.start == SIZE_MAX) value.start = this->interval.end;
7150 value.end = ++this->interval.end;
7151 }
7152 }
7153 else
7154 break;
7155 }
7156 this->interval.start = start;
7157 return true;
7158
7159 error:
7160 invalidate();
7161 return false;
7162 }
7163
7164 http_line_break m_line_break; // Helper used to detect line breaks
7165 };
7166
7170 template <class KEY, class T>
7171 class http_value_collection : public T
7172 {
7173 public:
7174 void insert(
7175 _In_reads_or_z_(end) const char* text,
7176 _In_ size_t start = 0,
7177 _In_ size_t end = SIZE_MAX,
7178 _In_ int flags = match_default)
7179 {
7180 while (start < end) {
7181 while (start < end && text[start] && stdex::isspace(text[start])) start++;
7182 if (start < end && text[start] == ',') {
7183 start++;
7184 while (start < end&& text[start] && stdex::isspace(text[start])) start++;
7185 }
7186 KEY el;
7187 if (el.match(text, start, end, flags)) {
7188 start = el.interval.end;
7189 T::insert(std::move(el));
7190 }
7191 else
7192 break;
7193 }
7194 }
7195 };
7196
7197 template <class T>
// Ordering predicate: `a` sorts before `b` when its weight (`factor.value`) is greater,
// i.e. elements end up in descending order of weight.
7199 constexpr bool operator()(const T& a, const T& b) const noexcept
7200 {
7201 return a.factor.value > b.factor.value;
7202 }
7203 };
7204
7208 template <class T, class AX = std::allocator<T>>
7210
// Parser for a quoted string with backslash-style escapes; the decoded text is collected in `value`.
// All lexical elements (quote, plain character, escape introducer, individual escape letters,
// hexadecimal number) are supplied by the caller as sub-parsers.
7214 template <class T>
7216 {
7217 public:
     // NOTE(review): the constructor's opening line is not visible in this listing; the parameters below belong to it.
7219 _In_ const std::shared_ptr<basic_parser<T>>& quote,
7220 _In_ const std::shared_ptr<basic_parser<T>>& chr,
7221 _In_ const std::shared_ptr<basic_parser<T>>& escape,
7222 _In_ const std::shared_ptr<basic_parser<T>>& sol,
7223 _In_ const std::shared_ptr<basic_parser<T>>& bs,
7224 _In_ const std::shared_ptr<basic_parser<T>>& ff,
7225 _In_ const std::shared_ptr<basic_parser<T>>& lf,
7226 _In_ const std::shared_ptr<basic_parser<T>>& cr,
7227 _In_ const std::shared_ptr<basic_parser<T>>& htab,
7228 _In_ const std::shared_ptr<basic_parser<T>>& uni,
7229 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
7230 _In_ const std::locale& locale = std::locale()) :
7232 m_quote(quote),
7233 m_chr(chr),
7234 m_escape(escape),
7235 m_sol(sol),
7236 m_bs(bs),
7237 m_ff(ff),
7238 m_lf(lf),
7239 m_cr(cr),
7240 m_htab(htab),
7241 m_uni(uni),
7242 m_hex(hex)
7243 {}
7244
     // Clears the decoded text.
7245 virtual void invalidate()
7246 {
7247 value.clear();
7249 }
7250
7251 std::basic_string<T> value; // Decoded string content, without the quotes and with escapes resolved
7252
7253 protected:
     // Matches: quote, then any sequence of escapes and plain characters, then quote.
     // Returns true with interval covering both quotes. If the closing quote is never
     // reached, `value` is cleared, the interval invalidated and false returned.
7254 virtual bool do_match(
7255 _In_reads_or_z_opt_(end) const T* text,
7256 _In_ size_t start = 0,
7257 _In_ size_t end = SIZE_MAX,
7258 _In_ int flags = match_default)
7259 {
7260 _Assume_(text || start >= end);
     // interval.end doubles as the running cursor while scanning.
7261 this->interval.end = start;
7262 if (m_quote->match(text, this->interval.end, end, flags)) {
7263 this->interval.end = m_quote->interval.end;
7264 value.clear();
7265 for (;;) {
     // Closing quote: done.
7266 if (m_quote->match(text, this->interval.end, end, flags)) {
7267 this->interval.start = start;
7268 this->interval.end = m_quote->interval.end;
7269 return true;
7270 }
     // Escape introducer: try each known escape right after it.
7271 if (m_escape->match(text, this->interval.end, end, flags)) {
7272 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
7273 value += '"'; this->interval.end = m_quote->interval.end;
7274 continue;
7275 }
7276 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
7277 value += '/'; this->interval.end = m_sol->interval.end;
7278 continue;
7279 }
7280 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
7281 value += '\b'; this->interval.end = m_bs->interval.end;
7282 continue;
7283 }
7284 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
7285 value += '\f'; this->interval.end = m_ff->interval.end;
7286 continue;
7287 }
7288 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
7289 value += '\n'; this->interval.end = m_lf->interval.end;
7290 continue;
7291 }
7292 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
7293 value += '\r'; this->interval.end = m_cr->interval.end;
7294 continue;
7295 }
7296 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
7297 value += '\t'; this->interval.end = m_htab->interval.end;
7298 continue;
7299 }
     // Unicode escape: the hex parser is limited to the next 4 characters and must use all of them.
7300 if (
7301 m_uni->match(text, m_escape->interval.end, end, flags) &&
7302 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
7303 m_hex->interval.size() == 4 /* JSON requires 4-digit Unicode sequences: \u.... */)
7304 {
7305 _Assume_(m_hex->value <= 0xffff);
     // For 1-byte characters the code unit is written as a 1-, 2- or 3-byte UTF-8 sequence;
     // for wider characters it is stored as a single element.
     // NOTE(review): each \uXXXX is encoded on its own; surrogate pairs do not appear to be combined - confirm this is intended.
7306 if (sizeof(T) == 1) {
7307 if (m_hex->value > 0x7ff) {
7308 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
7309 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
7310 value += (T)(0x80 | (m_hex->value & 0x3f));
7311 }
7312 else if (m_hex->value > 0x7f) {
7313 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
7314 value += (T)(0x80 | (m_hex->value & 0x3f));
7315 }
7316 else
7317 value += (T)(m_hex->value & 0x7f);
7318 }
7319 else
7320 value += (T)m_hex->value;
7321 this->interval.end = m_hex->interval.end;
7322 continue;
7323 }
     // Escaped escape introducer.
7324 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
7325 value += '\\'; this->interval.end = m_escape->interval.end;
7326 continue;
7327 }
     // Unknown escape: fall through and let the plain-character parser decide at the current position.
7328 }
     // Plain character(s): copied verbatim.
7329 if (m_chr->match(text, this->interval.end, end, flags)) {
7330 value.append(text + m_chr->interval.start, m_chr->interval.size());
7331 this->interval.end = m_chr->interval.end;
7332 continue;
7333 }
7334 break;
7335 }
7336 }
7337 value.clear();
7338 this->interval.invalidate();
7339 return false;
7340 }
7341
7342 std::shared_ptr<basic_parser<T>> m_quote;
7343 std::shared_ptr<basic_parser<T>> m_chr;
7344 std::shared_ptr<basic_parser<T>> m_escape;
7345 std::shared_ptr<basic_parser<T>> m_sol;
7346 std::shared_ptr<basic_parser<T>> m_bs;
7347 std::shared_ptr<basic_parser<T>> m_ff;
7348 std::shared_ptr<basic_parser<T>> m_lf;
7349 std::shared_ptr<basic_parser<T>> m_cr;
7350 std::shared_ptr<basic_parser<T>> m_htab;
7351 std::shared_ptr<basic_parser<T>> m_uni;
7352 std::shared_ptr<basic_integer16<T>> m_hex;
7353 };
7354
// tjson_string resolves to the wide variant when _UNICODE is defined, and to the narrow variant otherwise.
7357#ifdef _UNICODE
7358 using tjson_string = wjson_string;
7359#else
7360 using tjson_string = json_string;
7361#endif
7362
// Parser for a `/* ... */` comment; `content` receives the position of the text between the delimiters.
7366 template <class T>
7368 {
7369 public:
     // Invalidates `content` and this parser's own interval.
     // NOTE(review): `basic_parser::invalidate()` names the base template without `<T>`; confirm it builds on all target compilers.
7370 virtual void invalidate()
7371 {
7372 this->content.invalidate();
7373 basic_parser::invalidate();
7374 }
7375
7377
7378 protected:
     // Requires `/*` at `start`, then scans for the first `*/`.
     // Returns true with interval covering both delimiters. A comment that is not closed
     // before `end` or before a zero terminator fails the match. `flags` is ignored.
7379 virtual bool do_match(
7380 _In_reads_or_z_opt_(end) const T* text,
7381 _In_ size_t start = 0,
7382 _In_ size_t end = SIZE_MAX,
7383 _In_ int flags = match_multiline)
7384 {
7385 _Unreferenced_(flags);
7386 _Assume_(text || start + 1 >= end);
7387 if (start + 1 < end &&
7388 text[start] == '/' &&
7389 text[start + 1] == '*')
7390 {
7391 // /*
7392 this->content.start = this->interval.end = start + 2;
7393 for (;;) {
7394 if (this->interval.end >= end || !text[this->interval.end])
7395 break;
7396 if (this->interval.end + 1 < end &&
7397 text[this->interval.end] == '*' &&
7398 text[this->interval.end + 1] == '/')
7399 {
7400 // /*...*/
7401 this->content.end = this->interval.end;
7402 this->interval.start = start;
7403 this->interval.end = this->interval.end + 2;
7404 return true;
7405 }
7406 this->interval.end++;
7407 }
7408 }
7409 this->content.invalidate();
7410 this->interval.invalidate();
7411 return false;
7412 }
7413 };
7414
// Narrow and wide character instantiations; tcss_comment picks the wide one when _UNICODE is defined.
7415 using css_comment = basic_css_comment<char>;
7416 using wcss_comment = basic_css_comment<wchar_t>;
7417#ifdef _UNICODE
7418 using tcss_comment = wcss_comment;
7419#else
7420 using tcss_comment = css_comment;
7421#endif
7422
7426 template <class T>
7427 class basic_css_cdo : public basic_parser<T>
7428 {
7429 protected:
7430 virtual bool do_match(
7431 _In_reads_or_z_opt_(end) const T* text,
7432 _In_ size_t start = 0,
7433 _In_ size_t end = SIZE_MAX,
7434 _In_ int flags = match_multiline)
7435 {
7436 _Unreferenced_(flags);
7437 _Assume_(text || start + 3 >= end);
7438 if (start + 3 < end &&
7439 text[start] == '<' &&
7440 text[start + 1] == '!' &&
7441 text[start + 2] == '-' &&
7442 text[start + 3] == '-')
7443 {
7444 this->interval.start = start;
7445 this->interval.end = start + 4;
7446 return true;
7447 }
7448 this->interval.invalidate();
7449 return false;
7450 }
7451 };
7452
// tcss_cdo resolves to the wide variant when _UNICODE is defined, and to the narrow variant otherwise.
7455#ifdef _UNICODE
7456 using tcss_cdo = wcss_cdo;
7457#else
7458 using tcss_cdo = css_cdo;
7459#endif
7460
7464 template <class T>
7465 class basic_css_cdc : public basic_parser<T>
7466 {
7467 protected:
7468 virtual bool do_match(
7469 _In_reads_or_z_opt_(end) const T* text,
7470 _In_ size_t start = 0,
7471 _In_ size_t end = SIZE_MAX,
7472 _In_ int flags = match_multiline)
7473 {
7474 _Unreferenced_(flags);
7475 _Assume_(text || start + 2 >= end);
7476 if (start + 2 < end &&
7477 text[start] == '-' &&
7478 text[start + 1] == '-' &&
7479 text[start + 2] == '>')
7480 {
7481 this->interval.start = start;
7482 this->interval.end = start + 3;
7483 return true;
7484 }
7485 this->interval.invalidate();
7486 return false;
7487 }
7488 };
7489
// tcss_cdc resolves to the wide variant when _UNICODE is defined, and to the narrow variant otherwise.
7492#ifdef _UNICODE
7493 using tcss_cdc = wcss_cdc;
7494#else
7495 using tcss_cdc = css_cdc;
7496#endif
7497
// Parser for a string enclosed in double or single quotes; `content` receives the position of the text between the quotes.
7501 template <class T>
7503 {
7504 public:
     // Invalidates `content` and this parser's own interval.
     // NOTE(review): `basic_parser::invalidate()` names the base template without `<T>`; confirm it builds on all target compilers.
7505 virtual void invalidate()
7506 {
7507 this->content.invalidate();
7508 basic_parser::invalidate();
7509 }
7510
7512
7513 protected:
     // Requires `"` or `'` at `start` and scans for the same quote character again.
     // A backslash followed by either quote character is skipped as a pair, so an escaped
     // quote does not end the string. Returns true with interval covering both quotes;
     // a string not closed before `end` or a zero terminator fails the match. `flags` is ignored.
7514 virtual bool do_match(
7515 _In_reads_or_z_opt_(end) const T* text,
7516 _In_ size_t start = 0,
7517 _In_ size_t end = SIZE_MAX,
7518 _In_ int flags = match_multiline)
7519 {
7520 _Unreferenced_(flags);
     // interval.end doubles as the running cursor while scanning.
7521 this->interval.end = start;
7522 _Assume_(text || this->interval.end >= end);
7523 if (this->interval.end < end &&
7524 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7525 {
7526 // "Quoted...
     // Remember which quote opened the string; only the same one closes it.
7527 T quote = text[this->interval.end];
7528 this->content.start = ++this->interval.end;
7529 for (;;) {
7530 if (this->interval.end >= end || !text[this->interval.end])
7531 break;
7532 if (text[this->interval.end] == quote) {
7533 // End quote"
7534 this->content.end = this->interval.end;
7535 this->interval.start = start;
7536 this->interval.end++;
7537 return true;
7538 }
7539 if (this->interval.end + 1 < end &&
7540 text[this->interval.end] == '\\' &&
7541 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7542 {
7543 // Escaped quote
7544 this->interval.end = this->interval.end + 2;
7545 }
7546 else
7547 this->interval.end++;
7548 }
7549 }
7550
7551 this->content.invalidate();
7552 this->interval.invalidate();
7553 return false;
7554 }
7555 };
7556
// Narrow and wide character instantiations; tcss_string picks the wide one when _UNICODE is defined.
7557 using css_string = basic_css_string<char>;
7558 using wcss_string = basic_css_string<wchar_t>;
7559#ifdef _UNICODE
7560 using tcss_string = wcss_string;
7561#else
7562 using tcss_string = css_string;
7563#endif
7564
7568 template <class T>
7569 class basic_css_uri : public basic_parser<T>
7570 {
7571 public:
7572 virtual void invalidate()
7573 {
7574 this->content.invalidate();
7575 basic_parser::invalidate();
7576 }
7577
7579
7580 protected:
7581 virtual bool do_match(
7582 _In_reads_or_z_opt_(end) const T* text,
7583 _In_ size_t start = 0,
7584 _In_ size_t end = SIZE_MAX,
7585 _In_ int flags = match_multiline)
7586 {
7587 _Unreferenced_(flags);
7588 this->interval.end = start;
7589 _Assume_(text || this->interval.end + 3 >= end);
7590 if (this->interval.end + 3 < end &&
7591 (text[this->interval.end] == 'u' || text[this->interval.end] == 'U') &&
7592 (text[this->interval.end + 1] == 'r' || text[this->interval.end + 1] == 'R') &&
7593 (text[this->interval.end + 2] == 'l' || text[this->interval.end + 2] == 'L') &&
7594 text[this->interval.end + 3] == '(')
7595 {
7596 // url(
7597 this->interval.end = this->interval.end + 4;
7598
7599 // Skip whitespace.
7600 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7601 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7602
7603 if (this->interval.end < end &&
7604 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7605 {
7606 // url("Quoted...
7607 T quote = text[this->interval.end];
7608 this->content.start = ++this->interval.end;
7609 for (;;) {
7610 if (this->interval.end >= end || !text[this->interval.end])
7611 goto error;
7612 if (text[this->interval.end] == quote) {
7613 // End quote"
7614 this->content.end = this->interval.end;
7615 this->interval.end++;
7616 break;
7617 }
7618 if (this->interval.end + 1 < end &&
7619 text[this->interval.end] == '\\' &&
7620 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7621 {
7622 // Escaped quote
7623 this->interval.end = this->interval.end + 2;
7624 }
7625 else
7626 this->interval.end++;
7627 }
7628
7629 // Skip whitespace.
7630 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7631
7632 if (this->interval.end < end &&
7633 text[this->interval.end] == ')')
7634 {
7635 // url("...")
7636 this->interval.start = start;
7637 this->interval.end++;
7638 return true;
7639 }
7640 }
7641 else {
7642 // url(...
7643 this->content.start = content.end = this->interval.end;
7644 for (;;) {
7645 if (this->interval.end >= end || !text[this->interval.end])
7646 goto error;
7647 if (text[this->interval.end] == ')') {
7648 // url(...)
7649 this->interval.start = start;
7650 this->interval.end++;
7651 return true;
7652 }
7653 if (ctype.is(ctype.space, text[this->interval.end]))
7654 this->interval.end++;
7655 else
7656 this->content.end = ++this->interval.end;
7657 }
7658 }
7659 }
7660
7661 error:
7662 invalidate();
7663 return false;
7664 }
7665 };
7666
// Narrow and wide character instantiations; tcss_uri picks the wide one when _UNICODE is defined.
7667 using css_uri = basic_css_uri<char>;
7668 using wcss_uri = basic_css_uri<wchar_t>;
7669#ifdef _UNICODE
7670 using tcss_uri = wcss_uri;
7671#else
7672 using tcss_uri = css_uri;
7673#endif
7674
// Parser for `@import` followed by a quoted string; `content` receives the position of the text between the quotes.
7678 template <class T>
7680 {
7681 public:
     // Invalidates `content` and this parser's own interval.
     // NOTE(review): `basic_parser::invalidate()` names the base template without `<T>`; confirm it builds on all target compilers.
7682 virtual void invalidate()
7683 {
7684 this->content.invalidate();
7685 basic_parser::invalidate();
7686 }
7687
7689
7690 protected:
     // Requires `@import` (letters in either case) at `start`, optional whitespace, then a
     // string in double or single quotes. Returns true with interval ending right after the
     // closing quote. An unquoted argument or an unclosed string fails the match and
     // invalidates the parser. `flags` is ignored.
7691 virtual bool do_match(
7692 _In_reads_or_z_opt_(end) const T* text,
7693 _In_ size_t start = 0,
7694 _In_ size_t end = SIZE_MAX,
7695 _In_ int flags = match_multiline)
7696 {
7697 _Unreferenced_(flags);
     // interval.end doubles as the running cursor while scanning.
7698 this->interval.end = start;
7699 _Assume_(text || this->interval.end + 6 >= end);
7700 if (this->interval.end + 6 < end &&
7701 text[this->interval.end] == '@' &&
7702 (text[this->interval.end + 1] == 'i' || text[this->interval.end + 1] == 'I') &&
7703 (text[this->interval.end + 2] == 'm' || text[this->interval.end + 2] == 'M') &&
7704 (text[this->interval.end + 3] == 'p' || text[this->interval.end + 3] == 'P') &&
7705 (text[this->interval.end + 4] == 'o' || text[this->interval.end + 4] == 'O') &&
7706 (text[this->interval.end + 5] == 'r' || text[this->interval.end + 5] == 'R') &&
7707 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T'))
7708 {
7709 // @import...
7710 this->interval.end = this->interval.end + 7;
7711
7712 // Skip whitespace.
     // NOTE(review): `m_locale` is used without `this->` although it appears to be inherited from the dependent base; confirm it builds on all target compilers.
7713 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7714 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7715
7716 if (this->interval.end < end &&
7717 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7718 {
7719 // @import "Quoted
     // Only the quote character that opened the string closes it.
7720 T quote = text[this->interval.end];
7721 this->content.start = ++this->interval.end;
7722 for (;;) {
7723 if (this->interval.end >= end || !text[this->interval.end])
7724 goto error;
7725 if (text[this->interval.end] == quote) {
7726 // End quote"
7727 this->content.end = this->interval.end;
7728 this->interval.start = start;
7729 this->interval.end++;
7730 return true;
7731 }
7732 if (this->interval.end + 1 < end &&
7733 text[this->interval.end] == '\\' &&
7734 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7735 {
7736 // Escaped quote
7737 this->interval.end = this->interval.end + 2;
7738 }
7739 else
7740 this->interval.end++;
7741 }
7742 }
7743 }
7744
7745 error:
7746 invalidate();
7747 return false;
7748 }
7749 };
7750
// Narrow and wide character instantiations; tcss_import picks the wide one when _UNICODE is defined.
7751 using css_import = basic_css_import<char>;
7752 using wcss_import = basic_css_import<wchar_t>;
7753#ifdef _UNICODE
7754 using tcss_import = wcss_import;
7755#else
7756 using tcss_import = css_import;
7757#endif
7758
// Parser for `base/sub` optionally followed by `; charset=...`.
// `base_type`, `sub_type` and `charset` receive the positions of the respective parts.
7762 template <class T>
7764 {
7765 public:
     // Invalidates all three sub-intervals and this parser's own interval.
     // NOTE(review): `basic_parser::invalidate()` names the base template without `<T>`; confirm it builds on all target compilers.
7766 virtual void invalidate()
7767 {
7768 this->base_type.invalidate();
7769 this->sub_type.invalidate();
7770 this->charset.invalidate();
7771 basic_parser::invalidate();
7772 }
7773
7777
7778 protected:
     // The base type and the subtype must each be non-empty and are separated by `/`;
     // otherwise the match fails and the parser is invalidated. Everything after the
     // subtype is optional: a missing or malformed charset never fails the match.
     // `flags` is ignored.
7779 virtual bool do_match(
7780 _In_reads_or_z_opt_(end) const T* text,
7781 _In_ size_t start = 0,
7782 _In_ size_t end = SIZE_MAX,
7783 _In_ int flags = match_multiline)
7784 {
7785 _Unreferenced_(flags);
     // NOTE(review): `m_locale` is used without `this->` although it appears to be inherited from the dependent base; confirm it builds on all target compilers.
7786 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7787
     // interval.end doubles as the running cursor while scanning.
7788 this->interval.end = start;
7789 this->base_type.start = this->interval.end;
     // The base type runs up to `/`, `;`, whitespace or end of input.
7790 for (;;) {
7791 _Assume_(text || this->interval.end >= end);
7792 if (this->interval.end >= end || !text[this->interval.end])
7793 break;
7794 if (text[this->interval.end] == '/' ||
7795 text[this->interval.end] == ';' ||
7796 ctype.is(ctype.space, text[this->interval.end]))
7797 break;
7798 this->interval.end++;
7799 }
7800 if (this->interval.end <= this->base_type.start)
7801 goto error;
7802 this->base_type.end = this->interval.end;
7803
7804 if (end <= this->interval.end || text[this->interval.end] != '/')
7805 goto error;
7806
7807 this->interval.end++;
7808 this->sub_type.start = this->interval.end;
     // The subtype runs up to `/`, `;`, whitespace or end of input.
7809 for (;;) {
7810 if (this->interval.end >= end || !text[this->interval.end])
7811 break;
7812 if (text[this->interval.end] == '/' ||
7813 text[this->interval.end] == ';' ||
7814 ctype.is(ctype.space, text[this->interval.end]))
7815 break;
7816 this->interval.end++;
7817 }
7818 if (this->interval.end <= this->sub_type.start)
7819 goto error;
7820
7821 this->sub_type.end = this->interval.end;
     // The charset is optional; start from an invalid state so a stale match is not reported.
7822 this->charset.invalidate();
7823 if (this->interval.end < end && text[this->interval.end] == ';') {
7824 this->interval.end++;
7825
7826 // Skip whitespace.
7827 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7828
     // `charset=` is matched in either letter case, with no whitespace allowed around `=`.
7829 if (this->interval.end + 7 < end &&
7830 (text[this->interval.end] == 'c' || text[this->interval.end] == 'C') &&
7831 (text[this->interval.end + 1] == 'h' || text[this->interval.end + 1] == 'H') &&
7832 (text[this->interval.end + 2] == 'a' || text[this->interval.end + 2] == 'A') &&
7833 (text[this->interval.end + 3] == 'r' || text[this->interval.end + 3] == 'R') &&
7834 (text[this->interval.end + 4] == 's' || text[this->interval.end + 4] == 'S') &&
7835 (text[this->interval.end + 5] == 'e' || text[this->interval.end + 5] == 'E') &&
7836 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T') &&
7837 text[this->interval.end + 7] == '=')
7838 {
7839 this->interval.end = this->interval.end + 8;
7840 if (this->interval.end < end &&
7841 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7842 {
7843 // "Quoted...
7844 T quote = text[this->interval.end];
7845 this->charset.start = ++this->interval.end;
7846 for (;;) {
7847 if (this->interval.end >= end || !text[this->interval.end]) {
7848 // No end quote!
     // The charset is dropped, but the match as a whole still succeeds.
7849 this->charset.invalidate();
7850 break;
7851 }
7852 if (text[this->interval.end] == quote) {
7853 // End quote"
7854 this->charset.end = this->interval.end;
7855 this->interval.end++;
7856 break;
7857 }
7858 this->interval.end++;
7859 }
7860 }
7861 else {
7862 // Nonquoted
     // NOTE(review): an unquoted charset ends only at whitespace or end of input, so a following `;` would become part of it - confirm this is intended.
7863 this->charset.start = this->interval.end;
7864 for (;;) {
7865 if (this->interval.end >= end || !text[this->interval.end] ||
7866 ctype.is(ctype.space, text[this->interval.end])) {
7867 this->charset.end = this->interval.end;
7868 break;
7869 }
7870 this->interval.end++;
7871 }
7872 }
7873 }
7874 }
7875 this->interval.start = start;
7876 return true;
7877
7878 error:
7879 invalidate();
7880 return false;
7881 }
7882 };
7883
7884 using mime_type = basic_mime_type<char>;
7885 using wmime_type = basic_mime_type<wchar_t>;
7886#ifdef _UNICODE
7887 using tmime_type = wmime_type;
7888#else
7889 using tmime_type = mime_type;
7890#endif
7891
7895 template <class T>
7897 {
7898 protected:
7899 virtual bool do_match(
7900 _In_reads_or_z_opt_(end) const T* text,
7901 _In_ size_t start = 0,
7902 _In_ size_t end = SIZE_MAX,
7903 _In_ int flags = match_default)
7904 {
7905 _Unreferenced_(flags);
7906 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7907 this->interval.end = start;
7908 for (;;) {
7909 _Assume_(text || this->interval.end >= end);
7910 if (this->interval.end >= end || !text[this->interval.end]) {
7912 this->interval.start = start;
7913 return true;
7914 }
7915 this->interval.invalidate();
7916 return false;
7917 }
7918 if (text[this->interval.end] == '>' ||
7919 text[this->interval.end] == '=' ||
7920 text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>' ||
7921 ctype.is(ctype.space, text[this->interval.end]))
7922 {
7923 this->interval.start = start;
7924 return true;
7925 }
7926 this->interval.end++;
7927 }
7928 }
7929 };
7930
7933#ifdef _UNICODE
7934 using thtml_ident = whtml_ident;
7935#else
7936 using thtml_ident = html_ident;
7937#endif
7938
7942 template <class T>
7944 {
7945 public:
7946 virtual void invalidate()
7947 {
7948 this->content.invalidate();
7949 basic_parser::invalidate();
7950 }
7951
7953
7954 protected:
7955 virtual bool do_match(
7956 _In_reads_or_z_opt_(end) const T* text,
7957 _In_ size_t start = 0,
7958 _In_ size_t end = SIZE_MAX,
7959 _In_ int flags = match_default)
7960 {
7961 _Unreferenced_(flags);
7962 this->interval.end = start;
7963 _Assume_(text || this->interval.end >= end);
7964 if (this->interval.end < end &&
7965 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7966 {
7967 // "Quoted...
7968 T quote = text[this->interval.end];
7969 this->content.start = ++this->interval.end;
7970 for (;;) {
7971 if (this->interval.end >= end || !text[this->interval.end]) {
7972 // No end quote!
7973 this->content.invalidate();
7974 this->interval.invalidate();
7975 return false;
7976 }
7977 if (text[this->interval.end] == quote) {
7978 // End quote"
7979 this->content.end = this->interval.end;
7980 this->interval.start = start;
7981 this->interval.end++;
7982 return true;
7983 }
7984 this->interval.end++;
7985 }
7986 }
7987
7988 // Nonquoted
7989 this->content.start = this->interval.end;
7990 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7991 for (;;) {
7992 _Assume_(text || this->interval.end >= end);
7993 if (this->interval.end >= end || !text[this->interval.end]) {
7994 this->content.end = this->interval.end;
7995 this->interval.start = start;
7996 return true;
7997 }
7998 if (text[this->interval.end] == '>' ||
7999 text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>' ||
8000 ctype.is(ctype.space, text[this->interval.end]))
8001 {
8002 this->content.end = this->interval.end;
8003 this->interval.start = start;
8004 return true;
8005 }
8006 this->interval.end++;
8007 }
8008 }
8009 };
8010
8011 using html_value = basic_html_value<char>;
8012 using whtml_value = basic_html_value<wchar_t>;
8013#ifdef _UNICODE
8014 using thtml_value = whtml_value;
8015#else
8016 using thtml_value = html_value;
8017#endif
8018
/// Kind of markup sequence.
///
/// Enumerator values are spelled out; they are the same values the implicit numbering yields.
enum class html_sequence_t {
    unknown = -1,       // not determined (initial / invalidated state of a tag parser)

    text = 0,
    element = 1,        // <tag .../>
    element_start = 2,  // <tag ...>
    element_end = 3,    // </tag ...>
    declaration = 4,    // <!...>
    comment = 5,        // <!--...-->
    instruction = 6,    // <?...> or <?...?>
    PCDATA = 7,
    CDATA = 8,
};
8035
8043
8047 template <class T>
8049 {
8050 public:
/// Constructs the tag parser with `type` set to html_sequence_t::unknown.
///
/// \param[in] locale  Locale for the parser.
// NOTE(review): no initializer visible here uses `locale`, and the listing numbering skips a
// line right after the signature - confirm whether a base-class initializer belongs there.
8051 basic_html_tag(_In_ const std::locale& locale = std::locale()) :
8053 type(html_sequence_t::unknown)
8054 {}
8055
8056 virtual void invalidate()
8057 {
8058 this->type = html_sequence_t::unknown;
8059 this->name.invalidate();
8060 this->attributes.clear();
8061 basic_parser::invalidate();
8062 }
8063
8064 html_sequence_t type; ///< tag type
8066 std::vector<html_attribute> attributes; ///< tag attributes
8067
8068 protected:
8069 virtual bool do_match(
8070 _In_reads_or_z_opt_(end) const T* text,
8071 _In_ size_t start = 0,
8072 _In_ size_t end = SIZE_MAX,
8073 _In_ int flags = match_multiline)
8074 {
8075 _Assume_(text || start >= end);
8076 if (start >= end || text[start] != '<')
8077 goto error;
8078 this->interval.end = start + 1;
8079 if (this->interval.end >= end || !text[this->interval.end])
8080 goto error;
8081 if (text[this->interval.end] == '/' &&
8082 this->m_ident.match(text, this->interval.end + 1, end, flags))
8083 {
8084 // </...
8085 this->type = html_sequence_t::element_end;
8086 this->name = this->m_ident.interval;
8087 this->interval.end = this->m_ident.interval.end;
8088 }
8089 else if (text[this->interval.end] == '!') {
8090 // <!...
8091 this->interval.end++;
8092 if (this->interval.end + 1 < end &&
8093 text[this->interval.end] == '-' &&
8094 text[this->interval.end + 1] == '-')
8095 {
8096 // <!--...
8097 this->name.start = this->interval.end = this->interval.end + 2;
8098 for (;;) {
8099 if (this->interval.end >= end || !text[this->interval.end])
8100 goto error;
8101 if (this->interval.end + 2 < end &&
8102 text[this->interval.end] == '-' &&
8103 text[this->interval.end + 1] == '-' &&
8104 text[this->interval.end + 2] == '>')
8105 {
8106 // <!--...-->
8107 this->type = html_sequence_t::comment;
8108 this->name.end = this->interval.end;
8109 this->attributes.clear();
8110 this->interval.start = start;
8111 this->interval.end = this->interval.end + 3;
8112 return true;
8113 }
8114 this->interval.end++;
8115 }
8116 }
8117 this->type = html_sequence_t::declaration;
8118 this->name.start = this->name.end = this->interval.end;
8119 }
8120 else if (text[this->interval.end] == '?') {
8121 // <?...
8122 this->name.start = ++this->interval.end;
8123 for (;;) {
8124 if (this->interval.end >= end || !text[this->interval.end])
8125 goto error;
8126 if (text[this->interval.end] == '>') {
8127 // <?...>
8128 this->type = html_sequence_t::instruction;
8129 this->name.end = this->interval.end;
8130 this->attributes.clear();
8131 this->interval.start = start;
8132 this->interval.end++;
8133 return true;
8134 }
8135 if (this->interval.end + 1 < end &&
8136 text[this->interval.end] == '?' &&
8137 text[this->interval.end + 1] == '>')
8138 {
8139 // <?...?>
8140 this->type = html_sequence_t::instruction;
8141 this->name.end = this->interval.end;
8142 this->attributes.clear();
8143 this->interval.start = start;
8144 this->interval.end = this->interval.end + 2;
8145 return true;
8146 }
8147 this->interval.end++;
8148 }
8149 }
8150 else if (this->m_ident.match(text, this->interval.end, end, flags)) {
8151 // <tag...
8152 this->type = html_sequence_t::element_start;
8153 this->name = this->m_ident.interval;
8154 this->interval.end = this->m_ident.interval.end;
8155 }
8156 else
8157 goto error;
8158
8159 // Skip whitespace.
8160 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8161 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8162
8163 this->attributes.clear();
8164 for (;;) {
8165 if (this->type == html_sequence_t::element_start &&
8166 this->interval.end + 1 < end &&
8167 text[this->interval.end] == '/' &&
8168 text[this->interval.end + 1] == '>')
8169 {
8170 // <tag .../>
8171 this->type = html_sequence_t::element;
8172 this->interval.end = this->interval.end + 2;
8173 break;
8174 }
8175 if (this->interval.end < end &&
8176 text[this->interval.end] == '>')
8177 {
8178 // <tag ...>
8179 this->interval.end++;
8180 break;
8181 }
8182 if (this->type == html_sequence_t::declaration &&
8183 this->interval.end + 1 < end &&
8184 text[this->interval.end] == '!' &&
8185 text[this->interval.end + 1] == '>')
8186 {
8187 // "<!...!>".
8188 this->interval.end = this->interval.end + 2;
8189 break;
8190 }
8191 if (this->type == html_sequence_t::declaration &&
8192 this->interval.end + 1 < end &&
8193 text[this->interval.end] == '-' &&
8194 text[this->interval.end + 1] == '-')
8195 {
8196 // "<! ... --...".
8197 this->interval.end = this->interval.end + 2;
8198 for (;;) {
8199 if (this->interval.end >= end || !text[this->interval.end])
8200 goto error;
8201 if (this->interval.end + 1 < end &&
8202 text[this->interval.end] == '-' &&
8203 text[this->interval.end + 1] == '-')
8204 {
8205 // "<! ... --...--".
8206 this->interval.end = this->interval.end + 2;
8207 break;
8208 }
8209 this->interval.end++;
8210 }
8211
8212 // Skip whitespace.
8213 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8214 continue;
8215 }
8216
8217 if (this->interval.end >= end || !text[this->interval.end])
8218 goto error;
8219
8220 // Attributes follow...
8221 html_attribute* a = nullptr;
8222 if (this->m_ident.match(text, this->interval.end, end, flags)) {
8223 this->attributes.push_back(std::move(html_attribute{ this->m_ident.interval }));
8224 a = &this->attributes.back();
8225 _Assume_(a);
8226 this->interval.end = this->m_ident.interval.end;
8227 }
8228 else {
8229 // What was that?! Skip.
8230 this->interval.end++;
8231 continue;
8232 }
8233
8234 // Skip whitespace.
8235 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8236
8237 if (this->interval.end < end && text[this->interval.end] == '=') {
8238 this->interval.end++;
8239
8240 // Skip whitespace.
8241 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8242
8243 if (this->m_value.match(text, this->interval.end, end, flags)) {
8244 // This attribute has value.
8245 a->value = this->m_value.content;
8246 this->interval.end = this->m_value.interval.end;
8247
8248 // Skip whitespace.
8249 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8250 }
8251 }
8252 else {
8253 // This attribute has no value.
8254 a->value.invalidate();
8255 }
8256 }
8257
8258 this->interval.start = start;
8259 return true;
8260
8261 error:
8262 invalidate();
8263 return false;
8264 }
8265
8266 basic_html_ident<T> m_ident; ///< parser used by do_match() for tag and attribute names
8267 basic_html_value<T> m_value; ///< parser used by do_match() for attribute values
8268 };
8269
8270 using html_tag = basic_html_tag<char>;
8271 using whtml_tag = basic_html_tag<wchar_t>;
8272#ifdef _UNICODE
8273 using thtml_tag = whtml_tag;
8274#else
8275 using thtml_tag = html_tag;
8276#endif
8277
8281 template <class T>
8283 {
8284 public:
8285 virtual void invalidate()
8286 {
8287 this->condition.invalidate();
8288 basic_parser::invalidate();
8289 }
8290
8291 stdex::interval<size_t> condition; ///< condition position in source
8292
8293 protected:
8294 virtual bool do_match(
8295 _In_reads_or_z_opt_(end) const T* text,
8296 _In_ size_t start = 0,
8297 _In_ size_t end = SIZE_MAX,
8298 _In_ int flags = match_multiline)
8299 {
8300 _Unreferenced_(flags);
8301 _Assume_(text || start + 2 >= end);
8302 if (start + 2 < end &&
8303 text[start] == '<' &&
8304 text[start + 1] == '!' &&
8305 text[start + 2] == '[')
8306 {
8307 this->interval.end = start + 3;
8308
8309 // Skip whitespace.
8310 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8311 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8312
8313 this->condition.start = this->condition.end = this->interval.end;
8314
8315 for (;;) {
8316 if (this->interval.end >= end || !text[this->interval.end])
8317 break;
8318 if (text[this->interval.end] == '[') {
8319 this->interval.start = start;
8320 this->interval.end++;
8321 return true;
8322 }
8323 if (ctype.is(ctype.space, text[this->interval.end]))
8324 this->interval.end++;
8325 else
8326 this->condition.end = ++this->interval.end;
8327 }
8328 }
8329
8330 this->condition.invalidate();
8331 this->interval.invalidate();
8332 return false;
8333 }
8334 };
8335
8336 using html_declaration_condition_start = basic_html_declaration_condition_start<char>;
8337 using whtml_declaration_condition_start = basic_html_declaration_condition_start<wchar_t>;
8338#ifdef _UNICODE
8339 using thtml_declaration_condition_start = whtml_declaration_condition_start;
8340#else
8341 using thtml_declaration_condition_start = html_declaration_condition_start;
8342#endif
8343
8347 template <class T>
8349 {
8350 protected:
8351 virtual bool do_match(
8352 _In_reads_or_z_opt_(end) const T* text,
8353 _In_ size_t start = 0,
8354 _In_ size_t end = SIZE_MAX,
8355 _In_ int flags = match_multiline)
8356 {
8357 _Unreferenced_(flags);
8358 _Assume_(text || start + 2 >= end);
8359 if (start + 2 < end &&
8360 text[start] == ']' &&
8361 text[start + 1] == ']' &&
8362 text[start + 2] == '>')
8363 {
8364 this->interval.start = start;
8365 this->interval.end = start + 3;
8366 return true;
8367 }
8368 this->interval.invalidate();
8369 return false;
8370 }
8371 };
8372
8375#ifdef _UNICODE
8377#else
8379#endif
8380 }
8381}
8382
8383#undef ENUM_FLAG_OPERATOR
8384#undef ENUM_FLAGS
8385
8386#ifdef _MSC_VER
8387#pragma warning(pop)
8388#endif
locale_t helper class to free_locale when going out of scope.
Definition locale.hpp:69
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4376
Test for any code unit.
Definition parser.hpp:231
Test for beginning of line.
Definition parser.hpp:630
Test for any.
Definition parser.hpp:1073
Test for chemical formula.
Definition parser.hpp:5505
Test for Creditor Reference.
Definition parser.hpp:4940
T reference[22]
Normalized national reference number.
Definition parser.hpp:4962
T check_digits[3]
Two check digits.
Definition parser.hpp:4961
bool is_valid
Is reference valid per ISO 7064.
Definition parser.hpp:4963
Legacy CSS comment end -->
Definition parser.hpp:7466
Legacy CSS comment start <!--
Definition parser.hpp:7428
CSS comment.
Definition parser.hpp:7368
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7376
CSS import directive.
Definition parser.hpp:7680
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7688
CSS string.
Definition parser.hpp:7503
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7511
URI in CSS.
Definition parser.hpp:7570
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7578
Test for any code unit from a given string of code units.
Definition parser.hpp:735
Test for specific code unit.
Definition parser.hpp:303
Test for date.
Definition parser.hpp:4009
Test for valid DNS domain character.
Definition parser.hpp:2791
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2801
Test for DNS domain/hostname.
Definition parser.hpp:2891
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2955
Test for e-mail address.
Definition parser.hpp:3783
Test for emoticon.
Definition parser.hpp:3886
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3914
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3915
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3917
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3916
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3913
Test for end of line.
Definition parser.hpp:669
Test for fraction.
Definition parser.hpp:1701
End of condition ...]]>
Definition parser.hpp:8349
Start of condition <![condition[...
Definition parser.hpp:8283
stdex::interval< size_t > condition
condition position in source
Definition parser.hpp:8291
Contiguous sequence of characters representing name of element, attribute etc.
Definition parser.hpp:7897
Tag.
Definition parser.hpp:8049
std::vector< html_attribute > attributes
tag attributes
Definition parser.hpp:8066
html_sequence_t type
tag type
Definition parser.hpp:8064
stdex::interval< size_t > name
tag name position in source
Definition parser.hpp:8065
Optionally-quoted string representing value of an attribute.
Definition parser.hpp:7944
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7952
Test for International Bank Account Number.
Definition parser.hpp:4651
T bban[31]
Normalized Basic Bank Account Number.
Definition parser.hpp:4676
T country[3]
ISO 3166-1 alpha-2 country code.
Definition parser.hpp:4674
T check_digits[3]
Two check digits.
Definition parser.hpp:4675
bool is_valid
Is IBAN valid per ISO 7064.
Definition parser.hpp:4677
Test for decimal integer.
Definition parser.hpp:1311
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1396
bool has_separators
Did integer have any separators?
Definition parser.hpp:1417
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1416
Test for hexadecimal integer.
Definition parser.hpp:1476
Base class for integer testing.
Definition parser.hpp:1289
size_t value
Calculated value of the numeral.
Definition parser.hpp:1303
Test for IPv4 address.
Definition parser.hpp:2359
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2404
struct in_addr value
IPv4 address value.
Definition parser.hpp:2405
Test for IPv6 address.
Definition parser.hpp:2571
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2643
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2641
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2642
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2497
Test for repeating.
Definition parser.hpp:925
bool m_greedy
try to match as long sequence as possible
Definition parser.hpp:964
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:961
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:962
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:963
Test for JSON string.
Definition parser.hpp:7216
MIME content type.
Definition parser.hpp:7764
stdex::interval< size_t > base_type
basic type position in source
Definition parser.hpp:7774
stdex::interval< size_t > sub_type
sub-type position in source
Definition parser.hpp:7775
stdex::interval< size_t > charset
charset position in source
Definition parser.hpp:7776
Test for mixed numeral.
Definition parser.hpp:1936
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:1969
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1967
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1966
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1965
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:1968
Test for monetary numeral.
Definition parser.hpp:2230
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2263
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2268
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2266
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2269
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2267
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2264
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2265
"No-op" match
Definition parser.hpp:199
Base template for all parsers.
Definition parser.hpp:75
stdex::interval< size_t > interval
Region of the last match.
Definition parser.hpp:115
Test for permutation.
Definition parser.hpp:1213
Test for phone number.
Definition parser.hpp:4499
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4524
Test for any punctuation code unit.
Definition parser.hpp:476
Test for Roman numeral.
Definition parser.hpp:1585
Test for scientific numeral.
Definition parser.hpp:2061
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2107
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2111
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2105
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2106
double value
Calculated value of the numeral.
Definition parser.hpp:2115
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2113
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2110
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2112
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2114
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2109
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2108
Test for match score.
Definition parser.hpp:1764
Test for sequence.
Definition parser.hpp:1021
Definition parser.hpp:704
Test for SI Reference delimiter.
Definition parser.hpp:5134
Test for SI Reference part.
Definition parser.hpp:5088
Test for SI Reference.
Definition parser.hpp:5173
basic_si_reference_part< T > part3
Reference data part 3 (P3)
Definition parser.hpp:5202
basic_si_reference_part< T > part1
Reference data part 1 (P1)
Definition parser.hpp:5200
bool is_valid
Is reference valid.
Definition parser.hpp:5203
T model[3]
Reference model.
Definition parser.hpp:5199
basic_si_reference_part< T > part2
Reference data part 2 (P2)
Definition parser.hpp:5201
Test for signed numeral.
Definition parser.hpp:1850
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1876
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1875
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1874
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1877
Test for any space code unit.
Definition parser.hpp:396
Test for any space or punctuation code unit.
Definition parser.hpp:551
Test for any string.
Definition parser.hpp:1141
Test for given string.
Definition parser.hpp:830
Test for time.
Definition parser.hpp:4274
Test for valid URL password character.
Definition parser.hpp:3075
Test for valid URL path character.
Definition parser.hpp:3177
Test for URL path.
Definition parser.hpp:3287
Test for valid URL username character.
Definition parser.hpp:2974
Test for URL.
Definition parser.hpp:3427
Test for HTTP agent.
Definition parser.hpp:6760
Test for HTTP any type.
Definition parser.hpp:5903
Test for HTTP asterisk.
Definition parser.hpp:6531
Test for HTTP header.
Definition parser.hpp:7068
Test for HTTP language (RFC1766)
Definition parser.hpp:6399
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:5585
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5935
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5987
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5851
http_token name
Parameter name.
Definition parser.hpp:5860
http_value value
Parameter value.
Definition parser.hpp:5861
Test for HTTP protocol.
Definition parser.hpp:6835
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6857
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:5744
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:5753
Test for HTTP request.
Definition parser.hpp:6936
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:5621
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:5657
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:5690
Test for HTTP URL parameter.
Definition parser.hpp:6224
Test for HTTP URL path segment.
Definition parser.hpp:6136
Test for HTTP URL path.
Definition parser.hpp:6169
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:6177
Test for HTTP URL port.
Definition parser.hpp:6080
Test for HTTP URL server.
Definition parser.hpp:6043
Test for HTTP URL.
Definition parser.hpp:6301
Collection of HTTP values.
Definition parser.hpp:7172
Test for HTTP value (RFC2616: value)
Definition parser.hpp:5807
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5816
http_token token
Value when matched as token.
Definition parser.hpp:5817
Test for HTTP weight factor.
Definition parser.hpp:6462
float value
Calculated value of the weight factor.
Definition parser.hpp:6475
Test for HTTP weighted value.
Definition parser.hpp:6554
Base template for collection-holding parsers.
Definition parser.hpp:981
Test for any SGML code point.
Definition parser.hpp:264
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:787
Test for specific SGML code point.
Definition parser.hpp:352
Test for valid DNS domain SGML character.
Definition parser.hpp:2846
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2536
Test for any SGML punctuation code point.
Definition parser.hpp:517
Test for any SGML space code point.
Definition parser.hpp:439
Test for any SGML space or punctuation code point.
Definition parser.hpp:594
Test for SGML given string.
Definition parser.hpp:877
Test for valid URL password SGML character.
Definition parser.hpp:3128
Test for valid URL path SGML character.
Definition parser.hpp:3234
Test for valid URL username SGML character.
Definition parser.hpp:3026
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
void invalidate()
Invalidates interval.
Definition interval.hpp:59
T start
interval start
Definition interval.hpp:19
Tag attribute.
Definition parser.hpp:8039
stdex::interval< size_t > name
attribute name position in source
Definition parser.hpp:8040
stdex::interval< size_t > value
attribute value position in source
Definition parser.hpp:8041
Definition parser.hpp:7198