stdex
Additional custom algorithms, or ones not covered by Standard C++
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023-2024 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "endian.hpp"
10#include "interval.hpp"
11#include "memory.hpp"
12#include "sgml.hpp"
13#include "string.hpp"
14#include <stdarg.h>
15#include <stdint.h>
16#include <math.h>
17#if defined(_WIN32)
18#include <winsock2.h>
19#if _MSC_VER >= 1300
20#include <ws2ipdef.h>
21#endif
22#include <ws2tcpip.h>
23#else
24#include <netinet/in.h>
25#endif
26#include <limits>
27#include <list>
28#include <locale>
29#include <memory>
30#include <set>
31#include <string_view>
32#include <string>
33
34#if defined(_MSC_VER)
35#pragma warning(push)
36#pragma warning(disable: 4100)
37#elif defined(__GNUC__)
38#pragma GCC diagnostic push
39#pragma GCC diagnostic ignored "-Wunknown-pragmas"
40#pragma GCC diagnostic ignored "-Wunused-parameter"
41#endif
42
// Defines binary operator X and compound assignment X= for the scoped enum T.
// Overloads cover enum/enum and enum/underlying-integer operand pairs; every
// result is cast back to T.
#define ENUM_FLAG_OPERATOR(T,X) \
inline T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
inline T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T& operator X= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
inline T& operator X= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }

// Forward-declares the scoped enum T with underlying type `type`, defines the
// operators ~, |, ^, & (and |=, ^=, &=) for it, and ends with an open enum head
// so the enumerator list can follow the invocation:
//   ENUM_FLAGS(my_flags, int) { a = 1, b = 2 };
#define ENUM_FLAGS(T, type) \
enum class T : type; \
inline T operator ~ (T t) { return static_cast<T>(~static_cast<std::underlying_type_t<T>>(t)); } \
ENUM_FLAG_OPERATOR(T,|) \
ENUM_FLAG_OPERATOR(T,^) \
ENUM_FLAG_OPERATOR(T,&) \
enum class T : type
56
// Map s6_words onto the platform's name for the 16-bit-word view of in6_addr.
// Nothing is defined here for _WIN32 builds.
#if !defined(_WIN32)
#if defined(__APPLE__)
#define s6_words __u6_addr.__u6_addr16
#else
#define s6_words s6_addr16
#endif
#endif
63
64namespace stdex
65{
66 namespace parser
67 {
// Flags accepted by the `flags` argument of the parsers' search()/match().
// They are distinct bits and may be OR-ed together.
// Declared `inline` so that every translation unit including this header
// shares one definition of each constant.
inline constexpr int match_default = 0;            // No special behaviour
inline constexpr int match_case_insensitive = 0x1; // Compare characters case-insensitively
inline constexpr int match_multiline = 0x2;        // Let whitespace tests accept line-break characters
74
78 template <class T>
80 {
81 public:
82 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
83 virtual ~basic_parser() {}
84
85 bool search(
86 _In_reads_or_z_opt_(end) const T* text,
87 _In_ size_t start = 0,
88 _In_ size_t end = SIZE_MAX,
89 _In_ int flags = match_default)
90 {
91 for (size_t i = start; i < end && text[i]; i++)
92 if (match(text, i, end, flags))
93 return true;
94 return false;
95 }
96
97 bool match(
98 _In_reads_or_z_opt_(end) const T* text,
99 _In_ size_t start = 0,
100 _In_ size_t end = SIZE_MAX,
101 _In_ int flags = match_default)
102 {
103 return do_match(text, start, end, flags);
104 }
105
106 bool match(
107 _In_ const std::basic_string_view<T, std::char_traits<T>> text,
108 _In_ size_t start = 0,
109 _In_ size_t end = SIZE_MAX,
110 _In_ int flags = match_default)
111 {
112 return match(text.data(), start, std::min<size_t>(end, text.size()), flags);
113 }
114
115 virtual void invalidate()
116 {
117 this->interval.invalidate();
118 }
119
121
122 protected:
123 virtual bool do_match(
124 _In_reads_or_z_opt_(end) const T* text,
125 _In_ size_t start = 0,
126 _In_ size_t end = SIZE_MAX,
127 _In_ int flags = match_default) = 0;
128
130 template <class T_out = wchar_t>
131 const T_out* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ T_out(&buf)[5])
132 {
133 if (text[start] == '&') {
134 // Potential entity start
135 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
136 for (chr_end = start + 1;; chr_end++) {
137 if (chr_end >= end || text[chr_end] == 0) {
138 // Unterminated entity
139 break;
140 }
141 if (text[chr_end] == ';') {
142 // Entity end
143 utf32_t buf32[2];
144 size_t n = chr_end - start - 1;
145 auto entity_w = utf32_to_wstr(sgml2uni(text + start + 1, n, buf32), buf);
146 if (entity_w) {
147 chr_end++;
148 return entity_w;
149 }
150 // Unknown entity.
151 break;
152 }
153 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
154 // This char cannot possibly be a part of entity.
155 break;
156 }
157 }
158 }
159 buf[0] = text[start];
160 buf[1] = 0;
161 chr_end = start + 1;
162 return buf;
163 }
165
166 std::locale m_locale;
167 };
168
169 using parser = basic_parser<char>;
170 using wparser = basic_parser<wchar_t>;
171#ifdef _UNICODE
172 using tparser = wparser;
173#else
174 using tparser = parser;
175#endif
176 using sgml_parser = basic_parser<char>;
177
181 template <class T>
182 class basic_noop : public basic_parser<T>
183 {
184 protected:
185 virtual bool do_match(
186 _In_reads_or_z_opt_(end) const T* text,
187 _In_ size_t start = 0,
188 _In_ size_t end = SIZE_MAX,
189 _In_ int flags = match_default)
190 {
191 _Assume_(text || start >= end);
192 if (start < end && text[start]) {
193 this->interval.start = this->interval.end = start;
194 return true;
195 }
196 this->interval.invalidate();
197 return false;
198 }
199 };
200
201 using noop = basic_noop<char>;
203#ifdef _UNICODE
204 using tnoop = wnoop;
205#else
206 using tnoop = noop;
207#endif
209
213 template <class T>
214 class basic_any_cu : public basic_parser<T>
215 {
216 public:
217 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
218
219 protected:
220 virtual bool do_match(
221 _In_reads_or_z_opt_(end) const T* text,
222 _In_ size_t start = 0,
223 _In_ size_t end = SIZE_MAX,
224 _In_ int flags = match_default)
225 {
226 _Assume_(text || start >= end);
227 if (start < end && text[start]) {
228 this->interval.end = (this->interval.start = start) + 1;
229 return true;
230 }
231 this->interval.invalidate();
232 return false;
233 }
234 };
235
238#ifdef _UNICODE
239 using tany_cu = wany_cu;
240#else
241 using tany_cu = any_cu;
242#endif
243
247 class sgml_any_cp : public basic_any_cu<char>
248 {
249 public:
250 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
251
252 protected:
253 virtual bool do_match(
254 _In_reads_or_z_(end) const char* text,
255 _In_ size_t start = 0,
256 _In_ size_t end = SIZE_MAX,
257 _In_ int flags = match_default)
258 {
259 _Assume_(text || start >= end);
260 if (start < end && text[start]) {
261 if (text[start] == '&') {
262 // SGML entity
263 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
264 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
265 if (text[this->interval.end] == ';') {
266 this->interval.end++;
267 this->interval.start = start;
268 return true;
269 }
270 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
271 break;
272 // Unterminated entity
273 }
274 this->interval.end = (this->interval.start = start) + 1;
275 return true;
276 }
277 this->interval.invalidate();
278 return false;
279 }
280 };
281
285 template <class T>
286 class basic_cu : public basic_parser<T>
287 {
288 public:
289 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
291 m_chr(chr),
292 m_invert(invert)
293 {}
294
295 protected:
296 virtual bool do_match(
297 _In_reads_or_z_opt_(end) const T* text,
298 _In_ size_t start = 0,
299 _In_ size_t end = SIZE_MAX,
300 _In_ int flags = match_default)
301 {
302 _Assume_(text || start >= end);
303 if (start < end && text[start]) {
304 bool r;
305 if (flags & match_case_insensitive) {
306 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
307 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
308 }
309 else
310 r = text[start] == m_chr;
311 if ((r && !m_invert) || (!r && m_invert)) {
312 this->interval.end = (this->interval.start = start) + 1;
313 return true;
314 }
315 }
316 this->interval.invalidate();
317 return false;
318 }
319
320 T m_chr;
321 bool m_invert;
322 };
323
324 using cu = basic_cu<char>;
325 using wcu = basic_cu<wchar_t>;
326#ifdef _UNICODE
327 using tcu = wcu;
328#else
329 using tcu = cu;
330#endif
331
335 class sgml_cp : public sgml_parser
336 {
337 public:
338 sgml_cp(const char* chr, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
340 m_invert(invert)
341 {
342 _Assume_(chr || !count);
343 wchar_t buf[5];
344 size_t chr_end;
345 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
346 }
347
348 protected:
349 virtual bool do_match(
350 _In_reads_or_z_(end) const char* text,
351 _In_ size_t start = 0,
352 _In_ size_t end = SIZE_MAX,
353 _In_ int flags = match_default)
354 {
355 _Assume_(text || start >= end);
356 if (start < end && text[start]) {
357 wchar_t buf[5];
358 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
359 bool r = ((flags & match_case_insensitive) ?
360 stdex::strnicmp(chr, stdex::strlen(chr), m_chr.data(), m_chr.size(), m_locale) :
361 stdex::strncmp(chr, stdex::strlen(chr), m_chr.data(), m_chr.size())) == 0;
362 if ((r && !m_invert) || (!r && m_invert)) {
363 this->interval.start = start;
364 return true;
365 }
366 }
367 this->interval.invalidate();
368 return false;
369 }
370
371 std::wstring m_chr;
372 bool m_invert;
373 };
374
378 template <class T>
379 class basic_space_cu : public basic_parser<T>
380 {
381 public:
382 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
384 m_invert(invert)
385 {}
386
387 protected:
388 virtual bool do_match(
389 _In_reads_or_z_opt_(end) const T* text,
390 _In_ size_t start = 0,
391 _In_ size_t end = SIZE_MAX,
392 _In_ int flags = match_default)
393 {
394 _Assume_(text || start >= end);
395 if (start < end && text[start]) {
396 bool r =
397 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
398 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
399 if ((r && !m_invert) || (!r && m_invert)) {
400 this->interval.end = (this->interval.start = start) + 1;
401 return true;
402 }
403 }
404 this->interval.invalidate();
405 return false;
406 }
407
408 bool m_invert;
409 };
410
413#ifdef _UNICODE
414 using tspace_cu = wspace_cu;
415#else
416 using tspace_cu = space_cu;
417#endif
418
422 class sgml_space_cp : public basic_space_cu<char>
423 {
424 public:
425 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
427 {}
428
429 protected:
430 virtual bool do_match(
431 _In_reads_or_z_(end) const char* text,
432 _In_ size_t start = 0,
433 _In_ size_t end = SIZE_MAX,
434 _In_ int flags = match_default)
435 {
436 _Assume_(text || start >= end);
437 if (start < end && text[start]) {
438 wchar_t buf[5];
439 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
440 const wchar_t* chr_end = chr + stdex::strlen(chr);
441 bool r =
442 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
443 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
444 if ((r && !m_invert) || (!r && m_invert)) {
445 this->interval.start = start;
446 return true;
447 }
448 }
449
450 this->interval.invalidate();
451 return false;
452 }
453 };
454
458 template <class T>
459 class basic_punct_cu : public basic_parser<T>
460 {
461 public:
462 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
464 m_invert(invert)
465 {}
466
467 protected:
468 virtual bool do_match(
469 _In_reads_or_z_opt_(end) const T* text,
470 _In_ size_t start = 0,
471 _In_ size_t end = SIZE_MAX,
472 _In_ int flags = match_default)
473 {
474 _Assume_(text || start >= end);
475 if (start < end && text[start]) {
476 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
477 if ((r && !m_invert) || (!r && m_invert)) {
478 this->interval.end = (this->interval.start = start) + 1;
479 return true;
480 }
481 }
482 this->interval.invalidate();
483 return false;
484 }
485
486 bool m_invert;
487 };
488
491#ifdef _UNICODE
492 using tpunct_cu = wpunct_cu;
493#else
494 using tpunct_cu = punct_cu;
495#endif
496
500 class sgml_punct_cp : public basic_punct_cu<char>
501 {
502 public:
503 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
505 {}
506
507 protected:
508 virtual bool do_match(
509 _In_reads_or_z_(end) const char* text,
510 _In_ size_t start = 0,
511 _In_ size_t end = SIZE_MAX,
512 _In_ int flags = match_default)
513 {
514 _Assume_(text || start >= end);
515 if (start < end && text[start]) {
516 wchar_t buf[5];
517 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
518 const wchar_t* chr_end = chr + stdex::strlen(chr);
519 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
520 if ((r && !m_invert) || (!r && m_invert)) {
521 this->interval.start = start;
522 return true;
523 }
524 }
525 this->interval.invalidate();
526 return false;
527 }
528 };
529
533 template <class T>
535 {
536 public:
537 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
539 m_invert(invert)
540 {}
541
542 protected:
543 virtual bool do_match(
544 _In_reads_or_z_opt_(end) const T* text,
545 _In_ size_t start = 0,
546 _In_ size_t end = SIZE_MAX,
547 _In_ int flags = match_default)
548 {
549 _Assume_(text || start >= end);
550 if (start < end && text[start]) {
551 bool r =
552 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
553 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
554 if ((r && !m_invert) || (!r && m_invert)) {
555 this->interval.end = (this->interval.start = start) + 1;
556 return true;
557 }
558 }
559 this->interval.invalidate();
560 return false;
561 }
562
563 bool m_invert;
564 };
565
568#ifdef _UNICODE
570#else
572#endif
573
578 {
579 public:
580 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
582 {}
583
584 protected:
585 virtual bool do_match(
586 _In_reads_or_z_(end) const char* text,
587 _In_ size_t start = 0,
588 _In_ size_t end = SIZE_MAX,
589 _In_ int flags = match_default)
590 {
591 _Assume_(text || start >= end);
592 if (start < end && text[start]) {
593 wchar_t buf[5];
594 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
595 const wchar_t* chr_end = chr + stdex::strlen(chr);
596 bool r =
597 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
598 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
599 if ((r && !m_invert) || (!r && m_invert)) {
600 this->interval.start = start;
601 return true;
602 }
603 }
604 this->interval.invalidate();
605 return false;
606 }
607 };
608
612 template <class T>
613 class basic_bol : public basic_parser<T>
614 {
615 public:
616 basic_bol(bool invert = false) : m_invert(invert) {}
617
618 protected:
619 virtual bool do_match(
620 _In_reads_or_z_opt_(end) const T* text,
621 _In_ size_t start = 0,
622 _In_ size_t end = SIZE_MAX,
623 _In_ int flags = match_default)
624 {
625 _Assume_(text || !end);
626 _Assume_(text || start >= end);
627 bool r = start == 0 || (start <= end && stdex::islbreak(text[start - 1]));
628 if ((r && !m_invert) || (!r && m_invert)) {
629 this->interval.end = this->interval.start = start;
630 return true;
631 }
632 this->interval.invalidate();
633 return false;
634 }
635
636 bool m_invert;
637 };
638
639 using bol = basic_bol<char>;
640 using wbol = basic_bol<wchar_t>;
641#ifdef _UNICODE
642 using tbol = wbol;
643#else
644 using tbol = bol;
645#endif
647
651 template <class T>
652 class basic_eol : public basic_parser<T>
653 {
654 public:
655 basic_eol(bool invert = false) : m_invert(invert) {}
656
657 protected:
658 virtual bool do_match(
659 _In_reads_or_z_opt_(end) const T* text,
660 _In_ size_t start = 0,
661 _In_ size_t end = SIZE_MAX,
662 _In_ int flags = match_default)
663 {
664 _Assume_(text || start >= end);
665 bool r = start >= end || !text[start] || stdex::islbreak(text[start]);
666 if ((r && !m_invert) || (!r && m_invert)) {
667 this->interval.end = this->interval.start = start;
668 return true;
669 }
670 this->interval.invalidate();
671 return false;
672 }
673
674 bool m_invert;
675 };
676
677 using eol = basic_eol<char>;
678 using weol = basic_eol<wchar_t>;
679#ifdef _UNICODE
680 using teol = weol;
681#else
682 using teol = eol;
683#endif
685
686 template <class T>
687 class basic_set : public basic_parser<T>
688 {
689 public:
690 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
692 hit_offset(SIZE_MAX),
693 m_invert(invert)
694 {}
695
696 virtual void invalidate()
697 {
698 hit_offset = SIZE_MAX;
700 }
701
702 size_t hit_offset;
703
704 protected:
705 virtual bool do_match(
706 _In_reads_or_z_opt_(end) const T* text,
707 _In_ size_t start = 0,
708 _In_ size_t end = SIZE_MAX,
709 _In_ int flags = match_default) = 0;
710
711 bool m_invert;
712 };
713
717 template <class T>
718 class basic_cu_set : public basic_set<T>
719 {
720 public:
722 _In_reads_or_z_(count) const T* set,
723 _In_ size_t count = SIZE_MAX,
724 _In_ bool invert = false,
725 _In_ const std::locale& locale = std::locale()) :
727 {
728 if (set)
729 m_set.assign(set, set + stdex::strnlen(set, count));
730 }
731
732 protected:
733 virtual bool do_match(
734 _In_reads_or_z_opt_(end) const T* text,
735 _In_ size_t start = 0,
736 _In_ size_t end = SIZE_MAX,
737 _In_ int flags = match_default)
738 {
739 _Assume_(text || start >= end);
740 if (start < end && text[start]) {
741 const T* set = m_set.data();
742 size_t r = (flags & match_case_insensitive) ?
743 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
744 stdex::strnchr(set, m_set.size(), text[start]);
745 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
746 this->hit_offset = r;
747 this->interval.end = (this->interval.start = start) + 1;
748 return true;
749 }
750 }
751 this->hit_offset = SIZE_MAX;
752 this->interval.invalidate();
753 return false;
754 }
755
756 std::basic_string<T> m_set;
757 };
758
761#ifdef _UNICODE
762 using tcu_set = wcu_set;
763#else
764 using tcu_set = cu_set;
765#endif
766
770 class sgml_cp_set : public basic_set<char>
771 {
772 public:
773 sgml_cp_set(const char* set, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
775 {
776 if (set)
777 m_set = sgml2str(set, count);
778 }
779
780 protected:
781 virtual bool do_match(
782 _In_reads_or_z_(end) const char* text,
783 _In_ size_t start = 0,
784 _In_ size_t end = SIZE_MAX,
785 _In_ int flags = match_default)
786 {
787 _Assume_(text || start >= end);
788 if (start < end && text[start]) {
789 wchar_t buf[5];
790 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
791 const wchar_t* set = m_set.data();
792 size_t r = (flags & match_case_insensitive) ?
793 stdex::strnistr(set, m_set.size(), chr, m_locale) :
794 stdex::strnstr(set, m_set.size(), chr);
795 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
796 hit_offset = r;
797 this->interval.start = start;
798 return true;
799 }
800 }
801 hit_offset = SIZE_MAX;
802 this->interval.invalidate();
803 return false;
804 }
805
806 std::wstring m_set;
807 };
808
812 template <class T>
813 class basic_string : public basic_parser<T>
814 {
815 public:
817 _In_reads_or_z_(count) const T* str,
818 _In_ size_t count = SIZE_MAX,
819 _In_ const std::locale& locale = std::locale()) :
821 m_str(str, str + stdex::strnlen(str, count))
822 {}
823
824 protected:
825 virtual bool do_match(
826 _In_reads_or_z_opt_(end) const T* text,
827 _In_ size_t start = 0,
828 _In_ size_t end = SIZE_MAX,
829 _In_ int flags = match_default)
830 {
831 _Assume_(text || start >= end);
832 size_t
833 m = m_str.size(),
834 n = std::min<size_t>(end - start, m);
835 bool r = ((flags & match_case_insensitive) ?
836 stdex::strnicmp(text + start, n, m_str.data(), m, this->m_locale) :
837 stdex::strncmp(text + start, n, m_str.data(), m)) == 0;
838 if (r) {
839 this->interval.end = (this->interval.start = start) + n;
840 return true;
841 }
842 this->interval.invalidate();
843 return false;
844 }
845
846 std::basic_string<T> m_str;
847 };
848
851#ifdef _UNICODE
852 using tstring = wstring;
853#else
854 using tstring = string;
855#endif
856
861 {
862 public:
863 sgml_string(const char* str, size_t count = SIZE_MAX, _In_ const std::locale& locale = std::locale()) :
865 m_str(sgml2str(str, count))
866 {}
867
868 protected:
869 virtual bool do_match(
870 _In_reads_or_z_(end) const char* text,
871 _In_ size_t start = 0,
872 _In_ size_t end = SIZE_MAX,
873 _In_ int flags = match_default)
874 {
875 _Assume_(text || start >= end);
876 const wchar_t* str = m_str.data();
877 const bool case_insensitive = flags & match_case_insensitive ? true : false;
878 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
879 for (this->interval.end = start;;) {
880 if (!*str) {
881 this->interval.start = start;
882 return true;
883 }
884 if (this->interval.end >= end || !text[this->interval.end]) {
885 this->interval.invalidate();
886 return false;
887 }
888 wchar_t buf[5];
889 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
890 for (; *chr; ++str, ++chr) {
891 if (!*str ||
892 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
893 {
894 this->interval.invalidate();
895 return false;
896 }
897 }
898 }
899 }
900
901 std::wstring m_str;
902 };
903
907 template <class T>
909 {
910 public:
911 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = SIZE_MAX, bool greedy = true) :
912 m_el(el),
916 {}
917
918 protected:
919 virtual bool do_match(
920 _In_reads_or_z_opt_(end) const T* text,
921 _In_ size_t start = 0,
922 _In_ size_t end = SIZE_MAX,
923 _In_ int flags = match_default)
924 {
925 _Assume_(text || start >= end);
926 this->interval.start = this->interval.end = start;
927 for (size_t i = 0; ; i++) {
928 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
929 return true;
930 if (!m_el->match(text, this->interval.end, end, flags)) {
931 if (i >= m_min_iterations)
932 return true;
933 break;
934 }
935 if (m_el->interval.end == this->interval.end) {
936 // Element did match, but the matching interval was empty. Quit instead of spinning.
937 return true;
938 }
939 this->interval.end = m_el->interval.end;
940 }
941 this->interval.invalidate();
942 return false;
943 }
944
945 std::shared_ptr<basic_parser<T>> m_el;
948 bool m_greedy;
949 };
950
953#ifdef _UNICODE
954 using titerations = witerations;
955#else
956 using titerations = iterations;
957#endif
959
963 template <class T>
965 {
966 protected:
967 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
968
969 public:
971 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
972 _In_ size_t count,
973 _In_ const std::locale& locale = std::locale()) :
975 {
976 _Assume_(el || !count);
977 m_collection.reserve(count);
978 for (size_t i = 0; i < count; i++)
979 m_collection.push_back(el[i]);
980 }
981
983 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
984 _In_ const std::locale& locale = std::locale()) :
986 m_collection(std::move(collection))
987 {}
988
989 virtual void invalidate()
990 {
991 for (auto& el : m_collection)
992 el->invalidate();
994 }
995
996 protected:
997 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
998 };
999
1003 template <class T>
1005 {
1006 public:
1008 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1009 _In_ size_t count = 0,
1010 _In_ const std::locale& locale = std::locale()) :
1012 {}
1013
1015 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1016 _In_ const std::locale& locale = std::locale()) :
1018 {}
1019
1020 protected:
1021 virtual bool do_match(
1022 _In_reads_or_z_opt_(end) const T* text,
1023 _In_ size_t start = 0,
1024 _In_ size_t end = SIZE_MAX,
1025 _In_ int flags = match_default)
1026 {
1027 _Assume_(text || start >= end);
1028 this->interval.end = start;
1029 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1030 if (!(*i)->match(text, this->interval.end, end, flags)) {
1031 for (++i; i != this->m_collection.end(); ++i)
1032 (*i)->invalidate();
1033 this->interval.invalidate();
1034 return false;
1035 }
1036 this->interval.end = (*i)->interval.end;
1037 }
1038 this->interval.start = start;
1039 return true;
1040 }
1041 };
1042
1045#ifdef _UNICODE
1046 using tsequence = wsequence;
1047#else
1048 using tsequence = sequence;
1049#endif
1051
1055 template <class T>
1057 {
1058 protected:
1059 basic_branch(_In_ const std::locale& locale) :
1061 hit_offset(SIZE_MAX)
1062 {}
1063
1064 public:
1066 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1067 _In_ size_t count = 0,
1068 _In_ const std::locale& locale = std::locale()) :
1070 hit_offset(SIZE_MAX)
1071 {}
1072
1074 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1075 _In_ const std::locale& locale = std::locale()) :
1077 hit_offset(SIZE_MAX)
1078 {}
1079
1080 virtual void invalidate()
1081 {
1082 hit_offset = SIZE_MAX;
1084 }
1085
1086 size_t hit_offset;
1087
1088 protected:
1089 virtual bool do_match(
1090 _In_reads_or_z_opt_(end) const T* text,
1091 _In_ size_t start = 0,
1092 _In_ size_t end = SIZE_MAX,
1093 _In_ int flags = match_default)
1094 {
1095 _Assume_(text || start >= end);
1096 hit_offset = 0;
1097 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1098 if ((*i)->match(text, start, end, flags)) {
1099 this->interval = (*i)->interval;
1100 for (++i; i != this->m_collection.end(); ++i)
1101 (*i)->invalidate();
1102 return true;
1103 }
1104 }
1105 hit_offset = SIZE_MAX;
1106 this->interval.invalidate();
1107 return false;
1108 }
1109 };
1110
1111 using branch = basic_branch<char>;
1113#ifdef _UNICODE
1114 using tbranch = wbranch;
1115#else
1116 using tbranch = branch;
1117#endif
1119
1123 template <class T, class T_parser = basic_string<T>>
1125 {
1126 public:
1128 _In_reads_(count) const T* str_z = nullptr,
1129 _In_ size_t count = 0,
1130 _In_ const std::locale& locale = std::locale()) :
1132 {
1133 build(str_z, count);
1134 }
1135
1136 basic_string_branch(_In_z_ const T* str, ...) :
1137 basic_branch<T>(std::locale())
1138 {
1139 va_list params;
1140 va_start(params, str);
1141 build(str, params);
1142 va_end(params);
1143 }
1144
1145 basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1147 {
1148 va_list params;
1149 va_start(params, str);
1150 build(str, params);
1151 va_end(params);
1152 }
1153
1154 protected:
1155 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1156 {
1157 _Assume_(str_z || !count);
1158 if (count) {
1159 size_t offset, n;
1160 for (
1161 offset = n = 0;
1162 offset < count && str_z[offset];
1163 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1164 this->m_collection.reserve(n);
1165 for (
1166 offset = 0;
1167 offset < count && str_z[offset];
1168 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1169 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1170 }
1171 }
1172
1173 void build(_In_z_ const T* str, _In_ va_list params)
1174 {
1175 const T* p;
1176 for (
1177 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, SIZE_MAX, this->m_locale)));
1178 (p = va_arg(params, const T*)) != nullptr;
1179 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, SIZE_MAX, this->m_locale))));
1180 }
1181 };
1182
1185#ifdef _UNICODE
1187#else
1189#endif
1191
1195 template <class T>
1197 {
1198 public:
1200 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1201 _In_ size_t count = 0,
1202 _In_ const std::locale& locale = std::locale()) :
1204 {}
1205
1207 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1208 _In_ const std::locale& locale = std::locale()) :
1210 {}
1211
1212 protected:
1213 virtual bool do_match(
1214 _In_reads_or_z_opt_(end) const T* text,
1215 _In_ size_t start = 0,
1216 _In_ size_t end = SIZE_MAX,
1217 _In_ int flags = match_default)
1218 {
1219 _Assume_(text || start >= end);
1220 for (auto& el : this->m_collection)
1221 el->invalidate();
1222 if (match_recursively(text, start, end, flags)) {
1223 this->interval.start = start;
1224 return true;
1225 }
1226 this->interval.invalidate();
1227 return false;
1228 }
1229
1230 bool match_recursively(
1231 _In_reads_or_z_opt_(end) const T* text,
1232 _In_ size_t start = 0,
1233 _In_ size_t end = SIZE_MAX,
1234 _In_ int flags = match_default)
1235 {
1236 bool all_matched = true;
1237 for (auto& el : this->m_collection) {
1238 if (!el->interval) {
1239 // Element was not matched in permutatuion yet.
1240 all_matched = false;
1241 if (el->match(text, start, end, flags)) {
1242 // Element matched for the first time.
1243 if (match_recursively(text, el->interval.end, end, flags)) {
1244 // Rest of the elements matched too.
1245 return true;
1246 }
1247 el->invalidate();
1248 }
1249 }
1250 }
1251 if (all_matched) {
1252 this->interval.end = start;
1253 return true;
1254 }
1255 return false;
1256 }
1257 };
1258
1261#ifdef _UNICODE
1262 using tpermutation = wpermutation;
1263#else
1264 using tpermutation = permutation;
1265#endif
1267
1271 template <class T>
1272 class basic_integer : public basic_parser<T>
1273 {
1274 public:
1275 basic_integer(_In_ const std::locale& locale = std::locale()) :
1277 value(0)
1278 {}
1279
1280 virtual void invalidate()
1281 {
1282 value = 0;
1284 }
1285
1286 public:
1287 size_t value;
1288 };
1289
1293 template <class T>
1295 {
1296 public:
1298 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1299 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1300 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1301 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1302 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1303 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1304 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1305 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1306 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1307 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1308 _In_ const std::locale& locale = std::locale()) :
1310 m_digit_0(digit_0),
1311 m_digit_1(digit_1),
1312 m_digit_2(digit_2),
1313 m_digit_3(digit_3),
1314 m_digit_4(digit_4),
1315 m_digit_5(digit_5),
1316 m_digit_6(digit_6),
1317 m_digit_7(digit_7),
1318 m_digit_8(digit_8),
1319 m_digit_9(digit_9)
1320 {}
1321
1322 protected:
1323 virtual bool do_match(
1324 _In_reads_or_z_opt_(end) const T* text,
1325 _In_ size_t start = 0,
1326 _In_ size_t end = SIZE_MAX,
1327 _In_ int flags = match_default)
1328 {
1329 _Assume_(text || start >= end);
1330 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1331 size_t dig;
1332 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1333 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1334 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1335 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1336 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1337 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1338 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1339 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1340 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1341 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1342 else break;
1343 this->value = this->value * 10 + dig;
1344 }
1346 this->interval.start = start;
1347 return true;
1348 }
1349 this->interval.invalidate();
1350 return false;
1351 }
1352
1353 std::shared_ptr<basic_parser<T>>
1354 m_digit_0,
1355 m_digit_1,
1356 m_digit_2,
1357 m_digit_3,
1358 m_digit_4,
1359 m_digit_5,
1360 m_digit_6,
1361 m_digit_7,
1362 m_digit_8,
1363 m_digit_9;
1364 };
1365
1368#ifdef _UNICODE
1369 using tinteger10 = winteger10;
1370#else
1371 using tinteger10 = integer10;
1372#endif
1374
1378 template <class T>
1380 {
1381 public:
1383 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1384 _In_ const std::shared_ptr<basic_set<T>>& separator,
1385 _In_ const std::locale& locale = std::locale()) :
1387 digit_count(0),
1388 has_separators(false),
1389 m_digits(digits),
1390 m_separator(separator)
1391 {}
1392
1393 virtual void invalidate()
1394 {
1395 digit_count = 0;
1396 has_separators = false;
1398 }
1399
1402
1403 protected:
1404 virtual bool do_match(
1405 _In_reads_or_z_opt_(end) const T* text,
1406 _In_ size_t start = 0,
1407 _In_ size_t end = SIZE_MAX,
1408 _In_ int flags = match_default)
1409 {
1410 _Assume_(text || start >= end);
1411 if (m_digits->match(text, start, end, flags)) {
1412 // Leading part match.
1413 this->value = m_digits->value;
1414 digit_count = m_digits->interval.size();
1415 has_separators = false;
1416 this->interval.start = start;
1417 this->interval.end = m_digits->interval.end;
1418 if (m_digits->interval.size() <= 3) {
1419 // Maybe separated with thousand separators?
1420 size_t hit_offset = SIZE_MAX;
1421 while (m_separator->match(text, this->interval.end, end, flags) &&
1422 (hit_offset == SIZE_MAX || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1423 m_digits->match(text, m_separator->interval.end, end, flags) &&
1424 m_digits->interval.size() == 3)
1425 {
1426 // Thousand separator and three-digit integer followed.
1427 this->value = this->value * 1000 + m_digits->value;
1428 digit_count += 3;
1429 has_separators = true;
1430 this->interval.end = m_digits->interval.end;
1431 hit_offset = m_separator->hit_offset;
1432 }
1433 }
1434
1435 return true;
1436 }
1437 this->value = 0;
1438 this->interval.invalidate();
1439 return false;
1440 }
1441
1442 std::shared_ptr<basic_integer10<T>> m_digits;
1443 std::shared_ptr<basic_set<T>> m_separator;
1444 };
1445
// Decimal integer with thousand separators: narrow and wide character variants.
using integer10ts = basic_integer10ts<char>;
using winteger10ts = basic_integer10ts<wchar_t>;
// tinteger10ts follows the _UNICODE build setting.
#ifdef _UNICODE
using tinteger10ts = winteger10ts;
#else
using tinteger10ts = integer10ts;
#endif
// The SGML variant uses the same char instantiation.
using sgml_integer10ts = basic_integer10ts<char>;
1454
1458 template <class T>
1460 {
1461 public:
1463 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1464 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1465 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1466 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1467 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1468 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1469 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1470 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1471 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1472 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1473 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1474 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1475 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1476 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1477 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1478 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1479 _In_ const std::locale& locale = std::locale()) :
1481 m_digit_0(digit_0),
1482 m_digit_1(digit_1),
1483 m_digit_2(digit_2),
1484 m_digit_3(digit_3),
1485 m_digit_4(digit_4),
1486 m_digit_5(digit_5),
1487 m_digit_6(digit_6),
1488 m_digit_7(digit_7),
1489 m_digit_8(digit_8),
1490 m_digit_9(digit_9),
1491 m_digit_10(digit_10),
1492 m_digit_11(digit_11),
1493 m_digit_12(digit_12),
1494 m_digit_13(digit_13),
1495 m_digit_14(digit_14),
1496 m_digit_15(digit_15)
1497 {}
1498
1499 protected:
1500 virtual bool do_match(
1501 _In_reads_or_z_opt_(end) const T* text,
1502 _In_ size_t start = 0,
1503 _In_ size_t end = SIZE_MAX,
1504 _In_ int flags = match_default)
1505 {
1506 _Assume_(text || start >= end);
1507 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1508 size_t dig;
1509 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1510 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1511 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1512 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1513 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1514 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1515 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1516 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1517 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1518 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1519 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1520 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1521 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1522 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1523 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1524 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
1525 else break;
1526 this->value = this->value * 16 + dig;
1527 }
1529 this->interval.start = start;
1530 return true;
1531 }
1532 this->interval.invalidate();
1533 return false;
1534 }
1535
1536 std::shared_ptr<basic_parser<T>>
1537 m_digit_0,
1538 m_digit_1,
1539 m_digit_2,
1540 m_digit_3,
1541 m_digit_4,
1542 m_digit_5,
1543 m_digit_6,
1544 m_digit_7,
1545 m_digit_8,
1546 m_digit_9,
1547 m_digit_10,
1548 m_digit_11,
1549 m_digit_12,
1550 m_digit_13,
1551 m_digit_14,
1552 m_digit_15;
1553 };
1554
1557#ifdef _UNICODE
1558 using tinteger16 = winteger16;
1559#else
1560 using tinteger16 = integer16;
1561#endif
1563
1567 template <class T>
1569 {
1570 public:
1572 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1573 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1574 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1575 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1576 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1577 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1578 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1579 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1580 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1581 _In_ const std::locale& locale = std::locale()) :
1583 m_digit_1(digit_1),
1584 m_digit_5(digit_5),
1585 m_digit_10(digit_10),
1586 m_digit_50(digit_50),
1587 m_digit_100(digit_100),
1588 m_digit_500(digit_500),
1589 m_digit_1000(digit_1000),
1590 m_digit_5000(digit_5000),
1591 m_digit_10000(digit_10000)
1592 {}
1593
1594 protected:
1595 virtual bool do_match(
1596 _In_reads_or_z_opt_(end) const T* text,
1597 _In_ size_t start = 0,
1598 _In_ size_t end = SIZE_MAX,
1599 _In_ int flags = match_default)
1600 {
1601 _Assume_(text || start >= end);
1602 size_t
1604 end2;
1605
1606 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
1607 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1608 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1609 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1610 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1611 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1612 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1613 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1614 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1615 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1616 else break;
1617
1618 // Store first digit.
1619 if (dig[4] == SIZE_MAX) dig[4] = dig[0];
1620
1621 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1622 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1623 break;
1624 }
1625 if (dig[0] <= dig[1]) {
1626 // Digit is less or equal previous one: add.
1627 this->value += dig[0];
1628 }
1629 else if (
1630 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1631 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1632 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1633 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1634 {
1635 // Digit is up to two orders bigger than previous one: subtract. But...
1636 if (dig[2] < dig[0]) {
1637 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1638 break;
1639 }
1640 this->value -= dig[1]; // Cancel addition in the previous step.
1641 dig[0] -= dig[1]; // Combine last two digits.
1642 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1643 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1644 this->value += dig[0]; // Add combined value.
1645 }
1646 else {
1647 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1648 break;
1649 }
1650 }
1651 if (this->value) {
1652 this->interval.start = start;
1653 return true;
1654 }
1655 this->interval.invalidate();
1656 return false;
1657 }
1658
1659 std::shared_ptr<basic_parser<T>>
1660 m_digit_1,
1661 m_digit_5,
1662 m_digit_10,
1663 m_digit_50,
1664 m_digit_100,
1665 m_digit_500,
1666 m_digit_1000,
1667 m_digit_5000,
1668 m_digit_10000;
1669 };
1670
1673#ifdef _UNICODE
1675#else
1677#endif
1679
1683 template <class T>
1685 {
1686 public:
1688 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1689 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1690 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1691 _In_ const std::locale& locale = std::locale()) :
1693 numerator(_numerator),
1694 fraction_line(_fraction_line),
1695 denominator(_denominator)
1696 {}
1697
1698 virtual void invalidate()
1699 {
1700 numerator->invalidate();
1701 fraction_line->invalidate();
1702 denominator->invalidate();
1704 }
1705
1706 std::shared_ptr<basic_parser<T>> numerator;
1707 std::shared_ptr<basic_parser<T>> fraction_line;
1708 std::shared_ptr<basic_parser<T>> denominator;
1709
1710 protected:
1711 virtual bool do_match(
1712 _In_reads_or_z_opt_(end) const T* text,
1713 _In_ size_t start = 0,
1714 _In_ size_t end = SIZE_MAX,
1715 _In_ int flags = match_default)
1716 {
1717 _Assume_(text || start >= end);
1718 if (numerator->match(text, start, end, flags) &&
1719 fraction_line->match(text, numerator->interval.end, end, flags) &&
1720 denominator->match(text, fraction_line->interval.end, end, flags))
1721 {
1722 this->interval.start = start;
1723 this->interval.end = denominator->interval.end;
1724 return true;
1725 }
1726 numerator->invalidate();
1727 fraction_line->invalidate();
1728 denominator->invalidate();
1729 this->interval.invalidate();
1730 return false;
1731 }
1732 };
1733
1736#ifdef _UNICODE
1737 using tfraction = wfraction;
1738#else
1739 using tfraction = fraction;
1740#endif
1742
1746 template <class T>
1747 class basic_score : public basic_parser<T>
1748 {
1749 public:
1751 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1752 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1753 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1754 _In_ const std::shared_ptr<basic_parser<T>>& space,
1755 _In_ const std::locale& locale = std::locale()) :
1757 home(_home),
1758 separator(_separator),
1759 guest(_guest),
1760 m_space(space)
1761 {}
1762
1763 virtual void invalidate()
1764 {
1765 home->invalidate();
1766 separator->invalidate();
1767 guest->invalidate();
1769 }
1770
1771 std::shared_ptr<basic_parser<T>> home;
1772 std::shared_ptr<basic_parser<T>> separator;
1773 std::shared_ptr<basic_parser<T>> guest;
1774
1775 protected:
1776 virtual bool do_match(
1777 _In_reads_or_z_opt_(end) const T* text,
1778 _In_ size_t start = 0,
1779 _In_ size_t end = SIZE_MAX,
1780 _In_ int flags = match_default)
1781 {
1782 _Assume_(text || start >= end);
1783 this->interval.end = start;
1784
1785 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1786
1787 if (home->match(text, this->interval.end, end, flags))
1788 this->interval.end = home->interval.end;
1789 else
1790 goto end;
1791
1792 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1793
1794 if (separator->match(text, this->interval.end, end, flags))
1795 this->interval.end = separator->interval.end;
1796 else
1797 goto end;
1798
1799 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1800
1801 if (guest->match(text, this->interval.end, end, flags))
1802 this->interval.end = guest->interval.end;
1803 else
1804 goto end;
1805
1806 this->interval.start = start;
1807 return true;
1808
1809 end:
1810 home->invalidate();
1811 separator->invalidate();
1812 guest->invalidate();
1813 this->interval.invalidate();
1814 return false;
1815 }
1816
1817 std::shared_ptr<basic_parser<T>> m_space;
1818 };
1819
1820 using score = basic_score<char>;
1822#ifdef _UNICODE
1823 using tscore = wscore;
1824#else
1825 using tscore = score;
1826#endif
1828
1832 template <class T>
1834 {
1835 public:
1837 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1838 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1839 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1840 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1841 _In_ const std::locale& locale = std::locale()) :
1847 {}
1848
1849 virtual void invalidate()
1850 {
1851 if (positive_sign) positive_sign->invalidate();
1852 if (negative_sign) negative_sign->invalidate();
1853 if (special_sign) special_sign->invalidate();
1854 number->invalidate();
1856 }
1857
1858 std::shared_ptr<basic_parser<T>> positive_sign;
1859 std::shared_ptr<basic_parser<T>> negative_sign;
1860 std::shared_ptr<basic_parser<T>> special_sign;
1861 std::shared_ptr<basic_parser<T>> number;
1862
1863 protected:
1864 virtual bool do_match(
1865 _In_reads_or_z_opt_(end) const T* text,
1866 _In_ size_t start = 0,
1867 _In_ size_t end = SIZE_MAX,
1868 _In_ int flags = match_default)
1869 {
1870 _Assume_(text || start >= end);
1871 this->interval.end = start;
1872 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1873 this->interval.end = positive_sign->interval.end;
1874 if (negative_sign) negative_sign->invalidate();
1875 if (special_sign) special_sign->invalidate();
1876 }
1877 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1878 this->interval.end = negative_sign->interval.end;
1879 if (positive_sign) positive_sign->invalidate();
1880 if (special_sign) special_sign->invalidate();
1881 }
1882 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1883 this->interval.end = special_sign->interval.end;
1884 if (positive_sign) positive_sign->invalidate();
1885 if (negative_sign) negative_sign->invalidate();
1886 }
1887 else {
1888 if (positive_sign) positive_sign->invalidate();
1889 if (negative_sign) negative_sign->invalidate();
1890 if (special_sign) special_sign->invalidate();
1891 }
1892 if (number->match(text, this->interval.end, end, flags)) {
1893 this->interval.start = start;
1894 this->interval.end = number->interval.end;
1895 return true;
1896 }
1897 if (positive_sign) positive_sign->invalidate();
1898 if (negative_sign) negative_sign->invalidate();
1899 if (special_sign) special_sign->invalidate();
1900 number->invalidate();
1901 this->interval.invalidate();
1902 return false;
1903 }
1904 };
1905
// Signed numeral parser: narrow and wide character variants.
using signed_numeral = basic_signed_numeral<char>;
using wsigned_numeral = basic_signed_numeral<wchar_t>;
// tsigned_numeral follows the _UNICODE build setting.
#ifdef _UNICODE
using tsigned_numeral = wsigned_numeral;
#else
using tsigned_numeral = signed_numeral;
#endif
// The SGML variant uses the same char instantiation.
using sgml_signed_numeral = basic_signed_numeral<char>;
1914
1918 template <class T>
1920 {
1921 public:
1923 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1924 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1925 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1926 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1927 _In_ const std::shared_ptr<basic_parser<T>>& space,
1928 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1929 _In_ const std::locale& locale = std::locale()) :
1936 m_space(space)
1937 {}
1938
1939 virtual void invalidate()
1940 {
1941 if (positive_sign) positive_sign->invalidate();
1942 if (negative_sign) negative_sign->invalidate();
1943 if (special_sign) special_sign->invalidate();
1944 integer->invalidate();
1945 fraction->invalidate();
1947 }
1948
1949 std::shared_ptr<basic_parser<T>> positive_sign;
1950 std::shared_ptr<basic_parser<T>> negative_sign;
1951 std::shared_ptr<basic_parser<T>> special_sign;
1952 std::shared_ptr<basic_parser<T>> integer;
1953 std::shared_ptr<basic_parser<T>> fraction;
1954
1955 protected:
1956 virtual bool do_match(
1957 _In_reads_or_z_opt_(end) const T* text,
1958 _In_ size_t start = 0,
1959 _In_ size_t end = SIZE_MAX,
1960 _In_ int flags = match_default)
1961 {
1962 _Assume_(text || start >= end);
1963 this->interval.end = start;
1964
1965 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1966 this->interval.end = positive_sign->interval.end;
1967 if (negative_sign) negative_sign->invalidate();
1968 if (special_sign) special_sign->invalidate();
1969 }
1970 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1971 this->interval.end = negative_sign->interval.end;
1972 if (positive_sign) positive_sign->invalidate();
1973 if (special_sign) special_sign->invalidate();
1974 }
1975 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1976 this->interval.end = special_sign->interval.end;
1977 if (positive_sign) positive_sign->invalidate();
1978 if (negative_sign) negative_sign->invalidate();
1979 }
1980 else {
1981 if (positive_sign) positive_sign->invalidate();
1982 if (negative_sign) negative_sign->invalidate();
1983 if (special_sign) special_sign->invalidate();
1984 }
1985
1986 // Check for <integer> <fraction>
1987 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1988 if (integer->match(text, this->interval.end, end, flags) &&
1989 m_space->match(text, integer->interval.end, end, space_match_flags))
1990 {
1991 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1992 if (fraction->match(text, this->interval.end, end, flags)) {
1993 this->interval.start = start;
1994 this->interval.end = fraction->interval.end;
1995 return true;
1996 }
1997 fraction->invalidate();
1998 this->interval.start = start;
1999 this->interval.end = integer->interval.end;
2000 return true;
2001 }
2002
2003 // Check for <fraction>
2004 if (fraction->match(text, this->interval.end, end, flags)) {
2005 integer->invalidate();
2006 this->interval.start = start;
2007 this->interval.end = fraction->interval.end;
2008 return true;
2009 }
2010
2011 // Check for <integer>
2012 if (integer->match(text, this->interval.end, end, flags)) {
2013 fraction->invalidate();
2014 this->interval.start = start;
2015 this->interval.end = integer->interval.end;
2016 return true;
2017 }
2018
2019 if (positive_sign) positive_sign->invalidate();
2020 if (negative_sign) negative_sign->invalidate();
2021 if (special_sign) special_sign->invalidate();
2022 integer->invalidate();
2023 fraction->invalidate();
2024 this->interval.invalidate();
2025 return false;
2026 }
2027
2028 std::shared_ptr<basic_parser<T>> m_space;
2029 };
2030
// Mixed numeral parser: narrow and wide character variants.
using mixed_numeral = basic_mixed_numeral<char>;
using wmixed_numeral = basic_mixed_numeral<wchar_t>;
// tmixed_numeral follows the _UNICODE build setting.
#ifdef _UNICODE
using tmixed_numeral = wmixed_numeral;
#else
using tmixed_numeral = mixed_numeral;
#endif
// The SGML variant uses the same char instantiation.
using sgml_mixed_numeral = basic_mixed_numeral<char>;
2039
2043 template <class T>
2045 {
2046 public:
2048 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2049 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2050 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2051 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2052 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2053 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2054 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2055 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2056 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2057 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2058 _In_ const std::locale& locale = std::locale()) :
2070 value(std::numeric_limits<double>::quiet_NaN())
2071 {}
2072
2073 virtual void invalidate()
2074 {
2075 if (positive_sign) positive_sign->invalidate();
2076 if (negative_sign) negative_sign->invalidate();
2077 if (special_sign) special_sign->invalidate();
2078 integer->invalidate();
2079 decimal_separator->invalidate();
2080 decimal->invalidate();
2081 if (exponent_symbol) exponent_symbol->invalidate();
2082 if (positive_exp_sign) positive_exp_sign->invalidate();
2083 if (negative_exp_sign) negative_exp_sign->invalidate();
2084 if (exponent) exponent->invalidate();
2085 value = std::numeric_limits<double>::quiet_NaN();
2087 }
2088
2089 std::shared_ptr<basic_parser<T>> positive_sign;
2090 std::shared_ptr<basic_parser<T>> negative_sign;
2091 std::shared_ptr<basic_parser<T>> special_sign;
2092 std::shared_ptr<basic_integer<T>> integer;
2093 std::shared_ptr<basic_parser<T>> decimal_separator;
2094 std::shared_ptr<basic_integer<T>> decimal;
2095 std::shared_ptr<basic_parser<T>> exponent_symbol;
2096 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2097 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2098 std::shared_ptr<basic_integer<T>> exponent;
2099 double value;
2100
2101 protected:
2102 virtual bool do_match(
2103 _In_reads_or_z_opt_(end) const T* text,
2104 _In_ size_t start = 0,
2105 _In_ size_t end = SIZE_MAX,
2106 _In_ int flags = match_default)
2107 {
2108 _Assume_(text || start >= end);
2109 this->interval.end = start;
2110
2111 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2112 this->interval.end = positive_sign->interval.end;
2113 if (negative_sign) negative_sign->invalidate();
2114 if (special_sign) special_sign->invalidate();
2115 }
2116 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2117 this->interval.end = negative_sign->interval.end;
2118 if (positive_sign) positive_sign->invalidate();
2119 if (special_sign) special_sign->invalidate();
2120 }
2121 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2122 this->interval.end = special_sign->interval.end;
2123 if (positive_sign) positive_sign->invalidate();
2124 if (negative_sign) negative_sign->invalidate();
2125 }
2126 else {
2127 if (positive_sign) positive_sign->invalidate();
2128 if (negative_sign) negative_sign->invalidate();
2129 if (special_sign) special_sign->invalidate();
2130 }
2131
2132 if (integer->match(text, this->interval.end, end, flags))
2133 this->interval.end = integer->interval.end;
2134
2135 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2136 decimal->match(text, decimal_separator->interval.end, end, flags))
2137 this->interval.end = decimal->interval.end;
2138 else {
2139 decimal_separator->invalidate();
2140 decimal->invalidate();
2141 }
2142
2143 if (integer->interval.empty() &&
2144 decimal->interval.empty())
2145 {
2146 // No integer part, no decimal part.
2147 if (positive_sign) positive_sign->invalidate();
2148 if (negative_sign) negative_sign->invalidate();
2149 if (special_sign) special_sign->invalidate();
2150 integer->invalidate();
2151 decimal_separator->invalidate();
2152 decimal->invalidate();
2153 if (exponent_symbol) exponent_symbol->invalidate();
2154 if (positive_exp_sign) positive_exp_sign->invalidate();
2155 if (negative_exp_sign) negative_exp_sign->invalidate();
2156 if (exponent) exponent->invalidate();
2157 this->interval.invalidate();
2158 return false;
2159 }
2160
2161 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2162 ((positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2163 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags)) ||
2164 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2165 {
2166 this->interval.end = exponent->interval.end;
2167 if (negative_exp_sign) negative_exp_sign->invalidate();
2168 }
2169 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2170 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2171 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2172 {
2173 this->interval.end = exponent->interval.end;
2174 if (positive_exp_sign) positive_exp_sign->invalidate();
2175 }
2176 else {
2177 if (exponent_symbol) exponent_symbol->invalidate();
2178 if (positive_exp_sign) positive_exp_sign->invalidate();
2179 if (negative_exp_sign) negative_exp_sign->invalidate();
2180 if (exponent) exponent->invalidate();
2181 }
2182
2183 value = (double)integer->value;
2184 if (decimal->interval)
2185 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2186 if (negative_sign && negative_sign->interval)
2187 value = -value;
2188 if (exponent && exponent->interval) {
2189 double e = (double)exponent->value;
2190 if (negative_exp_sign && negative_exp_sign->interval)
2191 e = -e;
2192 value *= pow(10.0, e);
2193 }
2194
2195 this->interval.start = start;
2196 return true;
2197 }
2198 };
2199
// Scientific numeral parser: narrow and wide character variants.
using scientific_numeral = basic_scientific_numeral<char>;
using wscientific_numeral = basic_scientific_numeral<wchar_t>;
// tscientific_numeral follows the _UNICODE build setting.
#ifdef _UNICODE
using tscientific_numeral = wscientific_numeral;
#else
using tscientific_numeral = scientific_numeral;
#endif
// The SGML variant uses the same char instantiation.
using sgml_scientific_numeral = basic_scientific_numeral<char>;
2208
2212 template <class T>
2214 {
2215 public:
2217 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2218 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2219 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2220 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2221 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2222 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2223 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2224 _In_ const std::locale& locale = std::locale()) :
2233 {}
2234
2235 virtual void invalidate()
2236 {
2237 if (positive_sign) positive_sign->invalidate();
2238 if (negative_sign) negative_sign->invalidate();
2239 if (special_sign) special_sign->invalidate();
2240 currency->invalidate();
2241 integer->invalidate();
2242 decimal_separator->invalidate();
2243 decimal->invalidate();
2245 }
2246
2247 std::shared_ptr<basic_parser<T>> positive_sign;
2248 std::shared_ptr<basic_parser<T>> negative_sign;
2249 std::shared_ptr<basic_parser<T>> special_sign;
2250 std::shared_ptr<basic_parser<T>> currency;
2251 std::shared_ptr<basic_parser<T>> integer;
2252 std::shared_ptr<basic_parser<T>> decimal_separator;
2253 std::shared_ptr<basic_parser<T>> decimal;
2254
2255 protected:
2256 virtual bool do_match(
2257 _In_reads_or_z_opt_(end) const T* text,
2258 _In_ size_t start = 0,
2259 _In_ size_t end = SIZE_MAX,
2260 _In_ int flags = match_default)
2261 {
2262 _Assume_(text || start >= end);
2263 this->interval.end = start;
2264
2265 if (positive_sign->match(text, this->interval.end, end, flags)) {
2266 this->interval.end = positive_sign->interval.end;
2267 if (negative_sign) negative_sign->invalidate();
2268 if (special_sign) special_sign->invalidate();
2269 }
2270 else if (negative_sign->match(text, this->interval.end, end, flags)) {
2271 this->interval.end = negative_sign->interval.end;
2272 if (positive_sign) positive_sign->invalidate();
2273 if (special_sign) special_sign->invalidate();
2274 }
2275 else if (special_sign->match(text, this->interval.end, end, flags)) {
2276 this->interval.end = special_sign->interval.end;
2277 if (positive_sign) positive_sign->invalidate();
2278 if (negative_sign) negative_sign->invalidate();
2279 }
2280 else {
2281 if (positive_sign) positive_sign->invalidate();
2282 if (negative_sign) negative_sign->invalidate();
2283 if (special_sign) special_sign->invalidate();
2284 }
2285
2286 if (currency->match(text, this->interval.end, end, flags))
2287 this->interval.end = currency->interval.end;
2288 else {
2289 if (positive_sign) positive_sign->invalidate();
2290 if (negative_sign) negative_sign->invalidate();
2291 if (special_sign) special_sign->invalidate();
2292 integer->invalidate();
2293 decimal_separator->invalidate();
2294 decimal->invalidate();
2295 this->interval.invalidate();
2296 return false;
2297 }
2298
2299 if (integer->match(text, this->interval.end, end, flags))
2300 this->interval.end = integer->interval.end;
2301 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2302 decimal->match(text, decimal_separator->interval.end, end, flags))
2303 this->interval.end = decimal->interval.end;
2304 else {
2305 decimal_separator->invalidate();
2306 decimal->invalidate();
2307 }
2308
2309 if (integer->interval.empty() &&
2310 decimal->interval.empty())
2311 {
2312 // No integer part, no decimal part.
2313 if (positive_sign) positive_sign->invalidate();
2314 if (negative_sign) negative_sign->invalidate();
2315 if (special_sign) special_sign->invalidate();
2316 currency->invalidate();
2317 integer->invalidate();
2318 decimal_separator->invalidate();
2319 decimal->invalidate();
2320 this->interval.invalidate();
2321 return false;
2322 }
2323
2324 this->interval.start = start;
2325 return true;
2326 }
2327 };
2328
// Character-type aliases of basic_monetary_numeral. tmonetary_numeral picks the wide or
// narrow variant according to _UNICODE; sgml_monetary_numeral is the same instantiation
// as monetary_numeral (basic_monetary_numeral<char>).
2329 using monetary_numeral = basic_monetary_numeral<char>;
2330 using wmonetary_numeral = basic_monetary_numeral<wchar_t>;
2331#ifdef _UNICODE
2332 using tmonetary_numeral = wmonetary_numeral;
2333#else
2334 using tmonetary_numeral = monetary_numeral;
2335#endif
2336 using sgml_monetary_numeral = basic_monetary_numeral<char>;
2337
2341 template <class T>
2343 {
2344 public:
// Constructor parameters, member initializers and body of the IPv4 address parser.
// NOTE(review): the listing numbers skip 2345 and 2358 here, so the line naming the
// constructor and one initializer line are not visible in this chunk - verify against
// the original header.
//   digit_0..digit_9  Sub-parsers stored in m_digit_0..m_digit_9; do_match() counts a
//                     match of digit_N as decimal digit N.
//   separator         Sub-parser stored in m_separator; do_match() requires it between components.
//   locale            Defaults to std::locale(); its use is not visible in this chunk.
2346 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2347 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2348 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2349 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2350 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2351 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2352 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2353 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2354 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2355 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2356 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2357 _In_ const std::locale& locale = std::locale()) :
2359 m_digit_0(digit_0),
2360 m_digit_1(digit_1),
2361 m_digit_2(digit_2),
2362 m_digit_3(digit_3),
2363 m_digit_4(digit_4),
2364 m_digit_5(digit_5),
2365 m_digit_6(digit_6),
2366 m_digit_7(digit_7),
2367 m_digit_8(digit_8),
2368 m_digit_9(digit_9),
2369 m_separator(separator)
2370 {
// Start with an all-zero address value.
2371 value.s_addr = 0;
2372 }
2373
// Resets the parser state: each of the four component intervals gets start = 1, end = 0
// (start past end), and the address value is zeroed.
// NOTE(review): listing number 2385 is skipped before the closing brace - presumably a
// call to the base-class invalidate(); verify against the original header.
2374 virtual void invalidate()
2375 {
2376 components[0].start = 1;
2377 components[0].end = 0;
2378 components[1].start = 1;
2379 components[1].end = 0;
2380 components[2].start = 1;
2381 components[2].end = 0;
2382 components[3].start = 1;
2383 components[3].end = 0;
2384 value.s_addr = 0;
2386 }
2387
2390
2391 protected:
2392 virtual bool do_match(
2393 _In_reads_or_z_opt_(end) const T* text,
2394 _In_ size_t start = 0,
2395 _In_ size_t end = SIZE_MAX,
2396 _In_ int flags = match_default)
2397 {
2398 _Assume_(text || start >= end);
2399 this->interval.end = start;
2400 value.s_addr = 0;
2401
2402 size_t i;
2403 for (i = 0; i < 4; i++) {
2404 if (i) {
2405 if (m_separator->match(text, this->interval.end, end, flags))
2406 this->interval.end = m_separator->interval.end;
2407 else
2408 goto error;
2409 }
2410
2411 components[i].start = this->interval.end;
2412 bool is_empty = true;
2413 size_t x;
2414 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2415 size_t dig, digit_end;
2416 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2417 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2418 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2419 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2420 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2421 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2422 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2423 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2424 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2425 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2426 else break;
2427 size_t x_n = x * 10 + dig;
2428 if (x_n <= 255) {
2429 x = x_n;
2430 this->interval.end = digit_end;
2431 is_empty = false;
2432 }
2433 else
2434 break;
2435 }
2436 if (is_empty)
2437 goto error;
2438 components[i].end = this->interval.end;
2439 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2440 }
2441 if (i < 4)
2442 goto error;
2443
2444 HE2BE(reinterpret_cast<uint32_t&>(value.s_addr));
2445 this->interval.start = start;
2446 return true;
2447
2448 error:
2449 invalidate();
2450 return false;
2451 }
2452
// Digit sub-parsers set by the constructor; do_match() counts a match of m_digit_N as
// decimal digit N.
2453 std::shared_ptr<basic_parser<T>>
2454 m_digit_0,
2455 m_digit_1,
2456 m_digit_2,
2457 m_digit_3,
2458 m_digit_4,
2459 m_digit_5,
2460 m_digit_6,
2461 m_digit_7,
2462 m_digit_8,
2463 m_digit_9;
// Sub-parser that must match between two consecutive components.
2464 std::shared_ptr<basic_parser<T>> m_separator;
2465 };
2466
// Character-type aliases of basic_ipv4_address. tipv4_address picks the wide or narrow
// variant according to _UNICODE; sgml_ipv4_address is the same instantiation as ipv4_address.
2467 using ipv4_address = basic_ipv4_address<char>;
2468 using wipv4_address = basic_ipv4_address<wchar_t>;
2469#ifdef _UNICODE
2470 using tipv4_address = wipv4_address;
2471#else
2472 using tipv4_address = ipv4_address;
2473#endif
2474 using sgml_ipv4_address = basic_ipv4_address<char>;
2475
2479 template <class T>
2481 {
2482 public:
// Constructs the parser; the locale (default std::locale()) is forwarded to basic_parser<T>.
2483 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2484
2485 protected:
2486 virtual bool do_match(
2487 _In_reads_or_z_opt_(end) const T* text,
2488 _In_ size_t start = 0,
2489 _In_ size_t end = SIZE_MAX,
2490 _In_ int flags = match_default)
2491 {
2492 _Assume_(text || start >= end);
2493 if (start < end && text[start]) {
2494 if (text[start] == '-' ||
2495 text[start] == '_' ||
2496 text[start] == ':' ||
2497 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2498 {
2499 this->interval.end = (this->interval.start = start) + 1;
2500 return true;
2501 }
2502 }
2503 this->interval.invalidate();
2504 return false;
2505 }
2506 };
2507
2510#ifdef _UNICODE
2512#else
2514#endif
2515
2520 {
2521 public:
// Constructs the parser; the locale (default std::locale()) is forwarded to sgml_parser.
2522 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2523
2524 protected:
2525 virtual bool do_match(
2526 _In_reads_or_z_(end) const char* text,
2527 _In_ size_t start = 0,
2528 _In_ size_t end = SIZE_MAX,
2529 _In_ int flags = match_default)
2530 {
2531 _Assume_(text || start >= end);
2532 if (start < end && text[start]) {
2533 wchar_t buf[5];
2534 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2535 const wchar_t* chr_end = chr + stdex::strlen(chr);
2536 if (((chr[0] == L'-' ||
2537 chr[0] == L'_' ||
2538 chr[0] == L':') && chr[1] == 0) ||
2539 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2540 {
2541 this->interval.start = start;
2542 return true;
2543 }
2544 }
2545 this->interval.invalidate();
2546 return false;
2547 }
2548 };
2549
2553 template <class T>
2555 {
2556 public:
// Constructor parameters, member initializers and body of the IPv6 address parser.
// NOTE(review): the listing numbers skip 2557, 2578 and 2597 here, so the line naming the
// constructor and two initializer lines are not visible in this chunk - verify against
// the original header.
//   digit_0..digit_15   Sub-parsers stored in m_digit_0..m_digit_15; do_match() counts a
//                       match of digit_N as hexadecimal digit N.
//   separator           Sub-parser stored in m_separator (matched between components).
//   scope_id_separator  Optional (default nullptr) sub-parser stored in m_scope_id_separator.
//   _scope_id           Optional (default nullptr); no initializer using it is visible in
//                       this chunk - presumably it initializes the scope_id member.
//   locale              Defaults to std::locale(); its use is not visible in this chunk.
2558 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2559 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2560 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2561 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2562 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2563 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2564 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2565 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2566 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2567 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2568 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2569 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2570 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2571 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2572 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2573 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2574 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2575 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2576 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2577 _In_ const std::locale& locale = std::locale()) :
2579 m_digit_0(digit_0),
2580 m_digit_1(digit_1),
2581 m_digit_2(digit_2),
2582 m_digit_3(digit_3),
2583 m_digit_4(digit_4),
2584 m_digit_5(digit_5),
2585 m_digit_6(digit_6),
2586 m_digit_7(digit_7),
2587 m_digit_8(digit_8),
2588 m_digit_9(digit_9),
2589 m_digit_10(digit_10),
2590 m_digit_11(digit_11),
2591 m_digit_12(digit_12),
2592 m_digit_13(digit_13),
2593 m_digit_14(digit_14),
2594 m_digit_15(digit_15),
2595 m_separator(separator),
2596 m_scope_id_separator(scope_id_separator),
2598 {
// Start with an all-zero address value.
2599 memset(&value, 0, sizeof(value));
2600 }
2601
// Resets the parser state: each of the eight component intervals gets start = 1, end = 0
// (start past end), the address value is zeroed and the scope ID parser, when present,
// is invalidated.
// NOTE(review): listing number 2622 is skipped before the closing brace - presumably a
// call to the base-class invalidate(); verify against the original header.
2602 virtual void invalidate()
2603 {
2604 components[0].start = 1;
2605 components[0].end = 0;
2606 components[1].start = 1;
2607 components[1].end = 0;
2608 components[2].start = 1;
2609 components[2].end = 0;
2610 components[3].start = 1;
2611 components[3].end = 0;
2612 components[4].start = 1;
2613 components[4].end = 0;
2614 components[5].start = 1;
2615 components[5].end = 0;
2616 components[6].start = 1;
2617 components[6].end = 0;
2618 components[7].start = 1;
2619 components[7].end = 0;
2620 memset(&value, 0, sizeof(value));
// scope_id is optional, hence the null check.
2621 if (scope_id) scope_id->invalidate();
2623 }
2624
2627 std::shared_ptr<basic_parser<T>> scope_id;
2628
2629 protected:
2630 virtual bool do_match(
2631 _In_reads_or_z_opt_(end) const T* text,
2632 _In_ size_t start = 0,
2633 _In_ size_t end = SIZE_MAX,
2634 _In_ int flags = match_default)
2635 {
2636 _Assume_(text || start >= end);
2637 this->interval.end = start;
2638 memset(&value, 0, sizeof(value));
2639
2640 size_t i, compaction_i = SIZE_MAX, compaction_start = start;
2641 for (i = 0; i < 8; i++) {
2642 bool is_empty = true;
2643
2644 if (m_separator->match(text, this->interval.end, end, flags)) {
2645 // : found
2646 this->interval.end = m_separator->interval.end;
2647 if (m_separator->match(text, this->interval.end, end, flags)) {
2648 // :: found
2649 if (compaction_i == SIZE_MAX) {
2650 // Zero compaction start
2651 compaction_i = i;
2652 compaction_start = m_separator->interval.start;
2653 this->interval.end = m_separator->interval.end;
2654 }
2655 else {
2656 // More than one zero compaction
2657 break;
2658 }
2659 }
2660 else if (!i) {
2661 // Leading : found
2662 goto error;
2663 }
2664 }
2665 else if (i) {
2666 // : missing
2667 break;
2668 }
2669
2670 components[i].start = this->interval.end;
2671 size_t x;
2672 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2673 size_t dig, digit_end;
2674 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2675 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2676 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2677 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2678 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2679 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2680 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2681 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2682 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2683 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2684 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2685 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2686 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2687 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2688 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2689 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2690 else break;
2691 size_t x_n = x * 16 + dig;
2692 if (x_n <= 0xffff) {
2693 x = x_n;
2694 this->interval.end = digit_end;
2695 is_empty = false;
2696 }
2697 else
2698 break;
2699 }
2700 if (is_empty) {
2701 if (compaction_i != SIZE_MAX) {
2702 // Zero compaction active: no sweat.
2703 break;
2704 }
2705 goto error;
2706 }
2707 components[i].end = this->interval.end;
2708 HE2BE(reinterpret_cast<uint16_t&>(this->value.s6_words[i]));
2709 }
2710
2711 if (compaction_i != SIZE_MAX) {
2712 // Align components right due to zero compaction.
2713 size_t j, k;
2714 for (j = 8, k = i; k > compaction_i;) {
2715 this->value.s6_words[--j] = this->value.s6_words[--k];
2717 }
2718 for (; j > compaction_i;) {
2719 this->value.s6_words[--j] = 0;
2720 components[j].start =
2722 }
2723 }
2724 else if (i < 8)
2725 goto error;
2726
2727 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2728 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2729 this->interval.end = scope_id->interval.end;
2730 else if (scope_id)
2731 scope_id->invalidate();
2732
2733 this->interval.start = start;
2734 return true;
2735
2736 error:
2737 invalidate();
2738 return false;
2739 }
2740
// Digit sub-parsers set by the constructor; do_match() counts a match of m_digit_N as
// hexadecimal digit N.
2741 std::shared_ptr<basic_parser<T>>
2742 m_digit_0,
2743 m_digit_1,
2744 m_digit_2,
2745 m_digit_3,
2746 m_digit_4,
2747 m_digit_5,
2748 m_digit_6,
2749 m_digit_7,
2750 m_digit_8,
2751 m_digit_9,
2752 m_digit_10,
2753 m_digit_11,
2754 m_digit_12,
2755 m_digit_13,
2756 m_digit_14,
2757 m_digit_15;
// Component separator, and the optional (may be null) separator introducing the scope ID.
2758 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2759 };
2760
// Character-type aliases of basic_ipv6_address. tipv6_address picks the wide or narrow
// variant according to _UNICODE; sgml_ipv6_address is the same instantiation as ipv6_address.
2761 using ipv6_address = basic_ipv6_address<char>;
2762 using wipv6_address = basic_ipv6_address<wchar_t>;
2763#ifdef _UNICODE
2764 using tipv6_address = wipv6_address;
2765#else
2766 using tipv6_address = ipv6_address;
2767#endif
2768 using sgml_ipv6_address = basic_ipv6_address<char>;
2769
2773 template <class T>
2775 {
2776 public:
// Constructor parameters and initializers of the DNS domain character parser.
// NOTE(review): the listing numbers skip 2777 and 2780 here, so the line naming the
// constructor and one initializer line are not visible in this chunk.
//   allow_idn  Stored in m_allow_idn; when set, do_match() also accepts characters the
//              locale classifies as alphanumeric.
//   locale     Defaults to std::locale(); its use is not visible in this chunk.
// allow_on_edge starts out as true and is updated by every successful do_match().
2778 _In_ bool allow_idn,
2779 _In_ const std::locale& locale = std::locale()) :
2781 m_allow_idn(allow_idn),
2782 allow_on_edge(true)
2783 {}
2784
2786
2787 protected:
2788 virtual bool do_match(
2789 _In_reads_or_z_opt_(end) const T* text,
2790 _In_ size_t start = 0,
2791 _In_ size_t end = SIZE_MAX,
2792 _In_ int flags = match_default)
2793 {
2794 _Assume_(text || start >= end);
2795 if (start < end && text[start]) {
2796 if (('A' <= text[start] && text[start] <= 'Z') ||
2797 ('a' <= text[start] && text[start] <= 'z') ||
2798 ('0' <= text[start] && text[start] <= '9'))
2799 allow_on_edge = true;
2800 else if (text[start] == '-')
2801 allow_on_edge = false;
2802 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2803 allow_on_edge = true;
2804 else {
2805 this->interval.invalidate();
2806 return false;
2807 }
2808 this->interval.end = (this->interval.start = start) + 1;
2809 return true;
2810 }
2811 this->interval.invalidate();
2812 return false;
2813 }
2814
2815 bool m_allow_idn;
2816 };
2817
// Character-type aliases of basic_dns_domain_char. tdns_domain_char picks the wide or
// narrow variant according to _UNICODE.
2818 using dns_domain_char = basic_dns_domain_char<char>;
2819 using wdns_domain_char = basic_dns_domain_char<wchar_t>;
2820#ifdef _UNICODE
2821 using tdns_domain_char = wdns_domain_char;
2822#else
2823 using tdns_domain_char = dns_domain_char;
2824#endif
2825
2830 {
2831 public:
// Constructor parameters of the SGML DNS domain character parser; the body is empty.
// NOTE(review): the listing numbers skip 2832 and 2835 here, so the line naming the
// constructor and its initializer are not visible in this chunk - presumably both
// arguments are forwarded to the base class; verify against the original header.
2833 _In_ bool allow_idn,
2834 _In_ const std::locale& locale = std::locale()) :
2836 {}
2837
2838 protected:
2839 virtual bool do_match(
2840 _In_reads_or_z_(end) const char* text,
2841 _In_ size_t start = 0,
2842 _In_ size_t end = SIZE_MAX,
2843 _In_ int flags = match_default)
2844 {
2845 _Assume_(text || start >= end);
2846 if (start < end && text[start]) {
2847 wchar_t buf[5];
2848 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2849 const wchar_t* chr_end = chr + stdex::strlen(chr);
2850 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2851 ('a' <= chr[0] && chr[0] <= 'z') ||
2852 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2853 allow_on_edge = true;
2854 else if (chr[0] == '-' && chr[1] == 0)
2855 allow_on_edge = false;
2856 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2857 allow_on_edge = true;
2858 else {
2859 this->interval.invalidate();
2860 return false;
2861 }
2862 this->interval.start = start;
2863 return true;
2864 }
2865 this->interval.invalidate();
2866 return false;
2867 }
2868 };
2869
2873 template <class T>
2875 {
2876 public:
// Constructor parameters and initializers of the DNS name parser; the body is empty.
// NOTE(review): the listing numbers skip 2877 and 2882-2883 here, so the line naming the
// constructor and two initializer lines are not visible in this chunk.
//   allow_absolute  No initializer using it is visible here; do_match() reads
//                   m_allow_absolute to decide whether a trailing separator is part of the match.
//   domain_char     Stored in m_domain_char; matches single domain characters.
//   separator       Stored in m_separator; matched between domain labels.
//   locale          Defaults to std::locale(); its use is not visible in this chunk.
2878 _In_ bool allow_absolute,
2879 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2880 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2881 _In_ const std::locale& locale = std::locale()) :
2884 m_domain_char(domain_char),
2885 m_separator(separator)
2886 {}
2887
2888 protected:
// Matches a DNS name: a sequence of domain labels joined by the separator.
//
// text/start/end delimit the input; flags is forwarded to the sub-parsers.
// Returns true when at least one label was started; this->interval then spans from
// start to the end of the last character that is allowed on a label edge (plus the
// trailing separator when m_allow_absolute is set). Returns false and invalidates
// this->interval when not even the first label could be started.
2889 virtual bool do_match(
2890 _In_reads_or_z_opt_(end) const T* text,
2891 _In_ size_t start = 0,
2892 _In_ size_t end = SIZE_MAX,
2893 _In_ int flags = match_default)
2894 {
2895 _Assume_(text || start >= end);
2896 size_t i = start, count;
// One iteration per label; at most 127 labels are consumed.
2897 for (count = 0; i < end && text[i] && count < 127; count++) {
// A label must begin with a character that is allowed on its edge.
2898 if (m_domain_char->match(text, i, end, flags) &&
2899 m_domain_char->allow_on_edge)
2900 {
2901 // Domain start
2902 this->interval.end = i = m_domain_char->interval.end;
2903 while (i < end && text[i]) {
// The separator only ends a label when the previous character may stand on an edge.
2904 if (m_domain_char->allow_on_edge &&
2905 m_separator->match(text, i, end, flags))
2906 {
2907 // Domain end
// With m_allow_absolute the separator is included in the match; otherwise the match
// provisionally ends before it and only the scan position moves past it.
2908 if (m_allow_absolute)
2909 this->interval.end = i = m_separator->interval.end;
2910 else {
2911 this->interval.end = i;
2912 i = m_separator->interval.end;
2913 }
2914 break;
2915 }
2916 if (m_domain_char->match(text, i, end, flags)) {
// Characters not allowed on an edge advance the scan position only, so the match
// never ends on one of them.
2917 if (m_domain_char->allow_on_edge)
2918 this->interval.end = i = m_domain_char->interval.end;
2919 else
2920 i = m_domain_char->interval.end;
2921 }
2922 else {
// Neither a separator nor a domain character: the name ends here.
2923 this->interval.start = start;
2924 return true;
2925 }
2926 }
2927 }
2928 else
2929 break;
2930 }
2931 if (count) {
2932 this->interval.start = start;
2933 return true;
2934 }
2935 this->interval.invalidate();
2936 return false;
2937 }
2938
// Sub-parser for a single domain character; do_match() also reads its allow_on_edge flag.
2940 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
// Sub-parser for the separator between domain labels.
2941 std::shared_ptr<basic_parser<T>> m_separator;
2942 };
2943
// tdns_name picks the wide or narrow DNS name parser according to _UNICODE.
// NOTE(review): the dns_name / wdns_name aliases used here are not visible in this chunk
// (listing numbers 2944-2945 are skipped) - verify against the original header.
2946#ifdef _UNICODE
2947 using tdns_name = wdns_name;
2948#else
2949 using tdns_name = dns_name;
2950#endif
2952
2956 template <class T>
2958 {
2959 public:
// Constructs the parser; the locale (default std::locale()) is forwarded to basic_parser<T>.
2960 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2961
2962 protected:
2963 virtual bool do_match(
2964 _In_reads_or_z_opt_(end) const T* text,
2965 _In_ size_t start = 0,
2966 _In_ size_t end = SIZE_MAX,
2967 _In_ int flags = match_default)
2968 {
2969 _Assume_(text || start >= end);
2970 if (start < end && text[start]) {
2971 if (text[start] == '-' ||
2972 text[start] == '.' ||
2973 text[start] == '_' ||
2974 text[start] == '~' ||
2975 text[start] == '%' ||
2976 text[start] == '!' ||
2977 text[start] == '$' ||
2978 text[start] == '&' ||
2979 text[start] == '\'' ||
2980 //text[start] == '(' ||
2981 //text[start] == ')' ||
2982 text[start] == '*' ||
2983 text[start] == '+' ||
2984 text[start] == ',' ||
2985 text[start] == ';' ||
2986 text[start] == '=' ||
2987 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2988 {
2989 this->interval.end = (this->interval.start = start) + 1;
2990 return true;
2991 }
2992 }
2993 this->interval.invalidate();
2994 return false;
2995 }
2996 };
2997
3000#ifdef _UNICODE
3002#else
3004#endif
3005
3010 {
3011 public:
// Constructs the parser; the locale (default std::locale()) is forwarded to basic_url_username_char<char>.
3012 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3013
3014 protected:
3015 virtual bool do_match(
3016 _In_reads_or_z_(end) const char* text,
3017 _In_ size_t start = 0,
3018 _In_ size_t end = SIZE_MAX,
3019 _In_ int flags = match_default)
3020 {
3021 _Assume_(text || start >= end);
3022 if (start < end && text[start]) {
3023 wchar_t buf[5];
3024 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3025 const wchar_t* chr_end = chr + stdex::strlen(chr);
3026 if (((chr[0] == L'-' ||
3027 chr[0] == L'.' ||
3028 chr[0] == L'_' ||
3029 chr[0] == L'~' ||
3030 chr[0] == L'%' ||
3031 chr[0] == L'!' ||
3032 chr[0] == L'$' ||
3033 chr[0] == L'&' ||
3034 chr[0] == L'\'' ||
3035 //chr[0] == L'(' ||
3036 //chr[0] == L')' ||
3037 chr[0] == L'*' ||
3038 chr[0] == L'+' ||
3039 chr[0] == L',' ||
3040 chr[0] == L';' ||
3041 chr[0] == L'=') && chr[1] == 0) ||
3042 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3043 {
3044 this->interval.start = start;
3045 return true;
3046 }
3047 }
3048
3049 this->interval.invalidate();
3050 return false;
3051 }
3052 };
3053
3057 template <class T>
3059 {
3060 public:
// Constructs the parser; the locale (default std::locale()) is forwarded to basic_parser<T>.
3061 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3062
3063 protected:
3064 virtual bool do_match(
3065 _In_reads_or_z_opt_(end) const T* text,
3066 _In_ size_t start = 0,
3067 _In_ size_t end = SIZE_MAX,
3068 _In_ int flags = match_default)
3069 {
3070 _Assume_(text || start >= end);
3071 if (start < end && text[start]) {
3072 if (text[start] == '-' ||
3073 text[start] == '.' ||
3074 text[start] == '_' ||
3075 text[start] == '~' ||
3076 text[start] == '%' ||
3077 text[start] == '!' ||
3078 text[start] == '$' ||
3079 text[start] == '&' ||
3080 text[start] == '\'' ||
3081 text[start] == '(' ||
3082 text[start] == ')' ||
3083 text[start] == '*' ||
3084 text[start] == '+' ||
3085 text[start] == ',' ||
3086 text[start] == ';' ||
3087 text[start] == '=' ||
3088 text[start] == ':' ||
3089 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3090 {
3091 this->interval.end = (this->interval.start = start) + 1;
3092 return true;
3093 }
3094 }
3095 this->interval.invalidate();
3096 return false;
3097 }
3098 };
3099
3102#ifdef _UNICODE
3104#else
3106#endif
3107
3112 {
3113 public:
// Constructs the parser; the locale (default std::locale()) is forwarded to basic_url_password_char<char>.
3114 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3115
3116 protected:
3117 virtual bool do_match(
3118 _In_reads_or_z_(end) const char* text,
3119 _In_ size_t start = 0,
3120 _In_ size_t end = SIZE_MAX,
3121 _In_ int flags = match_default)
3122 {
3123 _Assume_(text || start >= end);
3124 if (start < end && text[start]) {
3125 wchar_t buf[5];
3126 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3127 const wchar_t* chr_end = chr + stdex::strlen(chr);
3128 if (((chr[0] == L'-' ||
3129 chr[0] == L'.' ||
3130 chr[0] == L'_' ||
3131 chr[0] == L'~' ||
3132 chr[0] == L'%' ||
3133 chr[0] == L'!' ||
3134 chr[0] == L'$' ||
3135 chr[0] == L'&' ||
3136 chr[0] == L'\'' ||
3137 chr[0] == L'(' ||
3138 chr[0] == L')' ||
3139 chr[0] == L'*' ||
3140 chr[0] == L'+' ||
3141 chr[0] == L',' ||
3142 chr[0] == L';' ||
3143 chr[0] == L'=' ||
3144 chr[0] == L':') && chr[1] == 0) ||
3145 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3146 {
3147 this->interval.start = start;
3148 return true;
3149 }
3150 }
3151 this->interval.invalidate();
3152 return false;
3153 }
3154 };
3155
3159 template <class T>
3161 {
3162 public:
// Constructs the parser; the locale (default std::locale()) is forwarded to basic_parser<T>.
3163 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3164
3165 protected:
3166 virtual bool do_match(
3167 _In_reads_or_z_opt_(end) const T* text,
3168 _In_ size_t start = 0,
3169 _In_ size_t end = SIZE_MAX,
3170 _In_ int flags = match_default)
3171 {
3172 _Assume_(text || start >= end);
3173 if (start < end && text[start]) {
3174 if (text[start] == '/' ||
3175 text[start] == '-' ||
3176 text[start] == '.' ||
3177 text[start] == '_' ||
3178 text[start] == '~' ||
3179 text[start] == '%' ||
3180 text[start] == '!' ||
3181 text[start] == '$' ||
3182 text[start] == '&' ||
3183 text[start] == '\'' ||
3184 text[start] == '(' ||
3185 text[start] == ')' ||
3186 text[start] == '*' ||
3187 text[start] == '+' ||
3188 text[start] == ',' ||
3189 text[start] == ';' ||
3190 text[start] == '=' ||
3191 text[start] == ':' ||
3192 text[start] == '@' ||
3193 text[start] == '?' ||
3194 text[start] == '#' ||
3195 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3196 {
3197 this->interval.end = (this->interval.start = start) + 1;
3198 return true;
3199 }
3200 }
3201 this->interval.invalidate();
3202 return false;
3203 }
3204 };
3205
3208#ifdef _UNICODE
3210#else
3212#endif
3213
3218 {
3219 public:
// Constructs the parser; the locale (default std::locale()) is forwarded to basic_url_path_char<char>.
3220 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3221
3222 protected:
3223 virtual bool do_match(
3224 _In_reads_or_z_(end) const char* text,
3225 _In_ size_t start = 0,
3226 _In_ size_t end = SIZE_MAX,
3227 _In_ int flags = match_default)
3228 {
3229 _Assume_(text || start >= end);
3230 if (start < end && text[start]) {
3231 wchar_t buf[5];
3232 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3233 const wchar_t* chr_end = chr + stdex::strlen(chr);
3234 if (((chr[0] == L'/' ||
3235 chr[0] == L'-' ||
3236 chr[0] == L'.' ||
3237 chr[0] == L'_' ||
3238 chr[0] == L'~' ||
3239 chr[0] == L'%' ||
3240 chr[0] == L'!' ||
3241 chr[0] == L'$' ||
3242 chr[0] == L'&' ||
3243 chr[0] == L'\'' ||
3244 chr[0] == L'(' ||
3245 chr[0] == L')' ||
3246 chr[0] == L'*' ||
3247 chr[0] == L'+' ||
3248 chr[0] == L',' ||
3249 chr[0] == L';' ||
3250 chr[0] == L'=' ||
3251 chr[0] == L':' ||
3252 chr[0] == L'@' ||
3253 chr[0] == L'?' ||
3254 chr[0] == L'#') && chr[1] == 0) ||
3255 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3256 {
3257 this->interval.start = start;
3258 return true;
3259 }
3260 }
3261 this->interval.invalidate();
3262 return false;
3263 }
3264 };
3265
3269 template <class T>
3271 {
3272 public:
// Constructor parameters and initializers of the URL path parser; the body is empty.
// NOTE(review): the listing numbers skip 3273 and 3278 here, so the line naming the
// constructor and one initializer line are not visible in this chunk.
//   path_char       Stored in m_path_char; matches one character of the path, query or bookmark.
//   query_start     Stored in m_query_start; marks where the query begins.
//   bookmark_start  Stored in m_bookmark_start; marks where the bookmark begins.
//   locale          Defaults to std::locale(); its use is not visible in this chunk.
3274 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3275 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3276 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3277 _In_ const std::locale& locale = std::locale()) :
3279 m_path_char(path_char),
3280 m_query_start(query_start),
3281 m_bookmark_start(bookmark_start)
3282 {}
3283
// Resets the path, query and bookmark intervals to start = 1, end = 0 (start past end).
// NOTE(review): listing number 3292 is skipped before the closing brace - presumably a
// call to the base-class invalidate(); verify against the original header.
3284 virtual void invalidate()
3285 {
3286 path.start = 1;
3287 path.end = 0;
3288 query.start = 1;
3289 query.end = 0;
3290 bookmark.start = 1;
3291 bookmark.end = 0;
3293 }
3294
3297 stdex::interval<size_t> bookmark;
3298
3299 protected:
// Matches a URL path optionally followed by a query and/or a bookmark, and records the
// interval of each part in path, query and bookmark.
//
// text/start/end delimit the input; flags is forwarded to the sub-parsers.
// Returns true once a query or bookmark start marker has been seen (whatever follows
// it), or through the final block at the bottom; otherwise resets the intervals,
// invalidates this->interval and returns false.
// NOTE(review): listing number 3378 is skipped just before the final success block, so
// the condition guarding that block is not visible in this chunk - presumably it requires
// at least one matched character; verify against the original header.
3300 virtual bool do_match(
3301 _In_reads_or_z_opt_(end) const T* text,
3302 _In_ size_t start = 0,
3303 _In_ size_t end = SIZE_MAX,
3304 _In_ int flags = match_default)
3305 {
3306 _Assume_(text || start >= end);
3307
// The path begins at start; query and bookmark begin as start = 1, end = 0 until found.
3308 this->interval.end = start;
3309 path.start = start;
3310 query.start = 1;
3311 query.end = 0;
3312 bookmark.start = 1;
3313 bookmark.end = 0;
3314
3315 for (;;) {
3316 if (this->interval.end >= end || !text[this->interval.end])
3317 break;
// Query start marker: the path ends here and the query follows.
3318 if (m_query_start->match(text, this->interval.end, end, flags)) {
3319 path.end = this->interval.end;
3320 query.start = this->interval.end = m_query_start->interval.end;
3321 for (;;) {
3322 if (this->interval.end >= end || !text[this->interval.end]) {
3323 query.end = this->interval.end;
3324 break;
3325 }
// Bookmark start marker inside the query: the query ends here and the bookmark follows.
3326 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3327 query.end = this->interval.end;
3328 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3329 for (;;) {
3330 if (this->interval.end >= end || !text[this->interval.end]) {
3331 bookmark.end = this->interval.end;
3332 break;
3333 }
3334 if (m_path_char->match(text, this->interval.end, end, flags))
3335 this->interval.end = m_path_char->interval.end;
3336 else {
3337 bookmark.end = this->interval.end;
3338 break;
3339 }
3340 }
3341 this->interval.start = start;
3342 return true;
3343 }
3344 if (m_path_char->match(text, this->interval.end, end, flags))
3345 this->interval.end = m_path_char->interval.end;
3346 else {
3347 query.end = this->interval.end;
3348 break;
3349 }
3350 }
3351 this->interval.start = start;
3352 return true;
3353 }
// Bookmark start marker directly after the path (no query).
3354 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3355 path.end = this->interval.end;
3356 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3357 for (;;) {
3358 if (this->interval.end >= end || !text[this->interval.end]) {
3359 bookmark.end = this->interval.end;
3360 break;
3361 }
3362 if (m_path_char->match(text, this->interval.end, end, flags))
3363 this->interval.end = m_path_char->interval.end;
3364 else {
3365 bookmark.end = this->interval.end;
3366 break;
3367 }
3368 }
3369 this->interval.start = start;
3370 return true;
3371 }
// Ordinary path character: keep extending the path.
3372 if (m_path_char->match(text, this->interval.end, end, flags))
3373 this->interval.end = m_path_char->interval.end;
3374 else
3375 break;
3376 }
3377
3379 path.end = this->interval.end;
3380 this->interval.start = start;
3381 return true;
3382 }
3383
// Failure: reset the path and bookmark intervals (query is still start = 1, end = 0 here).
3384 path.start = 1;
3385 path.end = 0;
3386 bookmark.start = 1;
3387 bookmark.end = 0;
3388 this->interval.invalidate();
3389 return false;
3390 }
3391
// Sub-parsers set by the constructor: a single path/query/bookmark character, the query
// start marker and the bookmark start marker.
3392 std::shared_ptr<basic_parser<T>> m_path_char;
3393 std::shared_ptr<basic_parser<T>> m_query_start;
3394 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3395 };
3396
// turl_path picks the wide or narrow URL path parser according to _UNICODE.
// NOTE(review): the url_path / wurl_path aliases used here are not visible in this chunk
// (listing numbers 3397-3398 are skipped) - verify against the original header.
3399#ifdef _UNICODE
3400 using turl_path = wurl_path;
3401#else
3402 using turl_path = url_path;
3403#endif
3405
3409 template <class T>
3410 class basic_url : public basic_parser<T>
3411 {
3412 public:
// Constructs the URL parser from its sub-parsers.
//   _http_scheme, _ftp_scheme, _mailto_scheme, _file_scheme
//                 Scheme name parsers, stored in the public members of the same name
//                 without the leading underscore.
//   colon, slash, at, ip_lbracket, ip_rbracket
//                 Punctuation parsers, stored in m_colon, m_slash, m_at, m_ip_lbracket and
//                 m_ip_rbracket.
//   _username, _password, _ipv4_host, _ipv6_host, _dns_host, _port, _path
//                 Component parsers, stored in the public members of the same name
//                 without the leading underscore.
//   locale        Defaults to std::locale(); its use is not visible in this chunk.
// NOTE(review): listing number 3431 is skipped after the parameter list - presumably the
// base-class initializer; verify against the original header.
3413 basic_url(
3414 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3415 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3416 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3417 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3418 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3419 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3420 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3421 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3422 _In_ const std::shared_ptr<basic_parser<T>>& at,
3423 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3424 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3425 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3426 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3427 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3428 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3429 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3430 _In_ const std::locale& locale = std::locale()) :
3432 http_scheme(_http_scheme),
3433 ftp_scheme(_ftp_scheme),
3434 mailto_scheme(_mailto_scheme),
3435 file_scheme(_file_scheme),
3436 m_colon(colon),
3437 m_slash(slash),
3438 username(_username),
3439 password(_password),
3440 m_at(at),
3441 m_ip_lbracket(ip_lbracket),
3442 m_ip_rbracket(ip_rbracket),
3443 ipv4_host(_ipv4_host),
3444 ipv6_host(_ipv6_host),
3445 dns_host(_dns_host),
3446 port(_port),
3447 path(_path)
3448 {}
3449
// Invalidates every public component parser (schemes, credentials, hosts, port and path).
// All of them are dereferenced unconditionally, so none may be null.
// NOTE(review): listing number 3463 is skipped before the closing brace - presumably a
// call to the base-class invalidate(); verify against the original header.
3450 virtual void invalidate()
3451 {
3452 http_scheme->invalidate();
3453 ftp_scheme->invalidate();
3454 mailto_scheme->invalidate();
3455 file_scheme->invalidate();
3456 username->invalidate();
3457 password->invalidate();
3458 ipv4_host->invalidate();
3459 ipv6_host->invalidate();
3460 dns_host->invalidate();
3461 port->invalidate();
3462 path->invalidate();
3464 }
3465
3466 std::shared_ptr<basic_parser<T>> http_scheme;
3467 std::shared_ptr<basic_parser<T>> ftp_scheme;
3468 std::shared_ptr<basic_parser<T>> mailto_scheme;
3469 std::shared_ptr<basic_parser<T>> file_scheme;
3470 std::shared_ptr<basic_parser<T>> username;
3471 std::shared_ptr<basic_parser<T>> password;
3472 std::shared_ptr<basic_parser<T>> ipv4_host;
3473 std::shared_ptr<basic_parser<T>> ipv6_host;
3474 std::shared_ptr<basic_parser<T>> dns_host;
3475 std::shared_ptr<basic_parser<T>> port;
3476 std::shared_ptr<basic_parser<T>> path;
3477
3478 protected:
3479 virtual bool do_match(
3480 _In_reads_or_z_opt_(end) const T* text,
3481 _In_ size_t start = 0,
3482 _In_ size_t end = SIZE_MAX,
3483 _In_ int flags = match_default)
3484 {
3485 _Assume_(text || start >= end);
3486
3487 this->interval.end = start;
3488
3489 if (http_scheme->match(text, this->interval.end, end, flags) &&
3490 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3491 m_slash->match(text, m_colon->interval.end, end, flags) &&
3492 m_slash->match(text, m_slash->interval.end, end, flags))
3493 {
3494 // http://
3495 this->interval.end = m_slash->interval.end;
3496 ftp_scheme->invalidate();
3497 mailto_scheme->invalidate();
3498 file_scheme->invalidate();
3499 }
3500 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3501 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3502 m_slash->match(text, m_colon->interval.end, end, flags) &&
3503 m_slash->match(text, m_slash->interval.end, end, flags))
3504 {
3505 // ftp://
3506 this->interval.end = m_slash->interval.end;
3507 http_scheme->invalidate();
3508 mailto_scheme->invalidate();
3509 file_scheme->invalidate();
3510 }
3511 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3512 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3513 {
3514 // mailto:
3515 this->interval.end = m_colon->interval.end;
3516 http_scheme->invalidate();
3517 ftp_scheme->invalidate();
3518 file_scheme->invalidate();
3519 }
3520 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3521 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3522 m_slash->match(text, m_colon->interval.end, end, flags) &&
3523 m_slash->match(text, m_slash->interval.end, end, flags))
3524 {
3525 // file://
3526 this->interval.end = m_slash->interval.end;
3527 http_scheme->invalidate();
3528 ftp_scheme->invalidate();
3529 mailto_scheme->invalidate();
3530 }
3531 else {
3532 // Default to http:
3533 http_scheme->invalidate();
3534 ftp_scheme->invalidate();
3535 mailto_scheme->invalidate();
3536 file_scheme->invalidate();
3537 }
3538
3539 if (ftp_scheme->interval) {
3540 if (username->match(text, this->interval.end, end, flags)) {
3541 if (m_colon->match(text, username->interval.end, end, flags) &&
3542 password->match(text, m_colon->interval.end, end, flags) &&
3543 m_at->match(text, password->interval.end, end, flags))
3544 {
3545 // Username and password
3546 this->interval.end = m_at->interval.end;
3547 }
3548 else if (m_at->match(text, this->interval.end, end, flags)) {
3549 // Username only
3550 this->interval.end = m_at->interval.end;
3551 password->invalidate();
3552 }
3553 else {
3554 username->invalidate();
3555 password->invalidate();
3556 }
3557 }
3558 else {
3559 username->invalidate();
3560 password->invalidate();
3561 }
3562
3563 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3564 // Host is IPv4
3565 this->interval.end = ipv4_host->interval.end;
3566 ipv6_host->invalidate();
3567 dns_host->invalidate();
3568 }
3569 else if (
3570 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3571 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3572 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3573 {
3574 // Host is IPv6
3575 this->interval.end = m_ip_rbracket->interval.end;
3576 ipv4_host->invalidate();
3577 dns_host->invalidate();
3578 }
3579 else if (dns_host->match(text, this->interval.end, end, flags)) {
3580 // Host is hostname
3581 this->interval.end = dns_host->interval.end;
3582 ipv4_host->invalidate();
3583 ipv6_host->invalidate();
3584 }
3585 else {
3586 invalidate();
3587 return false;
3588 }
3589
3590 if (m_colon->match(text, this->interval.end, end, flags) &&
3591 port->match(text, m_colon->interval.end, end, flags))
3592 {
3593 // Port
3594 this->interval.end = port->interval.end;
3595 }
3596 else
3597 port->invalidate();
3598
3599 if (path->match(text, this->interval.end, end, flags)) {
3600 // Path
3601 this->interval.end = path->interval.end;
3602 }
3603
3604 this->interval.start = start;
3605 return true;
3606 }
3607
3608 if (mailto_scheme->interval) {
3609 if (username->match(text, this->interval.end, end, flags) &&
3610 m_at->match(text, username->interval.end, end, flags))
3611 {
3612 // Username
3613 this->interval.end = m_at->interval.end;
3614 }
3615 else {
3616 invalidate();
3617 return false;
3618 }
3619
3620 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3621 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3622 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3623 {
3624 // Host is IPv4
3625 this->interval.end = m_ip_rbracket->interval.end;
3626 ipv6_host->invalidate();
3627 dns_host->invalidate();
3628 }
3629 else if (
3630 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3631 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3632 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3633 {
3634 // Host is IPv6
3635 this->interval.end = m_ip_rbracket->interval.end;
3636 ipv4_host->invalidate();
3637 dns_host->invalidate();
3638 }
3639 else if (dns_host->match(text, this->interval.end, end, flags)) {
3640 // Host is hostname
3641 this->interval.end = dns_host->interval.end;
3642 ipv4_host->invalidate();
3643 ipv6_host->invalidate();
3644 }
3645 else {
3646 invalidate();
3647 return false;
3648 }
3649
3650 password->invalidate();
3651 port->invalidate();
3652 path->invalidate();
3653 this->interval.start = start;
3654 return true;
3655 }
3656
3657 if (file_scheme->interval) {
3658 if (path->match(text, this->interval.end, end, flags)) {
3659 // Path
3660 this->interval.end = path->interval.end;
3661 }
3662
3663 username->invalidate();
3664 password->invalidate();
3665 ipv4_host->invalidate();
3666 ipv6_host->invalidate();
3667 dns_host->invalidate();
3668 port->invalidate();
3669 this->interval.start = start;
3670 return true;
3671 }
3672
3673 // "http://" found or defaulted to
3674
3675 // If "http://" explicit, test for username&password.
3676 if (http_scheme->interval &&
3677 username->match(text, this->interval.end, end, flags))
3678 {
3679 if (m_colon->match(text, username->interval.end, end, flags) &&
3680 password->match(text, m_colon->interval.end, end, flags) &&
3681 m_at->match(text, password->interval.end, end, flags))
3682 {
3683 // Username and password
3684 this->interval.end = m_at->interval.end;
3685 }
3686 else if (m_at->match(text, username->interval.end, end, flags)) {
3687 // Username only
3688 this->interval.end = m_at->interval.end;
3689 password->invalidate();
3690 }
3691 else {
3692 username->invalidate();
3693 password->invalidate();
3694 }
3695 }
3696 else {
3697 username->invalidate();
3698 password->invalidate();
3699 }
3700
3701 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3702 // Host is IPv4
3703 this->interval.end = ipv4_host->interval.end;
3704 ipv6_host->invalidate();
3705 dns_host->invalidate();
3706 }
3707 else if (
3708 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3709 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3710 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3711 {
3712 // Host is IPv6
3713 this->interval.end = m_ip_rbracket->interval.end;
3714 ipv4_host->invalidate();
3715 dns_host->invalidate();
3716 }
3717 else if (dns_host->match(text, this->interval.end, end, flags)) {
3718 // Host is hostname
3719 this->interval.end = dns_host->interval.end;
3720 ipv4_host->invalidate();
3721 ipv6_host->invalidate();
3722 }
3723 else {
3724 invalidate();
3725 return false;
3726 }
3727
3728 if (m_colon->match(text, this->interval.end, end, flags) &&
3729 port->match(text, m_colon->interval.end, end, flags))
3730 {
3731 // Port
3732 this->interval.end = port->interval.end;
3733 }
3734 else
3735 port->invalidate();
3736
3737 if (path->match(text, this->interval.end, end, flags)) {
3738 // Path
3739 this->interval.end = path->interval.end;
3740 }
3741
3742 this->interval.start = start;
3743 return true;
3744 }
3745
3746 std::shared_ptr<basic_parser<T>> m_colon;
3747 std::shared_ptr<basic_parser<T>> m_slash;
3748 std::shared_ptr<basic_parser<T>> m_at;
3749 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3750 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3751 };
3752
3753 using url = basic_url<char>;
3754 using wurl = basic_url<wchar_t>;
3755#ifdef _UNICODE
3756 using turl = wurl;
3757#else
3758 using turl = url;
3759#endif
3760 using sgml_url = basic_url<char>;
3761
3765 template <class T>
3767 {
3768 public:
3770 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3771 _In_ const std::shared_ptr<basic_parser<T>>& at,
3772 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3773 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3774 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3775 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3776 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3777 _In_ const std::locale& locale = std::locale()) :
3779 username(_username),
3780 m_at(at),
3781 m_ip_lbracket(ip_lbracket),
3782 m_ip_rbracket(ip_rbracket),
3783 ipv4_host(_ipv4_host),
3784 ipv6_host(_ipv6_host),
3785 dns_host(_dns_host)
3786 {}
3787
3788 virtual void invalidate()
3789 {
3790 username->invalidate();
3791 ipv4_host->invalidate();
3792 ipv6_host->invalidate();
3793 dns_host->invalidate();
3795 }
3796
3797 std::shared_ptr<basic_parser<T>> username;
3798 std::shared_ptr<basic_parser<T>> ipv4_host;
3799 std::shared_ptr<basic_parser<T>> ipv6_host;
3800 std::shared_ptr<basic_parser<T>> dns_host;
3801
3802 protected:
3803 virtual bool do_match(
3804 _In_reads_or_z_opt_(end) const T* text,
3805 _In_ size_t start = 0,
3806 _In_ size_t end = SIZE_MAX,
3807 _In_ int flags = match_default)
3808 {
3809 _Assume_(text || start >= end);
3810
3811 if (username->match(text, start, end, flags) &&
3812 m_at->match(text, username->interval.end, end, flags))
3813 {
3814 // Username@
3815 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3816 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3817 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3818 {
3819 // Host is IPv4
3820 this->interval.end = m_ip_rbracket->interval.end;
3821 ipv6_host->invalidate();
3822 dns_host->invalidate();
3823 }
3824 else if (
3825 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3826 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3827 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3828 {
3829 // Host is IPv6
3830 this->interval.end = m_ip_rbracket->interval.end;
3831 ipv4_host->invalidate();
3832 dns_host->invalidate();
3833 }
3834 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3835 // Host is hostname
3836 this->interval.end = dns_host->interval.end;
3837 ipv4_host->invalidate();
3838 ipv6_host->invalidate();
3839 }
3840 else
3841 goto error;
3842 this->interval.start = start;
3843 return true;
3844 }
3845
3846 error:
3847 invalidate();
3848 return false;
3849 }
3850
3851 std::shared_ptr<basic_parser<T>> m_at;
3852 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3853 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3854 };
3855
3858#ifdef _UNICODE
3860#else
3862#endif
3864
3868 template <class T>
3870 {
3871 public:
3873 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3874 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3875 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3876 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3877 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3878 _In_ const std::locale& locale = std::locale()) :
3881 apex(_apex),
3882 eyes(_eyes),
3883 nose(_nose),
3884 mouth(_mouth)
3885 {}
3886
3887 virtual void invalidate()
3888 {
3889 if (emoticon) emoticon->invalidate();
3890 if (apex) apex->invalidate();
3891 eyes->invalidate();
3892 if (nose) nose->invalidate();
3893 mouth->invalidate();
3895 }
3896
3897 std::shared_ptr<basic_parser<T>> emoticon;
3898 std::shared_ptr<basic_parser<T>> apex;
3899 std::shared_ptr<basic_parser<T>> eyes;
3900 std::shared_ptr<basic_parser<T>> nose;
3901 std::shared_ptr<basic_set<T>> mouth;
3902
3903 protected:
3904 virtual bool do_match(
3905 _In_reads_or_z_opt_(end) const T* text,
3906 _In_ size_t start = 0,
3907 _In_ size_t end = SIZE_MAX,
3908 _In_ int flags = match_default)
3909 {
3910 _Assume_(text || start >= end);
3911
3912 if (emoticon && emoticon->match(text, start, end, flags)) {
3913 if (apex) apex->invalidate();
3914 eyes->invalidate();
3915 if (nose) nose->invalidate();
3916 mouth->invalidate();
3917 this->interval.start = start;
3918 this->interval.end = emoticon->interval.end;
3919 return true;
3920 }
3921
3922 this->interval.end = start;
3923
3924 if (apex && apex->match(text, this->interval.end, end, flags))
3925 this->interval.end = apex->interval.end;
3926
3927 if (eyes->match(text, this->interval.end, end, flags)) {
3928 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3929 mouth->match(text, nose->interval.end, end, flags))
3930 {
3931 size_t
3933 hit_offset = mouth->hit_offset;
3934 // Mouth may repeat :-)))))))
3935 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3936 mouth->interval.start = start_mouth;
3937 mouth->interval.end = this->interval.end;
3938 this->interval.start = start;
3939 return true;
3940 }
3941 if (mouth->match(text, eyes->interval.end, end, flags)) {
3942 size_t
3944 hit_offset = mouth->hit_offset;
3945 // Mouth may repeat :-)))))))
3946 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3947 if (nose) nose->invalidate();
3948 mouth->interval.start = start_mouth;
3949 mouth->interval.end = this->interval.end;
3950 this->interval.start = start;
3951 return true;
3952 }
3953 }
3954
3955 if (emoticon) emoticon->invalidate();
3956 if (apex) apex->invalidate();
3957 eyes->invalidate();
3958 if (nose) nose->invalidate();
3959 mouth->invalidate();
3960 this->interval.invalidate();
3961 return false;
3962 }
3963 };
3964
3965 using emoticon = basic_emoticon<char>;
3966 using wemoticon = basic_emoticon<wchar_t>;
3967#ifdef _UNICODE
3968 using temoticon = wemoticon;
3969#else
3970 using temoticon = emoticon;
3971#endif
3972 using sgml_emoticon = basic_emoticon<char>;
3973
/// Date component orders. Every non-zero value is a distinct bit, so values can be combined
/// into the format mask basic_date tests against.
enum date_format_t {
    date_format_none = 0,      ///< No format
    date_format_dmy  = 1 << 0, ///< Day, month, year
    date_format_mdy  = 1 << 1, ///< Month, day, year
    date_format_ymd  = 1 << 2, ///< Year, month, day
    date_format_ym   = 1 << 3, ///< Year, month
    date_format_my   = 1 << 4, ///< Month, year
    date_format_dm   = 1 << 5, ///< Day, month
    date_format_md   = 1 << 6, ///< Month, day
};
3987
3991 template <class T>
3992 class basic_date : public basic_parser<T>
3993 {
3994 public:
3995 basic_date(
3996 _In_ int format_mask,
3997 _In_ const std::shared_ptr<basic_integer<T>>& _day,
3998 _In_ const std::shared_ptr<basic_integer<T>>& _month,
3999 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4000 _In_ const std::shared_ptr<basic_set<T>>& separator,
4001 _In_ const std::shared_ptr<basic_parser<T>>& space,
4002 _In_ const std::locale& locale = std::locale()) :
4004 format(date_format_none),
4005 m_format_mask(format_mask),
4006 day(_day),
4007 month(_month),
4008 year(_year),
4009 m_separator(separator),
4010 m_space(space)
4011 {}
4012
4013 virtual void invalidate()
4014 {
4015 if (day) day->invalidate();
4016 if (month) month->invalidate();
4017 if (year) year->invalidate();
4018 format = date_format_none;
4020 }
4021
4022 date_format_t format;
4023 std::shared_ptr<basic_integer<T>> day;
4024 std::shared_ptr<basic_integer<T>> month;
4025 std::shared_ptr<basic_integer<T>> year;
4026
4027 protected:
4028 virtual bool do_match(
4029 _In_reads_or_z_opt_(end) const T* text,
4030 _In_ size_t start = 0,
4031 _In_ size_t end = SIZE_MAX,
4032 _In_ int flags = match_default)
4033 {
4034 _Assume_(text || start >= end);
4035
4036 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4037 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4038 if (day->match(text, start, end, flags)) {
4039 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4040 if (m_separator->match(text, this->interval.end, end, flags)) {
4041 size_t hit_offset = m_separator->hit_offset;
4042 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4043 if (month->match(text, this->interval.end, end, flags)) {
4044 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4045 if (m_separator->match(text, this->interval.end, end, flags) &&
4046 m_separator->hit_offset == hit_offset) // Both separators must match.
4047 {
4048 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4049 if (year->match(text, this->interval.end, end, flags) &&
4050 is_valid(day->value, month->value))
4051 {
4052 this->interval.start = start;
4053 this->interval.end = year->interval.end;
4054 format = date_format_dmy;
4055 return true;
4056 }
4057 }
4058 }
4059 }
4060 }
4061 }
4062
4063 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4064 if (month->match(text, start, end, flags)) {
4065 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4066 if (m_separator->match(text, this->interval.end, end, flags)) {
4067 size_t hit_offset = m_separator->hit_offset;
4068 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4069 if (day->match(text, this->interval.end, end, flags)) {
4070 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4071 if (m_separator->match(text, this->interval.end, end, flags) &&
4072 m_separator->hit_offset == hit_offset) // Both separators must match.
4073 {
4074 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4075 if (year->match(text, this->interval.end, end, flags) &&
4076 is_valid(day->value, month->value))
4077 {
4078 this->interval.start = start;
4079 this->interval.end = year->interval.end;
4080 format = date_format_mdy;
4081 return true;
4082 }
4083 }
4084 }
4085 }
4086 }
4087 }
4088
4089 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4090 if (year->match(text, start, end, flags)) {
4091 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4092 if (m_separator->match(text, this->interval.end, end, flags)) {
4093 size_t hit_offset = m_separator->hit_offset;
4094 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4095 if (month->match(text, this->interval.end, end, flags)) {
4096 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4097 if (m_separator->match(text, this->interval.end, end, flags) &&
4098 m_separator->hit_offset == hit_offset) // Both separators must match.
4099 {
4100 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4101 if (day->match(text, this->interval.end, end, flags) &&
4102 is_valid(day->value, month->value))
4103 {
4104 this->interval.start = start;
4105 this->interval.end = day->interval.end;
4106 format = date_format_ymd;
4107 return true;
4108 }
4109 }
4110 }
4111 }
4112 }
4113 }
4114
4115 if ((m_format_mask & date_format_ym) == date_format_ym) {
4116 if (year->match(text, start, end, flags)) {
4117 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4118 if (m_separator->match(text, this->interval.end, end, flags)) {
4119 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4120 if (month->match(text, this->interval.end, end, flags) &&
4121 is_valid(SIZE_MAX, month->value))
4122 {
4123 if (day) day->invalidate();
4124 this->interval.start = start;
4125 this->interval.end = month->interval.end;
4126 format = date_format_ym;
4127 return true;
4128 }
4129 }
4130 }
4131 }
4132
4133 if ((m_format_mask & date_format_my) == date_format_my) {
4134 if (month->match(text, start, end, flags)) {
4135 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4136 if (m_separator->match(text, this->interval.end, end, flags)) {
4137 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4138 if (year->match(text, this->interval.end, end, flags) &&
4139 is_valid(SIZE_MAX, month->value))
4140 {
4141 if (day) day->invalidate();
4142 this->interval.start = start;
4143 this->interval.end = year->interval.end;
4144 format = date_format_my;
4145 return true;
4146 }
4147 }
4148 }
4149 }
4150
4151 if ((m_format_mask & date_format_dm) == date_format_dm) {
4152 if (day->match(text, start, end, flags)) {
4153 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4154 if (m_separator->match(text, this->interval.end, end, flags)) {
4155 size_t hit_offset = m_separator->hit_offset;
4156 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4157 if (month->match(text, this->interval.end, end, flags) &&
4158 is_valid(day->value, month->value))
4159 {
4160 if (year) year->invalidate();
4161 this->interval.start = start;
4162 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4163 if (m_separator->match(text, this->interval.end, end, flags) &&
4164 m_separator->hit_offset == hit_offset) // Both separators must match.
4165 this->interval.end = m_separator->interval.end;
4166 else
4167 this->interval.end = month->interval.end;
4168 format = date_format_dm;
4169 return true;
4170 }
4171 }
4172 }
4173 }
4174
4175 if ((m_format_mask & date_format_md) == date_format_md) {
4176 if (month->match(text, start, end, flags)) {
4177 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4178 if (m_separator->match(text, this->interval.end, end, flags)) {
4179 size_t hit_offset = m_separator->hit_offset;
4180 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4181 if (day->match(text, this->interval.end, end, flags) &&
4182 is_valid(day->value, month->value))
4183 {
4184 if (year) year->invalidate();
4185 this->interval.start = start;
4186 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4187 if (m_separator->match(text, this->interval.end, end, flags) &&
4188 m_separator->hit_offset == hit_offset) // Both separators must match.
4189 this->interval.end = m_separator->interval.end;
4190 else
4191 this->interval.end = day->interval.end;
4192 format = date_format_md;
4193 return true;
4194 }
4195 }
4196 }
4197 }
4198
4199 if (day) day->invalidate();
4200 if (month) month->invalidate();
4201 if (year) year->invalidate();
4202 format = date_format_none;
4203 this->interval.invalidate();
4204 return false;
4205 }
4206
/// Checks that a day exists in a month
///
/// \param[in] day    Day of month (1-based). SIZE_MAX stands for "not given" and is treated as
///                   the 1st, which every month has.
/// \param[in] month  Month (1 = January). SIZE_MAX stands for "not given" and is treated as
///                   January, which has all 31 days.
///
/// \returns \c true when \p month is 1..12 and \p day is within that month; \c false otherwise
static bool is_valid(size_t day, size_t month)
{
    // Length of each month. The year is not known here, so February is given 29 days.
    static const size_t max_days[12] = { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };

    const size_t m = month == SIZE_MAX ? 1 : month;
    const size_t d = day == SIZE_MAX ? 1 : day;

    if (m < 1 || 12 < m)
        return false;
    return 1 <= d && d <= max_days[m - 1];
}
4238
4239 int m_format_mask;
4240 std::shared_ptr<basic_set<T>> m_separator;
4241 std::shared_ptr<basic_parser<T>> m_space;
4242 };
4243
4244 using date = basic_date<char>;
4245 using wdate = basic_date<wchar_t>;
4246#ifdef _UNICODE
4247 using tdate = wdate;
4248#else
4249 using tdate = date;
4250#endif
4252
4256 template <class T>
4257 class basic_time : public basic_parser<T>
4258 {
4259 public:
4260 basic_time(
4261 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4262 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4263 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4264 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4265 _In_ const std::shared_ptr<basic_set<T>>& separator,
4266 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4267 _In_ const std::locale& locale = std::locale()) :
4269 hour(_hour),
4270 minute(_minute),
4271 second(_second),
4272 millisecond(_millisecond),
4273 m_separator(separator),
4274 m_millisecond_separator(millisecond_separator)
4275 {}
4276
4277 virtual void invalidate()
4278 {
4279 hour->invalidate();
4280 minute->invalidate();
4281 if (second) second->invalidate();
4282 if (millisecond) millisecond->invalidate();
4284 }
4285
4286 std::shared_ptr<basic_integer10<T>> hour;
4287 std::shared_ptr<basic_integer10<T>> minute;
4288 std::shared_ptr<basic_integer10<T>> second;
4289 std::shared_ptr<basic_integer10<T>> millisecond;
4290
4291 protected:
4292 virtual bool do_match(
4293 _In_reads_or_z_opt_(end) const T* text,
4294 _In_ size_t start = 0,
4295 _In_ size_t end = SIZE_MAX,
4296 _In_ int flags = match_default)
4297 {
4298 _Assume_(text || start >= end);
4299
4300 if (hour->match(text, start, end, flags) &&
4301 m_separator->match(text, hour->interval.end, end, flags) &&
4302 minute->match(text, m_separator->interval.end, end, flags) &&
4303 minute->value < 60)
4304 {
4305 // hh::mm
4306 size_t hit_offset = m_separator->hit_offset;
4307 if (m_separator->match(text, minute->interval.end, end, flags) &&
4308 m_separator->hit_offset == hit_offset && // Both separators must match.
4309 second && second->match(text, m_separator->interval.end, end, flags) &&
4310 second->value < 60)
4311 {
4312 // hh::mm:ss
4313 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4314 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4315 millisecond->value < 1000)
4316 {
4317 // hh::mm:ss.mmmm
4318 this->interval.end = millisecond->interval.end;
4319 }
4320 else {
4321 if (millisecond) millisecond->invalidate();
4322 this->interval.end = second->interval.end;
4323 }
4324 }
4325 else {
4326 if (second) second->invalidate();
4327 if (millisecond) millisecond->invalidate();
4328 this->interval.end = minute->interval.end;
4329 }
4330 this->interval.start = start;
4331 return true;
4332 }
4333
4334 hour->invalidate();
4335 minute->invalidate();
4336 if (second) second->invalidate();
4337 if (millisecond) millisecond->invalidate();
4338 this->interval.invalidate();
4339 return false;
4340 }
4341
4342 std::shared_ptr<basic_set<T>> m_separator;
4343 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4344 };
4345
4346 using time = basic_time<char>;
4347 using wtime = basic_time<wchar_t>;
4348#ifdef _UNICODE
4349 using ttime = wtime;
4350#else
4351 using ttime = time;
4352#endif
4354
4358 template <class T>
4359 class basic_angle : public basic_parser<T>
4360 {
4361 public:
4363 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4364 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4365 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4366 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4367 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4368 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4369 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4370 _In_ const std::locale& locale = std::locale()) :
4372 degree(_degree),
4373 degree_separator(_degree_separator),
4374 minute(_minute),
4375 minute_separator(_minute_separator),
4376 second(_second),
4377 second_separator(_second_separator),
4378 decimal(_decimal)
4379 {}
4380
4381 virtual void invalidate()
4382 {
4383 degree->invalidate();
4384 degree_separator->invalidate();
4385 minute->invalidate();
4386 minute_separator->invalidate();
4387 if (second) second->invalidate();
4388 if (second_separator) second_separator->invalidate();
4389 if (decimal) decimal->invalidate();
4391 }
4392
4393 std::shared_ptr<basic_integer10<T>> degree;
4394 std::shared_ptr<basic_parser<T>> degree_separator;
4395 std::shared_ptr<basic_integer10<T>> minute;
4396 std::shared_ptr<basic_parser<T>> minute_separator;
4397 std::shared_ptr<basic_integer10<T>> second;
4398 std::shared_ptr<basic_parser<T>> second_separator;
4399 std::shared_ptr<basic_parser<T>> decimal;
4400
4401 protected:
// Matches an angle written as up to three consecutive components:
// degree + separator, minute + separator, second (+ optional separator),
// optionally followed by a decimal part.
//
// Each component is optional on its own, but at least one of degree, minute
// or second must match for the whole match to succeed.
//
// text  - text to parse
// start - index in text where matching begins
// end   - index in text where matching must stop
// flags - match flags, forwarded unchanged to every sub-parser
//
// Returns true and sets this->interval to [start, end of last accepted part)
// on success; otherwise invalidates this->interval and returns false.
4402 virtual bool do_match(
4403 _In_reads_or_z_opt_(end) const T* text,
4404 _In_ size_t start = 0,
4405 _In_ size_t end = SIZE_MAX,
4406 _In_ int flags = match_default)
4407 {
4408 _Assume_(text || start >= end);
4409
// this->interval.end is used as the running cursor; it only advances when a
// complete component has been accepted.
4410 this->interval.end = start;
4411
// A degree number counts only when it is immediately followed by its separator.
4412 if (degree->match(text, this->interval.end, end, flags) &&
4413 degree_separator->match(text, degree->interval.end, end, flags))
4414 {
4415 // Degrees
4416 this->interval.end = degree_separator->interval.end;
4417 }
4418 else {
// Discard a partial match (e.g. number without separator) so it does not
// count as a matched component below.
4419 degree->invalidate();
4420 degree_separator->invalidate();
4421 }
4422
// Minutes: number below 60 that is immediately followed by its separator.
4423 if (minute->match(text, this->interval.end, end, flags) &&
4424 minute->value < 60 &&
4425 minute_separator->match(text, minute->interval.end, end, flags))
4426 {
4427 // Minutes
4428 this->interval.end = minute_separator->interval.end;
4429 }
4430 else {
4431 minute->invalidate();
4432 minute_separator->invalidate();
4433 }
4434
// Seconds: number below 60; unlike degrees and minutes the separator is optional.
4435 if (second && second->match(text, this->interval.end, end, flags) &&
4436 second->value < 60)
4437 {
4438 // Seconds
4439 this->interval.end = second->interval.end;
4440 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4441 this->interval.end = second_separator->interval.end;
4442 else
4443 if (second_separator) second_separator->invalidate();
4444 }
4445 else {
4446 if (second) second->invalidate();
4447 if (second_separator) second_separator->invalidate();
4448 }
4449
// Succeed when at least one numeric component kept a non-empty interval.
4450 if (degree->interval.start < degree->interval.end ||
4451 minute->interval.start < minute->interval.end ||
4452 (second && second->interval.start < second->interval.end))
4453 {
// The decimal part is tried only after some component matched, and its
// absence does not fail the match.
4454 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4455 // Decimals
4456 this->interval.end = decimal->interval.end;
4457 }
4458 else if (decimal)
4459 decimal->invalidate();
4460 this->interval.start = start;
4461 return true;
4462 }
// Nothing matched: leave every sub-parser and this parser invalidated.
4463 if (decimal) decimal->invalidate();
4464 this->interval.invalidate();
4465 return false;
4466 }
4467 };
4468
// Character-type aliases of basic_angle.
// RRegElKot selects wangle when _UNICODE is defined and angle otherwise.
// NOTE(review): wangle is not declared in the lines visible here; presumably it
// is the wchar_t instantiation - confirm.
4469 using angle = basic_angle<char>;
4471#ifdef _UNICODE
4472 using RRegElKot = wangle;
4473#else
4474 using RRegElKot = angle;
4475#endif
4477
// Phone number parser.
//
// Accepts an optional plus sign followed by a sequence of digits, at most one
// pair of parentheses, separators and spaces. The matched plus sign, digits and
// parentheses (but not separators or spaces) are collected into `value`.
4481 template <class T>
4483 {
4484 public:
// Constructor parameters (each is stored in the member of the same name with
// an m_ prefix):
// digit        - parser matching one digit
// plus_sign    - parser matching the leading plus sign; may be null
// lparenthesis - set parser matching the opening parenthesis
// rparenthesis - set parser matching the closing parenthesis
// separator    - parser matching a separator between digit groups; may be null
// space        - parser matching a space between digit groups; may be null
// locale       - locale for the parser
4486 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4487 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4488 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4489 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4490 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4491 _In_ const std::shared_ptr<basic_parser<T>>& space,
4492 _In_ const std::locale& locale = std::locale()) :
4494 m_digit(digit),
4495 m_plus_sign(plus_sign),
4496 m_lparenthesis(lparenthesis),
4497 m_rparenthesis(rparenthesis),
4498 m_separator(separator),
4499 m_space(space)
4500 {}
4501
// Clears the collected phone number text.
4502 virtual void invalidate()
4503 {
4504 value.clear();
4506 }
4507
// Matched plus sign, digits and parentheses, with separators and spaces left out.
4508 std::basic_string<T> value;
4509
4510 protected:
// Scans text from start, accepting digits, one parenthesis pair, separators
// and spaces until something else is found.
//
// Returns true when at least one digit outside parentheses was seen. In that
// case this->interval covers [start, safe_digit_end) and value is truncated to
// safe_value_size, which drops trailing separators/spaces and any content of a
// parenthesis that was opened but not closed. Otherwise value is cleared,
// this->interval is invalidated and false is returned.
4511 virtual bool do_match(
4512 _In_reads_or_z_opt_(end) const T* text,
4513 _In_ size_t start = 0,
4514 _In_ size_t end = SIZE_MAX,
4515 _In_ int flags = match_default)
4516 {
4517 _Assume_(text || start >= end);
4518
// safe_digit_end / safe_value_size remember the last position and value length
// that are safe to report: they are updated after a digit outside parentheses
// and after a closing parenthesis.
4519 size_t safe_digit_end = start, safe_value_size = 0;
4520 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4521 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4522
4523 this->interval.end = start;
4524 value.clear();
// NOTE(review): both parenthesis parsers are dereferenced here without a null
// check, while the branches below test them for null first - confirm whether
// null is a supported value for them.
4525 m_lparenthesis->invalidate();
4526 m_rparenthesis->invalidate();
4527
// Optional leading plus sign; it is kept in value even if trailing content is trimmed.
4528 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4529 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4530 safe_value_size = value.size();
4531 this->interval.end = m_plus_sign->interval.end;
4532 }
4533
4534 for (;;) {
4535 _Assume_(text || this->interval.end >= end);
// Stop at the end of the range or at the zero terminator.
4536 if (this->interval.end >= end || !text[this->interval.end])
4537 break;
4538 if (m_digit->match(text, this->interval.end, end, flags)) {
4539 // Digit
4540 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4541 this->interval.end = m_digit->interval.end;
// Digits inside parentheses become "safe" only once the parenthesis is closed.
4542 if (!in_parentheses) {
4543 safe_digit_end = this->interval.end;
4544 safe_value_size = value.size();
4545 has_digits = true;
4546 }
4547 after_digit = true;
4548 after_parentheses = false;
4549 }
4550 else if (
4551 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4552 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4553 m_lparenthesis->match(text, this->interval.end, end, flags))
4554 {
4555 // Left parenthesis
4556 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4557 this->interval.end = m_lparenthesis->interval.end;
4558 in_parentheses = true;
4559 after_digit = false;
4560 after_parentheses = false;
4561 }
4562 else if (
4563 in_parentheses && // After left parenthesis
4564 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4565 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4566 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4567 {
4568 // Right parenthesis
4569 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4570 this->interval.end = m_rparenthesis->interval.end;
// The closed parenthesis and everything inside it are now safe to report.
4571 safe_digit_end = this->interval.end;
4572 safe_value_size = value.size();
4573 in_parentheses = false;
4574 after_digit = false;
4575 after_parentheses = true;
4576 }
4577 else if (
4578 after_digit &&
4579 !in_parentheses && // No separators inside parentheses
4580 !after_parentheses && // No separators following right parenthesis
4581 m_separator && m_separator->match(text, this->interval.end, end, flags))
4582 {
4583 // Separator
// Skipped in the text but not added to value; clearing after_digit prevents
// two separators in a row.
4584 this->interval.end = m_separator->interval.end;
4585 after_digit = false;
4586 after_parentheses = false;
4587 }
4588 else if (
4590 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4591 {
4592 // Space
// Skipped in the text but not added to value.
4593 this->interval.end = m_space->interval.end;
4594 after_digit = false;
4595 after_parentheses = false;
4596 }
4597 else
4598 break;
4599 }
4600 if (has_digits) {
// Trim value and the reported interval back to the last safe point.
4601 value.erase(safe_value_size);
4602 this->interval.start = start;
4603 this->interval.end = safe_digit_end;
4604 return true;
4605 }
4606 value.clear();
4607 this->interval.invalidate();
4608 return false;
4609 }
4610
// Sub-parsers supplied to the constructor.
4611 std::shared_ptr<basic_parser<T>> m_digit;
4612 std::shared_ptr<basic_parser<T>> m_plus_sign;
4613 std::shared_ptr<basic_set<T>> m_lparenthesis;
4614 std::shared_ptr<basic_set<T>> m_rparenthesis;
4615 std::shared_ptr<basic_parser<T>> m_separator;
4616 std::shared_ptr<basic_parser<T>> m_space;
4617 };
4618
// Character-type aliases of basic_phone_number: char, wchar_t, and
// tphone_number which selects the wchar_t variant when _UNICODE is defined and
// the char variant otherwise. sgml_phone_number is the char instantiation.
4619 using phone_number = basic_phone_number<char>;
4620 using wphone_number = basic_phone_number<wchar_t>;
4621#ifdef _UNICODE
4622 using tphone_number = wphone_number;
4623#else
4624 using tphone_number = phone_number;
4625#endif
4626 using sgml_phone_number = basic_phone_number<char>;
4627
// IBAN parser.
//
// Parses a two-letter country code, two check digits and a BBAN whose length
// is determined by the country. The parts are stored in country, check_digits
// and bban; is_valid reports the result of the modulo 97 check.
4633 template <class T>
4634 class basic_iban : public basic_parser<T>
4635 {
4636 public:
// space  - parser for spaces allowed inside the BBAN; may be null
// locale - locale for the parser
4637 basic_iban(
4638 _In_ const std::shared_ptr<basic_parser<T>>& space,
4639 _In_ const std::locale& locale = std::locale()) :
4641 m_space(space)
4642 {
4643 this->country[0] = 0;
4644 this->check_digits[0] = 0;
4645 this->bban[0] = 0;
4646 this->is_valid = false;
4647 }
4648
// Resets the parsed parts to empty strings and clears is_valid.
4649 virtual void invalidate()
4650 {
4651 this->country[0] = 0;
4652 this->check_digits[0] = 0;
4653 this->bban[0] = 0;
4654 this->is_valid = false;
4656 }
4657
4658 T country[3]; // Two-letter country code, zero terminated
4660 T bban[31]; // BBAN with spaces removed, zero terminated
4662
4663 protected:
// Matches an IBAN starting at text[start].
//
// Returns true when the text has the IBAN structure for a known country
// (country code, check digits, BBAN of the expected length). A true return
// does not imply a correct checksum: that is reported separately in is_valid.
// On any structural error the parser is invalidated and false is returned.
//
// With match_case_insensitive set in flags, letters are upper-cased with the
// parser locale's ctype facet before they are checked and stored.
4664 virtual bool do_match(
4665 _In_reads_or_z_opt_(end) const T* text,
4666 _In_ size_t start = 0,
4667 _In_ size_t end = SIZE_MAX,
4668 _In_ int flags = match_default)
4669 {
4670 _Assume_(text || start >= end);
4671 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4672 const bool case_insensitive = flags & match_case_insensitive ? true : false;
// Per-country IBAN description.
// country      - two-letter country code
// check_digits - required check digits, or zeros when any digits are accepted
// length       - total IBAN length, including country code and check digits
4673 struct country_t {
4674 T country[2];
4675 T check_digits[2];
4676 size_t length;
4677 };
// The table must stay sorted by country code: it is searched with a binary
// search below.
4678 static const country_t s_countries[] = {
4679 { { 'A', 'D' }, {}, 24 }, // Andorra
4680 { { 'A', 'E' }, {}, 23 }, // United Arab Emirates
4681 { { 'A', 'L' }, {}, 28 }, // Albania
4682 { { 'A', 'O' }, {}, 25 }, // Angola
4683 { { 'A', 'T' }, {}, 20 }, // Austria
4684 { { 'A', 'Z' }, {}, 28 }, // Azerbaijan
4685 { { 'B', 'A' }, { '3', '9' }, 20}, // Bosnia and Herzegovina
4686 { { 'B', 'E' }, {}, 16 }, // Belgium
4687 { { 'B', 'F' }, {}, 28 }, // Burkina Faso
4688 { { 'B', 'G' }, {}, 22 }, // Bulgaria
4689 { { 'B', 'H' }, {}, 22 }, // Bahrain
4690 { { 'B', 'I' }, {}, 27 }, // Burundi
4691 { { 'B', 'J' }, {}, 28 }, // Benin
4692 { { 'B', 'R' }, {}, 29 }, // Brazil
4693 { { 'B', 'Y' }, {}, 28 }, // Belarus
4694 { { 'C', 'F' }, {}, 27 }, // Central African Republic
4695 { { 'C', 'G' }, {}, 27 }, // Congo, Republic of the
4696 { { 'C', 'H' }, {}, 21 }, // Switzerland
4697 { { 'C', 'I' }, {}, 28 }, // Côte d'Ivoire
4698 { { 'C', 'M' }, {}, 27 }, // Cameroon
4699 { { 'C', 'R' }, {}, 22 }, // Costa Rica
4700 { { 'C', 'V' }, {}, 25 }, // Cabo Verde
4701 { { 'C', 'Y' }, {}, 28 }, // Cyprus
4702 { { 'C', 'Z' }, {}, 24 }, // Czech Republic
4703 { { 'D', 'E' }, {}, 22 }, // Germany
4704 { { 'D', 'J' }, {}, 27 }, // Djibouti
4705 { { 'D', 'K' }, {}, 18 }, // Denmark
4706 { { 'D', 'O' }, {}, 28 }, // Dominican Republic
4707 { { 'D', 'Z' }, {}, 26 }, // Algeria
4708 { { 'E', 'E' }, {}, 20 }, // Estonia
4709 { { 'E', 'G' }, {}, 29 }, // Egypt
4710 { { 'E', 'S' }, {}, 24 }, // Spain
4711 { { 'F', 'I' }, {}, 18 }, // Finland
4712 { { 'F', 'O' }, {}, 18 }, // Faroe Islands
4713 { { 'F', 'R' }, {}, 27 }, // France
4714 { { 'G', 'A' }, {}, 27 }, // Gabon
4715 { { 'G', 'B' }, {}, 22 }, // United Kingdom
4716 { { 'G', 'E' }, {}, 22 }, // Georgia
4717 { { 'G', 'I' }, {}, 23 }, // Gibraltar
4718 { { 'G', 'L' }, {}, 18 }, // Greenland
4719 { { 'G', 'Q' }, {}, 27 }, // Equatorial Guinea
4720 { { 'G', 'R' }, {}, 27 }, // Greece
4721 { { 'G', 'T' }, {}, 28 }, // Guatemala
4722 { { 'G', 'W' }, {}, 25 }, // Guinea-Bissau
4723 { { 'H', 'N' }, {}, 28 }, // Honduras
4724 { { 'H', 'R' }, {}, 21 }, // Croatia
4725 { { 'H', 'U' }, {}, 28 }, // Hungary
4726 { { 'I', 'E' }, {}, 22 }, // Ireland
4727 { { 'I', 'L' }, {}, 23 }, // Israel
4728 { { 'I', 'Q' }, {}, 23 }, // Iraq
4729 { { 'I', 'R' }, {}, 26 }, // Iran
4730 { { 'I', 'S' }, {}, 26 }, // Iceland
4731 { { 'I', 'T' }, {}, 27 }, // Italy
4732 { { 'J', 'O' }, {}, 30 }, // Jordan
4733 { { 'K', 'M' }, {}, 27 }, // Comoros
4734 { { 'K', 'W' }, {}, 30 }, // Kuwait
4735 { { 'K', 'Z' }, {}, 20 }, // Kazakhstan
4736 { { 'L', 'B' }, {}, 28 }, // Lebanon
4737 { { 'L', 'C' }, {}, 32 }, // Saint Lucia
4738 { { 'L', 'I' }, {}, 21 }, // Liechtenstein
4739 { { 'L', 'T' }, {}, 20 }, // Lithuania
4740 { { 'L', 'U' }, {}, 20 }, // Luxembourg
4741 { { 'L', 'V' }, {}, 21 }, // Latvia
4742 { { 'L', 'Y' }, {}, 25 }, // Libya
4743 { { 'M', 'A' }, {}, 28 }, // Morocco
4744 { { 'M', 'C' }, {}, 27 }, // Monaco
4745 { { 'M', 'D' }, {}, 24 }, // Moldova
4746 { { 'M', 'E' }, { '2', '5' }, 22 }, // Montenegro
4747 { { 'M', 'G' }, {}, 27 }, // Madagascar
4748 { { 'M', 'K' }, { '0', '7' }, 19 }, // North Macedonia
4749 { { 'M', 'L' }, {}, 28 }, // Mali
4750 { { 'M', 'R' }, { '1', '3' }, 27}, // Mauritania
4751 { { 'M', 'T' }, {}, 31 }, // Malta
4752 { { 'M', 'U' }, {}, 30 }, // Mauritius
4753 { { 'M', 'Z' }, {}, 25 }, // Mozambique
4754 { { 'N', 'E' }, {}, 28 }, // Niger
4755 { { 'N', 'I' }, {}, 32 }, // Nicaragua
4756 { { 'N', 'L' }, {}, 18 }, // Netherlands
4757 { { 'N', 'O' }, {}, 15 }, // Norway
4758 { { 'P', 'K' }, {}, 24 }, // Pakistan
4759 { { 'P', 'L' }, {}, 28 }, // Poland
4760 { { 'P', 'S' }, {}, 29 }, // Palestinian territories
4761 { { 'P', 'T' }, { '5', '0' }, 25 }, // Portugal
4762 { { 'Q', 'A' }, {}, 29 }, // Qatar
4763 { { 'R', 'O' }, {}, 24 }, // Romania
4764 { { 'R', 'S' }, { '3', '5' }, 22 }, // Serbia
4765 { { 'R', 'U' }, {}, 33 }, // Russia
4766 { { 'S', 'A' }, {}, 24 }, // Saudi Arabia
4767 { { 'S', 'C' }, {}, 31 }, // Seychelles
4768 { { 'S', 'D' }, {}, 18 }, // Sudan
4769 { { 'S', 'E' }, {}, 24 }, // Sweden
4770 { { 'S', 'I' }, { '5', '6' }, 19 }, // Slovenia
4771 { { 'S', 'K' }, {}, 24 }, // Slovakia
4772 { { 'S', 'M' }, {}, 27 }, // San Marino
4773 { { 'S', 'N' }, {}, 28 }, // Senegal
4774 { { 'S', 'T' }, {}, 25 }, // São Tomé and Príncipe
4775 { { 'S', 'V' }, {}, 28 }, // El Salvador
4776 { { 'T', 'D' }, {}, 27 }, // Chad
4777 { { 'T', 'G' }, {}, 28 }, // Togo
4778 { { 'T', 'L' }, { '3', '8' }, 23}, // East Timor
4779 { { 'T', 'N' }, { '5', '9' }, 24 }, // Tunisia
4780 { { 'T', 'R' }, {}, 26 }, // Turkey
4781 { { 'U', 'A' }, {}, 29 }, // Ukraine
4782 { { 'V', 'A' }, {}, 22 }, // Vatican City
4783 { { 'V', 'G' }, {}, 24 }, // Virgin Islands, British
4784 { { 'X', 'K' }, {}, 20 }, // Kosovo
4785 };
4786 const country_t* country_desc = nullptr;
4787 size_t n, available, next, bban_length;
4789
// Step 1: read the two-letter country code (A-Z only).
4790 this->interval.end = start;
4791 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4792 if (this->interval.end >= end || !text[this->interval.end])
4793 goto error; // incomplete country code
4794 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4795 if (chr < 'A' || 'Z' < chr)
4796 goto error; // invalid country code
4797 this->country[i] = chr;
4798 }
// Step 2: binary search for the country in s_countries over the half-open
// range [l, r), ordering by first letter and then by second letter.
4799 for (size_t l = 0, r = _countof(s_countries);;) {
4800 if (l >= r)
4801 goto error; // unknown country
4802 size_t m = (l + r) / 2;
4803 const country_t& c = s_countries[m];
4804 if (c.country[0] < this->country[0] || (c.country[0] == this->country[0] && c.country[1] < this->country[1]))
4805 l = m + 1;
4806 else if (this->country[0] < c.country[0] || (this->country[0] == c.country[0] && this->country[1] < c.country[1]))
4807 r = m;
4808 else {
4809 country_desc = &c;
4810 break;
4811 }
4812 }
4813 this->country[2] = 0;
4814
// Step 3: read the two check digits. A zero terminator fails the digit range
// test, so no separate end-of-string check is needed here.
4815 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4816 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4817 goto error; // incomplete or invalid check digits
4818 this->check_digits[i] = text[this->interval.end];
4819 }
4820 this->check_digits[2] = 0;
4821
// Countries whose table entry lists check digits accept only those digits.
4822 if ((country_desc->check_digits[0] && this->check_digits[0] != country_desc->check_digits[0]) ||
4823 (country_desc->check_digits[1] && this->check_digits[1] != country_desc->check_digits[1]))
4824 goto error; // unexpected check digits
4825
// Step 4: read exactly bban_length alphanumeric characters. Text matched by
// m_space is skipped and does not count towards the length.
4826 bban_length = country_desc->length - 4;
4827 for (n = 0; n < bban_length;) {
4828 if (this->interval.end >= end || !text[this->interval.end])
4829 goto error; // bban too short
4830 if (m_space && m_space->match(text, this->interval.end, end, flags)) {
4831 this->interval.end = m_space->interval.end;
4832 continue;
4833 }
4834 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4835 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4836 this->bban[n++] = chr;
4837 this->interval.end++;
4838 }
4839 else
4840 goto error; // invalid bban
4841 }
4842 this->bban[n] = 0;
4843
// Step 5: build the decimal string used for the checksum: BBAN first, then
// the country code, then the check digits. Digits are copied as they are;
// letters A..Z are written as the two-digit numbers 10..35.
4844 // Normalize IBAN.
4845 T normalized[69];
4846 available = 0;
4847 for (size_t i = 0; ; ++i) {
4848 if (!this->bban[i]) {
// End of BBAN reached: append country code and check digits, then terminate.
4849 for (i = 0; i < 2; ++i) {
4850 if ('A' <= this->country[i] && this->country[i] <= 'J') {
4851 normalized[available++] = '1';
4852 normalized[available++] = '0' + this->country[i] - 'A';
4853 }
4854 else if ('K' <= this->country[i] && this->country[i] <= 'T') {
4855 normalized[available++] = '2';
4856 normalized[available++] = '0' + this->country[i] - 'K';
4857 }
4858 else if ('U' <= this->country[i] && this->country[i] <= 'Z') {
4859 normalized[available++] = '3';
4860 normalized[available++] = '0' + this->country[i] - 'U';
4861 }
4862 }
4863 normalized[available++] = this->check_digits[0];
4864 normalized[available++] = this->check_digits[1];
4865 normalized[available] = 0;
4866 break;
4867 }
4868 if ('0' <= this->bban[i] && this->bban[i] <= '9')
4869 normalized[available++] = this->bban[i];
4870 else if ('A' <= this->bban[i] && this->bban[i] <= 'J') {
4871 normalized[available++] = '1';
4872 normalized[available++] = '0' + this->bban[i] - 'A';
4873 }
4874 else if ('K' <= this->bban[i] && this->bban[i] <= 'T') {
4875 normalized[available++] = '2';
4876 normalized[available++] = '0' + this->bban[i] - 'K';
4877 }
4878 else if ('U' <= this->bban[i] && this->bban[i] <= 'Z') {
4879 normalized[available++] = '3';
4880 normalized[available++] = '0' + this->bban[i] - 'U';
4881 }
4882 }
4883
// Step 6: reduce the decimal string modulo 97 piecewise, so the running value
// always fits in 32 bits. After each reduction the remainder (below 97, hence
// at most two digits) is extended with further digits up to nine digits total.
// NOTE(review): strtou32 presumably parses at most 9 characters in base 10 and
// stores the index of the first unparsed character in next - confirm.
4884 // Calculate modulo 97.
4885 nominator = stdex::strtou32(normalized, 9, &next, 10);
4886 for (;;) {
4887 nominator %= 97;
4888 if (!normalized[next]) {
// All digits consumed: the IBAN is valid when the remainder is 1.
4889 this->is_valid = nominator == 1;
4890 break;
4891 }
4892 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
4893 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
4894 nominator = nominator * 10 + static_cast<uint32_t>(normalized[next] - '0');
4895 }
4896
4897 this->interval.start = start;
4898 return true;
4899
4900 error:
4901 invalidate();
4902 return false;
4903 }
4904
// Parser for spaces permitted inside the BBAN; may be null.
4905 std::shared_ptr<basic_parser<T>> m_space;
4906 };
4907
// Character-type aliases of basic_iban: char, wchar_t, and tiban which selects
// the wchar_t variant when _UNICODE is defined and the char variant otherwise.
// sgml_iban is the char instantiation.
4908 using iban = basic_iban<char>;
4909 using wiban = basic_iban<wchar_t>;
4910#ifdef _UNICODE
4911 using tiban = wiban;
4912#else
4913 using tiban = iban;
4914#endif
4915 using sgml_iban = basic_iban<char>;
4916
// Creditor reference parser.
//
// Parses the letters "RF", two check digits and an alphanumeric reference. The
// reference is stored right-aligned and left-padded with '0' in `reference`;
// is_valid reports the result of the modulo 97 check.
4922 template <class T>
4924 {
4925 public:
// space  - parser for spaces allowed between groups of reference characters; may be null
// locale - locale for the parser
4927 _In_ const std::shared_ptr<basic_parser<T>>& space,
4928 _In_ const std::locale& locale = std::locale()) :
4930 m_space(space)
4931 {
4932 this->check_digits[0] = 0;
4933 this->reference[0] = 0;
4934 this->is_valid = false;
4935 }
4936
// Resets the parsed parts to empty strings and clears is_valid.
4937 virtual void invalidate()
4938 {
4939 this->check_digits[0] = 0;
4940 this->reference[0] = 0;
4941 this->is_valid = false;
4943 }
4944
4948
4949 protected:
// Matches a creditor reference starting at text[start].
//
// Returns true when "RF", two check digits and at least one reference
// character were read. A true return does not imply a correct checksum: that
// is reported separately in is_valid. On any structural error the parser is
// invalidated and false is returned.
//
// With match_case_insensitive set in flags, letters are upper-cased with the
// parser locale's ctype facet before they are checked and stored.
4950 virtual bool do_match(
4951 _In_reads_or_z_opt_(end) const T* text,
4952 _In_ size_t start = 0,
4953 _In_ size_t end = SIZE_MAX,
4954 _In_ int flags = match_default)
4955 {
4956 _Assume_(text || start >= end);
4957 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4958 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4959 size_t n, available, next;
4961
// Step 1: the text must start with "RF". Evaluation short-circuits, so the
// second character is read only after the first compared equal to 'R'.
4962 this->interval.end = start;
4963 if (this->interval.end + 1 >= end ||
4964 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'R' ||
4965 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'F')
4966 goto error; // incomplete or wrong reference ID
4967 this->interval.end += 2;
4968
// Step 2: read the two check digits. A zero terminator fails the digit range
// test, so no separate end-of-string check is needed here.
4969 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4970 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4971 goto error; // incomplete or invalid check digits
4972 this->check_digits[i] = text[this->interval.end];
4973 }
4974 this->check_digits[2] = 0;
4975
// Step 3: read the reference in groups of up to four alphanumeric characters,
// each group optionally preceded by text matched by m_space. Reading stops at
// the first character that is neither; more characters than `reference` can
// hold is an error.
4976 for (n = 0;;) {
4977 if (m_space && m_space->match(text, this->interval.end, end, flags))
4978 this->interval.end = m_space->interval.end;
4979 for (size_t j = 0; j < 4; ++j) {
4980 if (this->interval.end >= end || !text[this->interval.end])
4981 goto out;
4982 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4983 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4984 if (n >= _countof(reference) - 1)
4985 goto error; // reference overflow
4986 this->reference[n++] = chr;
4987 this->interval.end++;
4988 }
4989 else
4990 goto out;
4991 }
4992 }
4993 out:
4994 if (!n)
4995 goto error; // reference too short
// Step 4: right-align the n characters read in the buffer (copying from the
// back so the overlapping move is safe) and fill the freed front with '0'.
4996 this->reference[_countof(this->reference) - 1] = 0;
4997 for (size_t i = n, j = _countof(this->reference) - 1; i;)
4998 this->reference[--j] = this->reference[--i];
4999 for (size_t j = _countof(this->reference) - 1 - n; j;)
5000 this->reference[--j] = '0';
5001
// Step 5: build the decimal string used for the checksum: the reference, then
// "2715" (the letters R and F written as 27 and 15), then the check digits.
// Digits are copied as they are; letters A..Z are written as 10..35.
// NOTE(review): every reference character can expand to two digits, and six
// more digits plus a terminator are appended. Confirm that 47 elements are
// enough for the declared size of `reference`, whose declaration is not
// visible in this listing.
5002 // Normalize creditor reference.
5003 T normalized[47];
5004 available = 0;
5005 for (size_t i = 0; ; ++i) {
5006 if (!this->reference[i]) {
5007 normalized[available++] = '2'; // R
5008 normalized[available++] = '7';
5009 normalized[available++] = '1'; // F
5010 normalized[available++] = '5';
5011 normalized[available++] = this->check_digits[0];
5012 normalized[available++] = this->check_digits[1];
5013 normalized[available] = 0;
5014 break;
5015 }
5016 if ('0' <= this->reference[i] && this->reference[i] <= '9')
5017 normalized[available++] = this->reference[i];
5018 else if ('A' <= this->reference[i] && this->reference[i] <= 'J') {
5019 normalized[available++] = '1';
5020 normalized[available++] = '0' + this->reference[i] - 'A';
5021 }
5022 else if ('K' <= this->reference[i] && this->reference[i] <= 'T') {
5023 normalized[available++] = '2';
5024 normalized[available++] = '0' + this->reference[i] - 'K';
5025 }
5026 else if ('U' <= this->reference[i] && this->reference[i] <= 'Z') {
5027 normalized[available++] = '3';
5028 normalized[available++] = '0' + this->reference[i] - 'U';
5029 }
5030 }
5031
// Step 6: reduce the decimal string modulo 97 piecewise, so the running value
// always fits in 32 bits. After each reduction the remainder (below 97, hence
// at most two digits) is extended with further digits up to nine digits total.
// NOTE(review): strtou32 presumably parses at most 9 characters in base 10 and
// stores the index of the first unparsed character in next - confirm.
5032 // Calculate modulo 97.
5033 nominator = stdex::strtou32(normalized, 9, &next, 10);
5034 for (;;) {
5035 nominator %= 97;
5036 if (!normalized[next]) {
// All digits consumed: the reference is valid when the remainder is 1.
5037 this->is_valid = nominator == 1;
5038 break;
5039 }
5040 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
5041 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
5042 nominator = nominator * 10 + static_cast<uint32_t>(normalized[next] - '0');
5043 }
5044
5045 this->interval.start = start;
5046 return true;
5047
5048 error:
5049 invalidate();
5050 return false;
5051 }
5052
// Parser for spaces permitted between reference groups; may be null.
5053 std::shared_ptr<basic_parser<T>> m_space;
5054 };
5055
// Character-type aliases of basic_creditor_reference: char, wchar_t, and
// tcreditor_reference which selects the wchar_t variant when _UNICODE is
// defined and the char variant otherwise. sgml_creditor_reference is the char
// instantiation.
5056 using creditor_reference = basic_creditor_reference<char>;
5057 using wcreditor_reference = basic_creditor_reference<wchar_t>;
5058#ifdef _UNICODE
5059 using tcreditor_reference = wcreditor_reference;
5060#else
5061 using tcreditor_reference = creditor_reference;
5062#endif
5063 using sgml_creditor_reference = basic_creditor_reference<char>;
5064
// Parser for one numeric part of a reference: a run of the characters '0'..'9'.
5070 template <class T>
5072 {
5073 public:
// locale - locale passed on to basic_parser<T>
5074 basic_si_reference_part(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5075
5076 protected:
// Advances this->interval.end from start over consecutive characters in the
// range '0'..'9', stopping at `end`, at the zero terminator or at the first
// other character. The flags argument is not used in the visible body.
//
// NOTE(review): the condition guarding the success branch below is missing
// from this listing; presumably it requires at least one digit to have been
// consumed - confirm against the original source.
5077 virtual bool do_match(
5078 _In_reads_or_z_opt_(end) const T* text,
5079 _In_ size_t start = 0,
5080 _In_ size_t end = SIZE_MAX,
5081 _In_ int flags = match_default)
5082 {
5083 _Assume_(text || start >= end);
5084 this->interval.end = start;
5085 for (;;) {
5086 if (this->interval.end >= end || !text[this->interval.end])
5087 break;
5088 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9')
5089 this->interval.end++;
5090 else
5091 break;
5092 }
// Success: the interval spans the digits consumed.
5094 this->interval.start = start;
5095 return true;
5096 }
// Failure: no interval is reported.
5097 this->interval.invalidate();
5098 return false;
5099 }
5100 };
5101
5104#ifdef _UNICODE
5106#else
5108#endif
5110
// Parser for the delimiter between reference parts: a single '-' character.
5116 template <class T>
5118 {
5119 public:
// locale - locale passed on to basic_parser<T>
5120 basic_si_reference_delimiter(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5121
5122 protected:
// Succeeds when text[start] is '-' and start lies before end; the interval then
// covers exactly that one character. Otherwise the interval is invalidated and
// false is returned. The flags argument is not used.
5123 virtual bool do_match(
5124 _In_reads_or_z_opt_(end) const T* text,
5125 _In_ size_t start = 0,
5126 _In_ size_t end = SIZE_MAX,
5127 _In_ int flags = match_default)
5128 {
5129 _Assume_(text || start >= end);
5130 if (start < end && text[start] == '-') {
// Sets interval.start to start and interval.end to start + 1 in one statement.
5131 this->interval.end = (this->interval.start = start) + 1;
5132 return true;
5133 }
5134 this->interval.invalidate();
5135 return false;
5136 }
5137 };
5138
5141#ifdef _UNICODE
5143#else
5145#endif
5147
5155 template <class T>
5157 {
5158 public:
// Constructor parameters:
// space  - parser for the space allowed after the model digits; stored in m_space
// locale - locale handed to the three part parsers and to the delimiter parser
// The parser starts out not valid and with an empty model string.
5160 _In_ const std::shared_ptr<basic_parser<T>>& space,
5161 _In_ const std::locale& locale = std::locale()) :
5163 part1(locale),
5164 part2(locale),
5165 part3(locale),
5166 is_valid(false),
5167 m_space(space),
5168 m_delimiter(locale)
5169 {
5170 this->model[0] = 0;
5171 }
5172
// Resets the parsed state: empties the model string, invalidates the three
// part parsers and clears is_valid.
5173 virtual void invalidate()
5174 {
5175 this->model[0] = 0;
5176 this->part1.invalidate();
5177 this->part2.invalidate();
5178 this->part3.invalidate();
5179 this->is_valid = false;
5181 }
5182
// Two model digits followed by a zero terminator; empty string when nothing is parsed.
5183 T model[3];
5188
5189 protected:
5190 virtual bool do_match(
5191 _In_reads_or_z_opt_(end) const T* text,
5192 _In_ size_t start = 0,
5193 _In_ size_t end = SIZE_MAX,
5194 _In_ int flags = match_default)
5195 {
5196 _Assume_(text || start >= end);
5197 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
5198 const bool case_insensitive = flags & match_case_insensitive ? true : false;
5199
5200 this->interval.end = start;
5201 if (this->interval.end + 1 >= end ||
5202 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'S' ||
5203 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'I')
5204 goto error; // incomplete or wrong reference ID
5205 this->interval.end += 2;
5206
5207 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5208 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5209 goto error; // incomplete or invalid model
5210 this->model[i] = text[this->interval.end];
5211 }
5212 this->model[2] = 0;
5213
5214 this->part1.invalidate();
5215 this->part2.invalidate();
5216 this->part3.invalidate();
5217 if (this->model[0] == '9' && this->model[1] == '9') {
5218 is_valid = true;
5219 this->interval.start = start;
5220 return true;
5221 }
5222
5223 if (m_space && m_space->match(text, this->interval.end, end, flags))
5224 this->interval.end = m_space->interval.end;
5225
5226 this->part1.match(text, this->interval.end, end, flags) &&
5227 this->m_delimiter.match(text, this->part1.interval.end, end, flags) &&
5228 this->part2.match(text, this->m_delimiter.interval.end, end, flags) &&
5229 this->m_delimiter.match(text, this->part2.interval.end, end, flags) &&
5230 this->part3.match(text, this->m_delimiter.interval.end, end, flags);
5231
5232 this->interval.start = start;
5233 if (this->part3.interval)
5234 this->interval.end = this->part3.interval.end;
5235 else if (this->part2.interval)
5236 this->interval.end = this->part2.interval.end;
5237 else if (this->part1.interval)
5238 this->interval.end = this->part1.interval.end;
5239 else
5240 this->interval.end = start + 4;
5241
5242 if (this->model[0] == '0' && this->model[1] == '0')
5243 is_valid =
5244 this->part3.interval ?
5245 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5246 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 :
5247 this->part2.interval ?
5248 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5249 this->part1.interval.size() + this->part2.interval.size() <= 20 :
5250 this->part1.interval ?
5251 this->part1.interval.size() <= 12 :
5252 false;
5253 else if (this->model[0] == '0' && this->model[1] == '1')
5254 is_valid =
5255 this->part3.interval ?
5256 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5257 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5258 check11(
5259 text + this->part1.interval.start, this->part1.interval.size(),
5260 text + this->part2.interval.start, this->part2.interval.size(),
5261 text + this->part3.interval.start, this->part3.interval.size()) :
5262 this->part2.interval ?
5263 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5264 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5265 check11(
5266 text + this->part1.interval.start, this->part1.interval.size(),
5267 text + this->part2.interval.start, this->part2.interval.size()) :
5268 this->part1.interval ?
5269 this->part1.interval.size() <= 12 &&
5270 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5271 false;
5272 else if (this->model[0] == '0' && this->model[1] == '2')
5273 is_valid =
5274 this->part3.interval ?
5275 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5276 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5277 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5278 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5279 false;
5280 else if (this->model[0] == '0' && this->model[1] == '3')
5281 is_valid =
5282 this->part3.interval ?
5283 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5284 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5285 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5286 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5287 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5288 false;
5289 else if (this->model[0] == '0' && this->model[1] == '4')
5290 is_valid =
5291 this->part3.interval ?
5292 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5293 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5294 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5295 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5296 false;
5297 else if ((this->model[0] == '0' || this->model[0] == '5') && this->model[1] == '5')
5298 is_valid =
5299 this->part3.interval ?
5300 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5301 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5302 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5303 this->part2.interval ?
5304 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5305 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5306 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5307 this->part1.interval ?
5308 this->part1.interval.size() <= 12 &&
5309 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5310 false;
5311 else if (this->model[0] == '0' && this->model[1] == '6')
5312 is_valid =
5313 this->part3.interval ?
5314 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5315 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5316 check11(
5317 text + this->part2.interval.start, this->part2.interval.size(),
5318 text + this->part3.interval.start, this->part3.interval.size()) :
5319 this->part2.interval ?
5320 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5321 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5322 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5323 false;
5324 else if (this->model[0] == '0' && this->model[1] == '7')
5325 is_valid =
5326 this->part3.interval ?
5327 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5328 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5329 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5330 this->part2.interval ?
5331 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5332 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5333 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5334 false;
5335 else if (this->model[0] == '0' && this->model[1] == '8')
5336 is_valid =
5337 this->part3.interval ?
5338 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5339 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5340 check11(
5341 text + this->part1.interval.start, this->part1.interval.size(),
5342 text + this->part2.interval.start, this->part2.interval.size()) &&
5343 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5344 false;
5345 else if (this->model[0] == '0' && this->model[1] == '9')
5346 is_valid =
5347 this->part3.interval ?
5348 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5349 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5350 check11(
5351 text + this->part1.interval.start, this->part1.interval.size(),
5352 text + this->part2.interval.start, this->part2.interval.size()) :
5353 this->part2.interval ?
5354 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5355 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5356 check11(
5357 text + this->part1.interval.start, this->part1.interval.size(),
5358 text + this->part2.interval.start, this->part2.interval.size()) :
5359 this->part1.interval ?
5360 this->part1.interval.size() <= 12 &&
5361 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5362 false;
5363 else if (this->model[0] == '1' && this->model[1] == '0')
5364 is_valid =
5365 this->part3.interval ?
5366 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5367 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5368 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5369 check11(
5370 text + this->part2.interval.start, this->part2.interval.size(),
5371 text + this->part3.interval.start, this->part3.interval.size()) :
5372 this->part2.interval ?
5373 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5374 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5375 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5376 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5377 false;
5378 else if (
5379 (this->model[0] == '1' && (this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5380 ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '8') ||
5381 (this->model[0] == '4' && (this->model[1] == '0' || this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5382 (this->model[0] == '5' && (this->model[1] == '1' || this->model[1] == '8')))
5383 is_valid =
5384 this->part3.interval ?
5385 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5386 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5387 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5388 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5389 this->part2.interval ?
5390 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5391 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5392 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5393 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5394 false;
5395 else if (this->model[0] == '1' && this->model[1] == '2')
5396 is_valid =
5397 this->part3.interval ? false :
5398 this->part2.interval ? false :
5399 this->part1.interval ?
5400 this->part1.interval.size() <= 13 &&
5401 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5402 false;
5403 else if ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '1')
5404 is_valid =
5405 this->part3.interval ? false :
5406 this->part2.interval ?
5407 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5408 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5409 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5410 false;
5411 else
5412 is_valid = true; // Assume models we don't handle as valid
5413 return true;
5414
5415 error:
5416 invalidate();
5417 return false;
5418 }
5419
5420 static bool check11(
5421 _In_count_(num_part1) const T* part1, _In_ size_t num_part1)
5422 {
5423 _Assume_(part1 && num_part1 >= 1);
5424 uint32_t nominator = 0, ponder = 2;
5425 for (size_t i = num_part1 - 1; i--; ++ponder)
5426 nominator += static_cast<uint32_t>(part1[i] - '0') * ponder;
5427 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5428 if (control >= 10)
5429 control = 0;
5430 return control == part1[num_part1 - 1] - '0';
5431 }
5432
5433 static bool check11(
5434 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5435 _In_count_(num_part2) const T* part2, _In_ size_t num_part2)
5436 {
5437 _Assume_(part1 || !num_part1);
5438 _Assume_(part2 && num_part2 >= 1);
5439 uint32_t nominator = 0, ponder = 2;
5440 for (size_t i = num_part2 - 1; i--; ++ponder)
5441 nominator += static_cast<uint32_t>(part2[i] - '0') * ponder;
5442 for (size_t i = num_part1; i--; ++ponder)
5443 nominator += static_cast<uint32_t>(part1[i] - '0') * ponder;
5444 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5445 if (control == 10)
5446 control = 0;
5447 return control == part2[num_part2 - 1] - '0';
5448 }
5449
5450 static bool check11(
5451 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5452 _In_count_(num_part2) const T* part2, _In_ size_t num_part2,
5453 _In_count_(num_part3) const T* part3, _In_ size_t num_part3)
5454 {
5455 _Assume_(part1 || !num_part1);
5456 _Assume_(part2 || !num_part2);
5457 _Assume_(part3 && num_part3 >= 1);
5458 uint32_t nominator = 0, ponder = 2;
5459 for (size_t i = num_part3 - 1; i--; ++ponder)
5460 nominator += static_cast<uint32_t>(part3[i] - '0') * ponder;
5461 for (size_t i = num_part2; i--; ++ponder)
5462 nominator += static_cast<uint32_t>(part2[i] - '0') * ponder;
5463 for (size_t i = num_part1; i--; ++ponder)
5464 nominator += static_cast<uint32_t>(part1[i] - '0') * ponder;
5465 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5466 if (control == 10)
5467 control = 0;
5468 return control == part2[num_part3 - 1] - '0';
5469 }
5470
5471 std::shared_ptr<basic_parser<T>> m_space;
5472 basic_si_reference_delimiter<T> m_delimiter;
5473 };
5474
// Instantiations of basic_si_reference for the character types used by callers.
5475 using si_reference = basic_si_reference<char>;
5476 using wsi_reference = basic_si_reference<wchar_t>;
// tsi_reference selects the wchar_t variant when _UNICODE is defined and the char variant otherwise.
5477#ifdef _UNICODE
5478 using tsi_reference = wsi_reference;
5479#else
5480 using tsi_reference = si_reference;
5481#endif
// The SGML alias is the same char instantiation as si_reference.
5482 using sgml_si_reference = basic_si_reference<char>;
5483
5487 template <class T>
5489 {
5490 public:
5492 _In_ const std::shared_ptr<basic_parser<T>>& element,
5493 _In_ const std::shared_ptr<basic_parser<T>>& digit,
5494 _In_ const std::shared_ptr<basic_parser<T>>& sign,
5495 _In_ const std::locale& locale = std::locale()) :
5497 m_element(element),
5498 m_digit(digit),
5499 m_sign(sign),
5500 has_digits(false),
5501 has_charge(false)
5502 {}
5503
// Clears the match flags published by this parser.
// NOTE(review): the listing skips a line here (5507 -> 5509); presumably the base-class
// invalidate() call sits on the elided line - confirm against the original header.
5504 virtual void invalidate()
5505 {
5506 has_digits = false;
5507 has_charge = false;
5509 }
5510
5511 bool has_digits;
5512 bool has_charge;
5513
5514 protected:
5515 virtual bool do_match(
5516 _In_reads_or_z_opt_(end) const T* text,
5517 _In_ size_t start = 0,
5518 _In_ size_t end = SIZE_MAX,
5519 _In_ int flags = match_default)
5520 {
5521 _Assume_(text || start >= end);
5522
5523 has_digits = false;
5524 has_charge = false;
5525 this->interval.end = start;
5526
5527 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
5528 for (;;) {
5529 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
5530 this->interval.end = m_element->interval.end;
5531 while (m_digit->match(text, this->interval.end, end, flags)) {
5532 this->interval.end = m_digit->interval.end;
5533 has_digits = true;
5534 }
5535 }
5536 else if (start < this->interval.end) {
5537 if (m_sign->match(text, this->interval.end, end, flags)) {
5538 this->interval.end = m_sign->interval.end;
5539 has_charge = true;
5540 }
5541 this->interval.start = start;
5542 return true;
5543 }
5544 else {
5545 this->interval.invalidate();
5546 return false;
5547 }
5548 }
5549 }
5550
5551 std::shared_ptr<basic_parser<T>> m_element;
5552 std::shared_ptr<basic_parser<T>> m_digit;
5553 std::shared_ptr<basic_parser<T>> m_sign;
5554 };
5555
5558#ifdef _UNICODE
5560#else
5562#endif
5564
5569 {
5570 protected:
5571 virtual bool do_match(
5572 _In_reads_or_z_(end) const char* text,
5573 _In_ size_t start = 0,
5574 _In_ size_t end = SIZE_MAX,
5575 _In_ int flags = match_default)
5576 {
5577 _Assume_(text || start >= end);
5578 this->interval.end = start;
5579
5580 _Assume_(text || this->interval.end >= end);
5581 if (this->interval.end < end && text[this->interval.end]) {
5582 if (text[this->interval.end] == '\r') {
5583 this->interval.end++;
5584 if (this->interval.end < end && text[this->interval.end] == '\n') {
5585 this->interval.start = start;
5586 this->interval.end++;
5587 return true;
5588 }
5589 }
5590 else if (text[this->interval.end] == '\n') {
5591 this->interval.start = start;
5592 this->interval.end++;
5593 return true;
5594 }
5595 }
5596 this->interval.invalidate();
5597 return false;
5598 }
5599 };
5600
5604 class http_space : public parser
5605 {
5606 protected:
5607 virtual bool do_match(
5608 _In_reads_or_z_(end) const char* text,
5609 _In_ size_t start = 0,
5610 _In_ size_t end = SIZE_MAX,
5611 _In_ int flags = match_default)
5612 {
5613 _Assume_(text || start >= end);
5614 this->interval.end = start;
5615 if (m_line_break.match(text, this->interval.end, end, flags)) {
5616 this->interval.end = m_line_break.interval.end;
5617 if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5618 this->interval.start = start;
5619 this->interval.end++;
5620 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5621 return true;
5622 }
5623 }
5624 else if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5625 this->interval.start = start;
5626 this->interval.end++;
5627 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5628 return true;
5629 }
5630 this->interval.invalidate();
5631 return false;
5632 }
5633
5634 http_line_break m_line_break;
5635 };
5636
5640 class http_text_char : public parser
5641 {
5642 protected:
5643 virtual bool do_match(
5644 _In_reads_or_z_(end) const char* text,
5645 _In_ size_t start = 0,
5646 _In_ size_t end = SIZE_MAX,
5647 _In_ int flags = match_default)
5648 {
5649 _Assume_(text || start >= end);
5650 this->interval.end = start;
5651
5652 _Assume_(text || this->interval.end >= end);
5653 if (m_space.match(text, this->interval.end, end, flags)) {
5654 this->interval.start = start;
5655 this->interval.end = m_space.interval.end;
5656 return true;
5657 }
5658 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
5659 this->interval.start = start;
5660 this->interval.end++;
5661 return true;
5662 }
5663 this->interval.invalidate();
5664 return false;
5665 }
5666
5667 http_space m_space;
5668 };
5669
// Test for an HTTP token: a run of characters that stops at the first control character
// (below 0x20), 0x7f, one of the separator characters listed below, or whitespace.
5673 class http_token : public parser
5674 {
5675 protected:
5676 virtual bool do_match(
5677 _In_reads_or_z_(end) const char* text,
5678 _In_ size_t start = 0,
5679 _In_ size_t end = SIZE_MAX,
5680 _In_ int flags = match_default)
5681 {
5682 _Assume_(text || start >= end);
5683 this->interval.end = start;
// Advance until the range/terminator is reached or a character that cannot be part of a token is seen.
5684 for (;;) {
5685 if (this->interval.end < end && text[this->interval.end]) {
5686 if ((unsigned int)text[this->interval.end] < 0x20 ||
5687 (unsigned int)text[this->interval.end] == 0x7f ||
5688 text[this->interval.end] == '(' ||
5689 text[this->interval.end] == ')' ||
5690 text[this->interval.end] == '<' ||
5691 text[this->interval.end] == '>' ||
5692 text[this->interval.end] == '@' ||
5693 text[this->interval.end] == ',' ||
5694 text[this->interval.end] == ';' ||
5695 text[this->interval.end] == ':' ||
5696 text[this->interval.end] == '\\' ||
5697 text[this->interval.end] == '\"' ||
5698 text[this->interval.end] == '/' ||
5699 text[this->interval.end] == '[' ||
5700 text[this->interval.end] == ']' ||
5701 text[this->interval.end] == '?' ||
5702 text[this->interval.end] == '=' ||
5703 text[this->interval.end] == '{' ||
5704 text[this->interval.end] == '}' ||
5705 stdex::isspace(text[this->interval.end]))
5706 break;
5707 else
5708 this->interval.end++;
5709 }
5710 else
5711 break;
5712 }
// NOTE(review): the listing skips a line here (5712 -> 5714). The "} else {" below implies an
// "if (...) {" guard on the elided line - presumably a non-empty-match test; confirm against the original header.
5714 this->interval.start = start;
5715 return true;
5716 }
5717 else {
5718 this->interval.invalidate();
5719 return false;
5720 }
5721 }
5722 };
5723
5728 {
5729 public:
// Resets `content` to start = 1, end = 0 and then invalidates the base parser state.
5730 virtual void invalidate()
5731 {
5732 content.start = 1;
5733 content.end = 0;
5734 parser::invalidate();
5735 }
5736
5738
5739 protected:
5740 virtual bool do_match(
5741 _In_reads_or_z_(end) const char* text,
5742 _In_ size_t start = 0,
5743 _In_ size_t end = SIZE_MAX,
5744 _In_ int flags = match_default)
5745 {
5746 _Assume_(text || start >= end);
5747 this->interval.end = start;
5748 if (this->interval.end < end && text[this->interval.end] != '"')
5749 goto error;
5750 this->interval.end++;
5751 content.start = this->interval.end;
5752 for (;;) {
5753 _Assume_(text || this->interval.end >= end);
5754 if (this->interval.end < end && text[this->interval.end]) {
5755 if (text[this->interval.end] == '"') {
5756 content.end = this->interval.end;
5757 this->interval.end++;
5758 break;
5759 }
5760 else if (text[this->interval.end] == '\\') {
5761 this->interval.end++;
5762 if (this->interval.end < end && text[this->interval.end]) {
5763 this->interval.end++;
5764 }
5765 else
5766 goto error;
5767 }
5768 else if (m_chr.match(text, this->interval.end, end, flags))
5769 this->interval.end++;
5770 else
5771 goto error;
5772 }
5773 else
5774 goto error;
5775 }
5776 this->interval.start = start;
5777 return true;
5778
5779 error:
5780 invalidate();
5781 return false;
5782 }
5783
5784 http_text_char m_chr;
5785 };
5786
5790 class http_value : public parser
5791 {
5792 public:
// Invalidates both alternatives (`string` and `token`) and then the base parser state.
5793 virtual void invalidate()
5794 {
5795 string.invalidate();
5796 token.invalidate();
5797 parser::invalidate();
5798 }
5799
5802
5803 protected:
5804 virtual bool do_match(
5805 _In_reads_or_z_(end) const char* text,
5806 _In_ size_t start = 0,
5807 _In_ size_t end = SIZE_MAX,
5808 _In_ int flags = match_default)
5809 {
5810 _Assume_(text || start >= end);
5811 this->interval.end = start;
5812 if (string.match(text, this->interval.end, end, flags)) {
5813 token.invalidate();
5814 this->interval.end = string.interval.end;
5815 this->interval.start = start;
5816 return true;
5817 }
5818 else if (token.match(text, this->interval.end, end, flags)) {
5819 string.invalidate();
5820 this->interval.end = token.interval.end;
5821 this->interval.start = start;
5822 return true;
5823 }
5824 else {
5825 this->interval.invalidate();
5826 return false;
5827 }
5828 }
5829 };
5830
5834 class http_parameter : public parser
5835 {
5836 public:
// Invalidates the `name` and `value` sub-parsers and then the base parser state.
5837 virtual void invalidate()
5838 {
5839 name.invalidate();
5840 value.invalidate();
5841 parser::invalidate();
5842 }
5843
5846
5847 protected:
5848 virtual bool do_match(
5849 _In_reads_or_z_(end) const char* text,
5850 _In_ size_t start = 0,
5851 _In_ size_t end = SIZE_MAX,
5852 _In_ int flags = match_default)
5853 {
5854 _Assume_(text || start >= end);
5855 this->interval.end = start;
5856 if (name.match(text, this->interval.end, end, flags))
5857 this->interval.end = name.interval.end;
5858 else
5859 goto error;
5860 while (m_space.match(text, this->interval.end, end, flags))
5861 this->interval.end = m_space.interval.end;
5862 _Assume_(text || this->interval.end >= end);
5863 if (this->interval.end < end && text[this->interval.end] == '=')
5864 this->interval.end++;
5865 else
5866 while (m_space.match(text, this->interval.end, end, flags))
5867 this->interval.end = m_space.interval.end;
5868 if (value.match(text, this->interval.end, end, flags))
5869 this->interval.end = value.interval.end;
5870 else
5871 goto error;
5872 this->interval.start = start;
5873 return true;
5874
5875 error:
5876 invalidate();
5877 return false;
5878 }
5879
5880 http_space m_space;
5881 };
5882
5886 class http_any_type : public parser
5887 {
5888 protected:
5889 virtual bool do_match(
5890 _In_reads_or_z_(end) const char* text,
5891 _In_ size_t start = 0,
5892 _In_ size_t end = SIZE_MAX,
5893 _In_ int flags = match_default)
5894 {
5895 _Assume_(text || start >= end);
5896 if (start + 2 < end &&
5897 text[start] == '*' &&
5898 text[start + 1] == '/' &&
5899 text[start + 2] == '*')
5900 {
5901 this->interval.end = (this->interval.start = start) + 3;
5902 return true;
5903 }
5904 else if (start < end && text[start] == '*') {
5905 this->interval.end = (this->interval.start = start) + 1;
5906 return true;
5907 }
5908 else {
5909 this->interval.invalidate();
5910 return false;
5911 }
5912 }
5913 };
5914
5919 {
5920 public:
// Invalidates the `type` and `subtype` sub-parsers and then the base parser state.
5921 virtual void invalidate()
5922 {
5923 type.invalidate();
5924 subtype.invalidate();
5925 parser::invalidate();
5926 }
5927
5928 http_token type;
5929 http_token subtype;
5930
5931 protected:
5932 virtual bool do_match(
5933 _In_reads_or_z_(end) const char* text,
5934 _In_ size_t start = 0,
5935 _In_ size_t end = SIZE_MAX,
5936 _In_ int flags = match_default)
5937 {
5938 _Assume_(text || start >= end);
5939 this->interval.end = start;
5940 if (type.match(text, this->interval.end, end, flags))
5941 this->interval.end = type.interval.end;
5942 else
5943 goto error;
5944 while (m_space.match(text, this->interval.end, end, flags))
5945 this->interval.end = m_space.interval.end;
5946 if (this->interval.end < end && text[this->interval.end] == '/')
5947 this->interval.end++;
5948 else
5949 goto error;
5950 while (m_space.match(text, this->interval.end, end, flags))
5951 this->interval.end = m_space.interval.end;
5952 if (subtype.match(text, this->interval.end, end, flags))
5953 this->interval.end = subtype.interval.end;
5954 else
5955 goto error;
5956 this->interval.start = start;
5957 return true;
5958
5959 error:
5960 invalidate();
5961 return false;
5962 }
5963
5964 http_space m_space;
5965 };
5966
5971 {
5972 public:
// Drops all collected parameters and then invalidates the media-range part.
5973 virtual void invalidate()
5974 {
5975 params.clear();
5976 http_media_range::invalidate();
5977 }
5978
5979 std::list<http_parameter> params;
5980
5981 protected:
5982 virtual bool do_match(
5983 _In_reads_or_z_(end) const char* text,
5984 _In_ size_t start = 0,
5985 _In_ size_t end = SIZE_MAX,
5986 _In_ int flags = match_default)
5987 {
5988 _Assume_(text || start >= end);
5989 if (!http_media_range::do_match(text, start, end, flags))
5990 goto error;
5991 params.clear();
5992 for (;;) {
5993 if (this->interval.end < end && text[this->interval.end]) {
5994 if (m_space.match(text, this->interval.end, end, flags))
5995 this->interval.end = m_space.interval.end;
5996 else if (text[this->interval.end] == ';') {
5997 this->interval.end++;
5998 while (m_space.match(text, this->interval.end, end, flags))
5999 this->interval.end = m_space.interval.end;
6000 http_parameter param;
6001 if (param.match(text, this->interval.end, end, flags)) {
6002 this->interval.end = param.interval.end;
6003 params.push_back(std::move(param));
6004 }
6005 else
6006 break;
6007 }
6008 else
6009 break;
6010 }
6011 else
6012 break;
6013 }
6014 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
6015 return true;
6016
6017 error:
6018 invalidate();
6019 return false;
6020 }
6021 };
6022
6027 {
6028 protected:
// Scans a run of characters that stops at the first control character (below 0x20), 0x7f,
// ':', '/', whitespace, the zero terminator or `end`.
6029 virtual bool do_match(
6030 _In_reads_or_z_(end) const char* text,
6031 _In_ size_t start = 0,
6032 _In_ size_t end = SIZE_MAX,
6033 _In_ int flags = match_default)
6034 {
6035 _Assume_(text || start >= end);
6036 this->interval.end = start;
6037 for (;;) {
6038 if (this->interval.end < end && text[this->interval.end]) {
6039 if ((unsigned int)text[this->interval.end] < 0x20 ||
6040 (unsigned int)text[this->interval.end] == 0x7f ||
6041 text[this->interval.end] == ':' ||
6042 text[this->interval.end] == '/' ||
6043 stdex::isspace(text[this->interval.end]))
6044 break;
6045 else
6046 this->interval.end++;
6047 }
6048 else
6049 break;
6050 }
// NOTE(review): the listing skips a line here (6050 -> 6052). The closing brace after
// "return true;" implies an "if (...) {" guard on the elided line - presumably a
// non-empty-match test; confirm against the original header.
6052 this->interval.start = start;
6053 return true;
6054 }
6055 this->interval.invalidate();
6056 return false;
6057 }
6058 };
6059
// Test for a decimal port number; the parsed number is published in `value`.
6063 class http_url_port : public parser
6064 {
6065 public:
6066 http_url_port(_In_ const std::locale& locale = std::locale()) :
6067 parser(locale),
6068 value(0)
6069 {}
6070
// Resets `value` to 0 and invalidates the base parser state.
6071 virtual void invalidate()
6072 {
6073 value = 0;
6074 parser::invalidate();
6075 }
6076
6077 uint16_t value;
6078
6079 protected:
6080 virtual bool do_match(
6081 _In_reads_or_z_(end) const char* text,
6082 _In_ size_t start = 0,
6083 _In_ size_t end = SIZE_MAX,
6084 _In_ int flags = match_default)
6085 {
6086 _Assume_(text || start >= end);
6087 value = 0;
6088 this->interval.end = start;
6089 for (;;) {
6090 if (this->interval.end < end && text[this->interval.end]) {
6091 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
// Accumulate in a wider type so that exceeding UINT16_MAX is detected before narrowing.
6092 size_t _value = static_cast<size_t>(value) * 10 + static_cast<size_t>(text[this->interval.end] - '0');
6093 if (_value > UINT16_MAX) {
6094 value = 0;
6095 this->interval.invalidate();
6096 return false;
6097 }
6098 value = (uint16_t)_value;
6099 this->interval.end++;
6100 }
6101 else
6102 break;
6103 }
6104 else
6105 break;
6106 }
// NOTE(review): the listing skips a line here (6106 -> 6108). The closing brace after
// "return true;" implies an "if (...) {" guard on the elided line - presumably an
// at-least-one-digit test; confirm against the original header.
6108 this->interval.start = start;
6109 return true;
6110 }
6111 this->interval.invalidate();
6112 return false;
6113 }
6114 };
6115
6120 {
6121 protected:
6122 virtual bool do_match(
6123 _In_reads_or_z_(end) const char* text,
6124 _In_ size_t start = 0,
6125 _In_ size_t end = SIZE_MAX,
6126 _In_ int flags = match_default)
6127 {
6128 _Assume_(text || start >= end);
6129 this->interval.end = start;
6130 for (;;) {
6131 if (this->interval.end < end && text[this->interval.end]) {
6132 if ((unsigned int)text[this->interval.end] < 0x20 ||
6133 (unsigned int)text[this->interval.end] == 0x7f ||
6134 text[this->interval.end] == '?' ||
6135 text[this->interval.end] == '/' ||
6136 stdex::isspace(text[this->interval.end]))
6137 break;
6138 else
6139 this->interval.end++;
6140 }
6141 else
6142 break;
6143 }
6144 this->interval.start = start;
6145 return true;
6146 }
6147 };
6148
6152 class http_url_path : public parser
6153 {
6154 public:
6155 virtual void invalidate()
6156 {
6157 segments.clear();
6158 parser::invalidate();
6159 }
6160
6161 std::vector<http_url_path_segment> segments;
6162
6163 protected:
6164 virtual bool do_match(
6165 _In_reads_or_z_(end) const char* text,
6166 _In_ size_t start = 0,
6167 _In_ size_t end = SIZE_MAX,
6168 _In_ int flags = match_default)
6169 {
6170 _Assume_(text || start >= end);
6172 this->interval.end = start;
6173 segments.clear();
6174 _Assume_(text || this->interval.end >= end);
6175 if (this->interval.end < end && text[this->interval.end] != '/')
6176 goto error;
6177 this->interval.end++;
6178 s.match(text, this->interval.end, end, flags);
6179 segments.push_back(s);
6180 this->interval.end = s.interval.end;
6181 for (;;) {
6182 if (this->interval.end < end && text[this->interval.end]) {
6183 if (text[this->interval.end] == '/') {
6184 this->interval.end++;
6185 s.match(text, this->interval.end, end, flags);
6186 segments.push_back(s);
6187 this->interval.end = s.interval.end;
6188 }
6189 else
6190 break;
6191 }
6192 else
6193 break;
6194 }
6195 this->interval.start = start;
6196 return true;
6197
6198 error:
6199 invalidate();
6200 return false;
6201 }
6202 };
6203
6208 {
6209 public:
// Resets both the `name` and `value` intervals to start = 1, end = 0 and then invalidates
// the base parser state.
6210 virtual void invalidate()
6211 {
6212 name.start = 1;
6213 name.end = 0;
6214 value.start = 1;
6215 value.end = 0;
6216 parser::invalidate();
6217 }
6218
6221
6222 protected:
// Matches a URL query parameter: a name, optionally followed by '=' and a value.
// `name` and `value` receive the respective intervals; without '=' the value interval is
// reset to start = 1, end = 0.
6223 virtual bool do_match(
6224 _In_reads_or_z_(end) const char* text,
6225 _In_ size_t start = 0,
6226 _In_ size_t end = SIZE_MAX,
6227 _In_ int flags = match_default)
6228 {
6229 _Assume_(text || start >= end);
6230 this->interval.end = start;
6231 name.start = this->interval.end;
// The name runs up to the first control character, 0x7f, '&', '=', whitespace, terminator or `end`.
6232 for (;;) {
6233 if (this->interval.end < end && text[this->interval.end]) {
6234 if ((unsigned int)text[this->interval.end] < 0x20 ||
6235 (unsigned int)text[this->interval.end] == 0x7f ||
6236 text[this->interval.end] == '&' ||
6237 text[this->interval.end] == '=' ||
6238 stdex::isspace(text[this->interval.end]))
6239 break;
6240 else
6241 this->interval.end++;
6242 }
6243 else
6244 break;
6245 }
// NOTE(review): the listing skips a line here (6245 -> 6247). The "else goto error" below
// implies an "if (...)" on the elided line - presumably a non-empty-name test; confirm
// against the original header.
6247 name.end = this->interval.end;
6248 else
6249 goto error;
// NOTE(review): text[this->interval.end] is read here without first checking
// this->interval.end < end; when the name runs up to `end` of a buffer that is not
// zero-terminated this reads one element past the range.
6250 if (text[this->interval.end] == '=') {
6251 this->interval.end++;
6252 value.start = this->interval.end;
// The value runs up to the first control character, 0x7f, '&', whitespace, terminator or `end`.
6253 for (;;) {
6254 if (this->interval.end < end && text[this->interval.end]) {
6255 if ((unsigned int)text[this->interval.end] < 0x20 ||
6256 (unsigned int)text[this->interval.end] == 0x7f ||
6257 text[this->interval.end] == '&' ||
6258 stdex::isspace(text[this->interval.end]))
6259 break;
6260 else
6261 this->interval.end++;
6262 }
6263 else
6264 break;
6265 }
6266 value.end = this->interval.end;
6267 }
6268 else {
6269 value.start = 1;
6270 value.end = 0;
6271 }
6272 this->interval.start = start;
6273 return true;
6274
6275 error:
6276 invalidate();
6277 return false;
6278 }
6279 };
6280
6284 class http_url : public parser
6285 {
6286 public:
6287 http_url(_In_ const std::locale& locale = std::locale()) :
6288 parser(locale),
6289 port(locale)
6290 {}
6291
6292 virtual void invalidate()
6293 {
6294 server.invalidate();
6295 port.invalidate();
6296 path.invalidate();
6297 params.clear();
6298 parser::invalidate();
6299 }
6300
6301 http_url_server server;
6302 http_url_port port;
6303 http_url_path path;
6304 std::list<http_url_parameter> params;
6305
6306 protected:
6307 virtual bool do_match(
6308 _In_reads_or_z_(end) const char* text,
6309 _In_ size_t start = 0,
6310 _In_ size_t end = SIZE_MAX,
6311 _In_ int flags = match_default)
6312 {
6313 _Assume_(text || start >= end);
6314 this->interval.end = start;
6315
6316 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", SIZE_MAX, m_locale) == 0) {
6317 this->interval.end += 7;
6318 if (server.match(text, this->interval.end, end, flags))
6319 this->interval.end = server.interval.end;
6320 else
6321 goto error;
6322 if (this->interval.end < end && text[this->interval.end] == ':') {
6323 this->interval.end++;
6324 if (port.match(text, this->interval.end, end, flags))
6325 this->interval.end = port.interval.end;
6326 }
6327 else {
6328 port.invalidate();
6329 port.value = 80;
6330 }
6331 }
6332 else {
6333 server.invalidate();
6334 port.invalidate();
6335 port.value = 80;
6336 }
6337
6338 if (path.match(text, this->interval.end, end, flags))
6339 this->interval.end = path.interval.end;
6340 else
6341 goto error;
6342
6343 params.clear();
6344
6345 if (this->interval.end < end && text[this->interval.end] == '?') {
6346 this->interval.end++;
6347 for (;;) {
6348 if (this->interval.end < end && text[this->interval.end]) {
6349 if ((unsigned int)text[this->interval.end] < 0x20 ||
6350 (unsigned int)text[this->interval.end] == 0x7f ||
6351 stdex::isspace(text[this->interval.end]))
6352 break;
6353 else if (text[this->interval.end] == '&')
6354 this->interval.end++;
6355 else {
6356 http_url_parameter param;
6357 if (param.match(text, this->interval.end, end, flags)) {
6358 this->interval.end = param.interval.end;
6359 params.push_back(std::move(param));
6360 }
6361 else
6362 break;
6363 }
6364 }
6365 else
6366 break;
6367 }
6368 }
6369
6370 this->interval.start = start;
6371 return true;
6372
6373 error:
6374 invalidate();
6375 return false;
6376 }
6377 };
6378
// Test for an HTTP language tag: alphabetic components separated by '-'.
// The interval of every component is collected in `components`.
6382 class http_language : public parser
6383 {
6384 public:
// Drops the collected components and invalidates the base parser state.
6385 virtual void invalidate()
6386 {
6387 components.clear();
6388 parser::invalidate();
6389 }
6390
6391 std::vector<stdex::interval<size_t>> components;
6392
6393 protected:
6394 virtual bool do_match(
6395 _In_reads_or_z_(end) const char* text,
6396 _In_ size_t start = 0,
6397 _In_ size_t end = SIZE_MAX,
6398 _In_ int flags = match_default)
6399 {
6400 _Assume_(text || start >= end);
6401 this->interval.end = start;
6402 components.clear();
6403 for (;;) {
6404 if (this->interval.end < end && text[this->interval.end]) {
// NOTE(review): the listing skips a line here (6404 -> 6406); `k` is used below but its
// declaration is not visible - presumably an interval local declared on the elided line.
6406 k.end = this->interval.end;
// Extend k.end over the run of alphabetic characters.
6407 for (;;) {
6408 if (k.end < end && text[k.end]) {
6409 if (stdex::isalpha(text[k.end]))
6410 k.end++;
6411 else
6412 break;
6413 }
6414 else
6415 break;
6416 }
// An empty run ends the tag; otherwise record the component.
6417 if (this->interval.end < k.end) {
6418 k.start = this->interval.end;
6419 this->interval.end = k.end;
6420 components.push_back(k);
6421 }
6422 else
6423 break;
// A '-' announces another component.
6424 if (this->interval.end < end && text[this->interval.end] == '-')
6425 this->interval.end++;
6426 else
6427 break;
6428 }
6429 else
6430 break;
6431 }
6432 if (!components.empty()) {
6433 this->interval.start = start;
// End at the last component, so a trailing '-' is not part of the match.
6434 this->interval.end = components.back().end;
6435 return true;
6436 }
6437 this->interval.invalidate();
6438 return false;
6439 }
6440 };
6441
// Test for an HTTP weight: decimal digits with an optional '.' and fractional digits.
// `value` defaults to 1.0f and is reset to 1.0f whenever the parser is invalidated.
6445 class http_weight : public parser
6446 {
6447 public:
6448 http_weight(_In_ const std::locale& locale = std::locale()) :
6449 parser(locale),
6450 value(1.0f)
6451 {}
6452
6453 virtual void invalidate()
6454 {
6455 value = 1.0f;
6456 parser::invalidate();
6457 }
6458
6459 float value;
6460
6461 protected:
6462 virtual bool do_match(
6463 _In_reads_or_z_(end) const char* text,
6464 _In_ size_t start = 0,
6465 _In_ size_t end = SIZE_MAX,
6466 _In_ int flags = match_default)
6467 {
6468 _Assume_(text || start >= end);
// celi_del accumulates the digits before '.', decimalni_del the digits after it, and
// decimalni_del_n is 10 raised to the number of digits after '.'.
6469 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
6470 this->interval.end = start;
6471 for (;;) {
6472 if (this->interval.end < end && text[this->interval.end]) {
6473 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6474 celi_del = celi_del * 10 + static_cast<size_t>(text[this->interval.end] - '0');
6475 this->interval.end++;
6476 }
6477 else if (text[this->interval.end] == '.') {
6478 this->interval.end++;
6479 for (;;) {
6480 if (this->interval.end < end && text[this->interval.end]) {
6481 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6482 decimalni_del = decimalni_del * 10 + static_cast<size_t>(text[this->interval.end] - '0');
6483 decimalni_del_n *= 10;
6484 this->interval.end++;
6485 }
6486 else
6487 break;
6488 }
6489 else
6490 break;
6491 }
// Scanning stops after the fractional digits.
6492 break;
6493 }
6494 else
6495 break;
6496 }
6497 else
6498 break;
6499 }
// NOTE(review): the listing skips two lines here (6499 -> 6502). The closing brace after
// "return true;" implies an "if (...) {" guard, and nothing visible assigns `value` on
// success - presumably both sit on the elided lines; confirm against the original header.
6502 this->interval.start = start;
6503 return true;
6504 }
6505 value = 1.0f;
6506 this->interval.invalidate();
6507 return false;
6508 }
6509 };
6510
6514 class http_asterisk : public parser
6515 {
6516 protected:
6517 virtual bool do_match(
6518 _In_reads_or_z_(end) const char* text,
6519 _In_ size_t start = 0,
6520 _In_ size_t end = SIZE_MAX,
6521 _In_ int flags = match_default)
6522 {
6523 _Assume_(text || end <= start);
6524 if (start < end && text[start] == '*') {
6525 this->interval.end = (this->interval.start = start) + 1;
6526 return true;
6527 }
6528 this->interval.invalidate();
6529 return false;
6530 }
6531 };
6532
6536 template <class T, class T_asterisk = http_asterisk>
6538 {
6539 public:
// Constructs the parser; the locale is forwarded to the base parser and to the `factor` sub-parser.
6540 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
6541 parser(locale),
6542 factor(locale)
6543 {}
6544
// Invalidates the `asterisk`, `value` and `factor` sub-parsers and then the base parser state.
6545 virtual void invalidate()
6546 {
6547 asterisk.invalidate();
6548 value.invalidate();
6549 factor.invalidate();
6550 parser::invalidate();
6551 }
6552
6553 T_asterisk asterisk;
6554 T value;
6555 http_weight factor;
6556
6557 protected:
6558 virtual bool do_match(
6559 _In_reads_or_z_(end) const char* text,
6560 _In_ size_t start = 0,
6561 _In_ size_t end = SIZE_MAX,
6562 _In_ int flags = match_default)
6563 {
6564 _Assume_(text || start >= end);
6565 size_t konec_vrednosti;
6566 this->interval.end = start;
6567 if (asterisk.match(text, this->interval.end, end, flags)) {
6568 this->interval.end = konec_vrednosti = asterisk.interval.end;
6569 value.invalidate();
6570 }
6571 else if (value.match(text, this->interval.end, end, flags)) {
6572 this->interval.end = konec_vrednosti = value.interval.end;
6573 asterisk.invalidate();
6574 }
6575 else {
6576 asterisk.invalidate();
6577 value.invalidate();
6578 this->interval.invalidate();
6579 return false;
6580 }
6581
6582 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6583 if (this->interval.end < end && text[this->interval.end] == ';') {
6584 this->interval.end++;
6585 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6586 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
6587 this->interval.end++;
6588 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6589 if (this->interval.end < end && text[this->interval.end] == '=') {
6590 this->interval.end++;
6591 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6592 if (factor.match(text, this->interval.end, end, flags))
6593 this->interval.end = factor.interval.end;
6594 }
6595 }
6596 }
6597 if (!factor.interval) {
6598 factor.invalidate();
6599 this->interval.end = konec_vrednosti;
6600 }
6601 this->interval.start = start;
6602 return true;
6603 }
6604 };
6605
6610 {
6611 public:
6612 virtual void invalidate()
6613 {
6614 name.invalidate();
6615 value.invalidate();
6616 parser::invalidate();
6617 }
6618
6619 http_token name;
6620 http_value value;
6621
6622 protected:
6623 virtual bool do_match(
6624 _In_reads_or_z_(end) const char* text,
6625 _In_ size_t start = 0,
6626 _In_ size_t end = SIZE_MAX,
6627 _In_ int flags = match_default)
6628 {
6629 _Assume_(text || start >= end);
6630 this->interval.end = start;
6631 if (this->interval.end < end && text[this->interval.end] == '$')
6632 this->interval.end++;
6633 else
6634 goto error;
6635 if (name.match(text, this->interval.end, end, flags))
6636 this->interval.end = name.interval.end;
6637 else
6638 goto error;
6639 while (m_space.match(text, this->interval.end, end, flags))
6640 this->interval.end = m_space.interval.end;
6641 if (this->interval.end < end && text[this->interval.end] == '=')
6642 this->interval.end++;
6643 else
6644 goto error;
6645 while (m_space.match(text, this->interval.end, end, flags))
6646 this->interval.end = m_space.interval.end;
6647 if (value.match(text, this->interval.end, end, flags))
6648 this->interval.end = value.interval.end;
6649 else
6650 goto error;
6651 this->interval.start = start;
6652 return true;
6653
6654 error:
6655 invalidate();
6656 return false;
6657 }
6658
6659 http_space m_space;
6660 };
6661
6665 class http_cookie : public parser
6666 {
6667 public:
6668 virtual void invalidate()
6669 {
6670 name.invalidate();
6671 value.invalidate();
6672 params.clear();
6673 parser::invalidate();
6674 }
6675
6678 std::list<http_cookie_parameter> params;
6679
6680 protected:
6681 virtual bool do_match(
6682 _In_reads_or_z_(end) const char* text,
6683 _In_ size_t start = 0,
6684 _In_ size_t end = SIZE_MAX,
6685 _In_ int flags = match_default)
6686 {
6687 _Assume_(text || start >= end);
6688 this->interval.end = start;
6689 if (name.match(text, this->interval.end, end, flags))
6690 this->interval.end = name.interval.end;
6691 else
6692 goto error;
6693 while (m_space.match(text, this->interval.end, end, flags))
6694 this->interval.end = m_space.interval.end;
6695 if (this->interval.end < end && text[this->interval.end] == '=')
6696 this->interval.end++;
6697 else
6698 goto error;
6699 while (m_space.match(text, this->interval.end, end, flags))
6700 this->interval.end = m_space.interval.end;
6701 if (value.match(text, this->interval.end, end, flags))
6702 this->interval.end = value.interval.end;
6703 else
6704 goto error;
6705 params.clear();
6706 for (;;) {
6707 if (this->interval.end < end && text[this->interval.end]) {
6708 if (m_space.match(text, this->interval.end, end, flags))
6709 this->interval.end = m_space.interval.end;
6710 else if (text[this->interval.end] == ';') {
6711 this->interval.end++;
6712 while (m_space.match(text, this->interval.end, end, flags))
6713 this->interval.end = m_space.interval.end;
6715 if (param.match(text, this->interval.end, end, flags)) {
6716 this->interval.end = param.interval.end;
6717 params.push_back(std::move(param));
6718 }
6719 else
6720 break;
6721 }
6722 else
6723 break;
6724 }
6725 else
6726 break;
6727 }
6728 this->interval.start = start;
6729 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
6730 return true;
6731
6732 error:
6733 invalidate();
6734 return false;
6735 }
6736
6737 http_space m_space;
6738 };
6739
6743 class http_agent : public parser
6744 {
6745 public:
6746 virtual void invalidate()
6747 {
6748 type.start = 1;
6749 type.end = 0;
6750 version.start = 1;
6751 version.end = 0;
6752 parser::invalidate();
6753 }
6754
6757
6758 protected:
6759 virtual bool do_match(
6760 _In_reads_or_z_(end) const char* text,
6761 _In_ size_t start = 0,
6762 _In_ size_t end = SIZE_MAX,
6763 _In_ int flags = match_default)
6764 {
6765 _Assume_(text || start >= end);
6766 this->interval.end = start;
6767 type.start = this->interval.end;
6768 for (;;) {
6769 if (this->interval.end < end && text[this->interval.end]) {
6770 if (text[this->interval.end] == '/') {
6771 type.end = this->interval.end;
6772 this->interval.end++;
6773 version.start = this->interval.end;
6774 for (;;) {
6775 if (this->interval.end < end && text[this->interval.end]) {
6776 if (stdex::isspace(text[this->interval.end])) {
6777 version.end = this->interval.end;
6778 break;
6779 }
6780 else
6781 this->interval.end++;
6782 }
6783 else {
6784 version.end = this->interval.end;
6785 break;
6786 }
6787 }
6788 break;
6789 }
6790 else if (stdex::isspace(text[this->interval.end])) {
6791 type.end = this->interval.end;
6792 break;
6793 }
6794 else
6795 this->interval.end++;
6796 }
6797 else {
6798 type.end = this->interval.end;
6799 break;
6800 }
6801 }
6803 this->interval.start = start;
6804 return true;
6805 }
6806 type.start = 1;
6807 type.end = 0;
6808 version.start = 1;
6809 version.end = 0;
6810 this->interval.invalidate();
6811 return false;
6812 }
6813 };
6814
6818 class http_protocol : public parser
6819 {
6820 public:
6821 http_protocol(_In_ const std::locale& locale = std::locale()) :
6822 parser(locale),
6823 version(0x009)
6824 {}
6825
6826 virtual void invalidate()
6827 {
6828 type.start = 1;
6829 type.end = 0;
6830 version_maj.start = 1;
6831 version_maj.end = 0;
6832 version_min.start = 1;
6833 version_min.end = 0;
6834 version = 0x009;
6835 parser::invalidate();
6836 }
6837
6839 stdex::interval<size_t> version_maj;
6840 stdex::interval<size_t> version_min;
6842
6843 protected:
6844 virtual bool do_match(
6845 _In_reads_or_z_(end) const char* text,
6846 _In_ size_t start = 0,
6847 _In_ size_t end = SIZE_MAX,
6848 _In_ int flags = match_default)
6849 {
6850 _Assume_(text || start >= end);
6851 this->interval.end = start;
6852 type.start = this->interval.end;
6853 for (;;) {
6854 if (this->interval.end < end && text[this->interval.end]) {
6855 if (text[this->interval.end] == '/') {
6856 type.end = this->interval.end;
6857 this->interval.end++;
6858 break;
6859 }
6860 else if (stdex::isspace(text[this->interval.end]))
6861 goto error;
6862 else
6863 this->interval.end++;
6864 }
6865 else {
6866 type.end = this->interval.end;
6867 goto error;
6868 }
6869 }
6870 version_maj.start = this->interval.end;
6871 for (;;) {
6872 if (this->interval.end < end && text[this->interval.end]) {
6873 if (text[this->interval.end] == '.') {
6874 version_maj.end = this->interval.end;
6875 this->interval.end++;
6876 version_min.start = this->interval.end;
6877 for (;;) {
6878 if (this->interval.end < end && text[this->interval.end]) {
6879 if (stdex::isspace(text[this->interval.end])) {
6880 version_min.end = this->interval.end;
6881 version =
6882 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6883 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6884 break;
6885 }
6886 else
6887 this->interval.end++;
6888 }
6889 else
6890 goto error;
6891 }
6892 break;
6893 }
6894 else if (stdex::isspace(text[this->interval.end])) {
6895 version_maj.end = this->interval.end;
6896 version_min.start = 1;
6897 version_min.end = 0;
6898 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6899 break;
6900 }
6901 else
6902 this->interval.end++;
6903 }
6904 else
6905 goto error;
6906 }
6907 this->interval.start = start;
6908 return true;
6909
6910 error:
6911 invalidate();
6912 return false;
6913 }
6914 };
6915
6919 class http_request : public parser
6920 {
6921 public:
6922 http_request(_In_ const std::locale& locale = std::locale()) :
6923 parser(locale),
6924 url(locale),
6925 protocol(locale)
6926 {}
6927
6928 virtual void invalidate()
6929 {
6930 verb.start = 1;
6931 verb.end = 0;
6932 url.invalidate();
6933 protocol.invalidate();
6934 parser::invalidate();
6935 }
6936
6938 http_url url;
6939 http_protocol protocol;
6940
6941 protected:
6942 virtual bool do_match(
6943 _In_reads_or_z_(end) const char* text,
6944 _In_ size_t start = 0,
6945 _In_ size_t end = SIZE_MAX,
6946 _In_ int flags = match_default)
6947 {
6948 _Assume_(text || start >= end);
6949 this->interval.end = start;
6950
6951 for (;;) {
6952 if (m_line_break.match(text, this->interval.end, end, flags))
6953 goto error;
6954 else if (this->interval.end < end && text[this->interval.end]) {
6955 if (stdex::isspace(text[this->interval.end]))
6956 this->interval.end++;
6957 else
6958 break;
6959 }
6960 else
6961 goto error;
6962 }
6963 verb.start = this->interval.end;
6964 for (;;) {
6965 if (m_line_break.match(text, this->interval.end, end, flags))
6966 goto error;
6967 else if (this->interval.end < end && text[this->interval.end]) {
6968 if (stdex::isspace(text[this->interval.end])) {
6969 verb.end = this->interval.end;
6970 this->interval.end++;
6971 break;
6972 }
6973 else
6974 this->interval.end++;
6975 }
6976 else
6977 goto error;
6978 }
6979
6980 for (;;) {
6981 if (m_line_break.match(text, this->interval.end, end, flags))
6982 goto error;
6983 else if (this->interval.end < end && text[this->interval.end]) {
6984 if (stdex::isspace(text[this->interval.end]))
6985 this->interval.end++;
6986 else
6987 break;
6988 }
6989 else
6990 goto error;
6991 }
6992 if (url.match(text, this->interval.end, end, flags))
6993 this->interval.end = url.interval.end;
6994 else
6995 goto error;
6996
6997 protocol.invalidate();
6998 for (;;) {
6999 if (m_line_break.match(text, this->interval.end, end, flags)) {
7000 this->interval.end = m_line_break.interval.end;
7001 goto end;
7002 }
7003 else if (this->interval.end < end && text[this->interval.end]) {
7004 if (stdex::isspace(text[this->interval.end]))
7005 this->interval.end++;
7006 else
7007 break;
7008 }
7009 else
7010 goto end;
7011 }
7012 for (;;) {
7013 if (m_line_break.match(text, this->interval.end, end, flags)) {
7014 this->interval.end = m_line_break.interval.end;
7015 goto end;
7016 }
7017 else if (protocol.match(text, this->interval.end, end, flags)) {
7018 this->interval.end = protocol.interval.end;
7019 break;
7020 }
7021 else
7022 goto end;
7023 }
7024
7025 for (;;) {
7026 if (m_line_break.match(text, this->interval.end, end, flags)) {
7027 this->interval.end = m_line_break.interval.end;
7028 break;
7029 }
7030 else if (this->interval.end < end && text[this->interval.end])
7031 this->interval.end++;
7032 else
7033 goto end;
7034 }
7035
7036 end:
7037 this->interval.start = start;
7038 return true;
7039
7040 error:
7041 invalidate();
7042 return false;
7043 }
7044
7045 http_line_break m_line_break;
7046 };
7047
7051 class http_header : public parser
7052 {
7053 public:
7054 virtual void invalidate()
7055 {
7056 name.start = 1;
7057 name.end = 0;
7058 value.start = 1;
7059 value.end = 0;
7060 parser::invalidate();
7061 }
7062
7065
7066 protected:
7067 virtual bool do_match(
7068 _In_reads_or_z_(end) const char* text,
7069 _In_ size_t start = 0,
7070 _In_ size_t end = SIZE_MAX,
7071 _In_ int flags = match_default)
7072 {
7073 _Assume_(text || start >= end);
7074 this->interval.end = start;
7075
7076 if (m_line_break.match(text, this->interval.end, end, flags) ||
7077 (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])))
7078 goto error;
7079 name.start = this->interval.end;
7080 for (;;) {
7081 if (m_line_break.match(text, this->interval.end, end, flags))
7082 goto error;
7083 else if (this->interval.end < end && text[this->interval.end]) {
7084 if (stdex::isspace(text[this->interval.end])) {
7085 name.end = this->interval.end;
7086 this->interval.end++;
7087 for (;;) {
7088 if (m_line_break.match(text, this->interval.end, end, flags))
7089 goto error;
7090 else if (this->interval.end < end && text[this->interval.end]) {
7091 if (stdex::isspace(text[this->interval.end]))
7092 this->interval.end++;
7093 else
7094 break;
7095 }
7096 else
7097 goto error;
7098 }
7099 if (this->interval.end < end && text[this->interval.end] == ':') {
7100 this->interval.end++;
7101 break;
7102 }
7103 else
7104 goto error;
7105 break;
7106 }
7107 else if (text[this->interval.end] == ':') {
7108 name.end = this->interval.end;
7109 this->interval.end++;
7110 break;
7111 }
7112 else
7113 this->interval.end++;
7114 }
7115 else
7116 goto error;
7117 }
7118 value.start = SIZE_MAX;
7119 value.end = 0;
7120 for (;;) {
7121 if (m_line_break.match(text, this->interval.end, end, flags)) {
7122 this->interval.end = m_line_break.interval.end;
7123 if (!m_line_break.match(text, this->interval.end, end, flags) &&
7124 this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end]))
7125 this->interval.end++;
7126 else
7127 break;
7128 }
7129 else if (this->interval.end < end && text[this->interval.end]) {
7130 if (stdex::isspace(text[this->interval.end]))
7131 this->interval.end++;
7132 else {
7133 if (value.start == SIZE_MAX) value.start = this->interval.end;
7134 value.end = ++this->interval.end;
7135 }
7136 }
7137 else
7138 break;
7139 }
7140 this->interval.start = start;
7141 return true;
7142
7143 error:
7144 invalidate();
7145 return false;
7146 }
7147
7148 http_line_break m_line_break;
7149 };
7150
7154 template <class KEY, class T>
7155 class http_value_collection : public T
7156 {
7157 public:
7158 void insert(
7159 _In_reads_or_z_(end) const char* text,
7160 _In_ size_t start = 0,
7161 _In_ size_t end = SIZE_MAX,
7162 _In_ int flags = match_default)
7163 {
7164 while (start < end) {
7165 while (start < end && text[start] && stdex::isspace(text[start])) start++;
7166 if (start < end && text[start] == ',') {
7167 start++;
7168 while (start < end&& text[start] && stdex::isspace(text[start])) start++;
7169 }
7170 KEY el;
7171 if (el.match(text, start, end, flags)) {
7172 start = el.interval.end;
7173 T::insert(std::move(el));
7174 }
7175 else
7176 break;
7177 }
7178 }
7179 };
7180
7181 template <class T>
7183 constexpr bool operator()(const T& a, const T& b) const noexcept
7184 {
7185 return a.factor.value > b.factor.value;
7186 }
7187 };
7188
7192 template <class T, class AX = std::allocator<T>>
7194
7198 template <class T>
7200 {
7201 public:
7203 _In_ const std::shared_ptr<basic_parser<T>>& quote,
7204 _In_ const std::shared_ptr<basic_parser<T>>& chr,
7205 _In_ const std::shared_ptr<basic_parser<T>>& escape,
7206 _In_ const std::shared_ptr<basic_parser<T>>& sol,
7207 _In_ const std::shared_ptr<basic_parser<T>>& bs,
7208 _In_ const std::shared_ptr<basic_parser<T>>& ff,
7209 _In_ const std::shared_ptr<basic_parser<T>>& lf,
7210 _In_ const std::shared_ptr<basic_parser<T>>& cr,
7211 _In_ const std::shared_ptr<basic_parser<T>>& htab,
7212 _In_ const std::shared_ptr<basic_parser<T>>& uni,
7213 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
7214 _In_ const std::locale& locale = std::locale()) :
7216 m_quote(quote),
7217 m_chr(chr),
7218 m_escape(escape),
7219 m_sol(sol),
7220 m_bs(bs),
7221 m_ff(ff),
7222 m_lf(lf),
7223 m_cr(cr),
7224 m_htab(htab),
7225 m_uni(uni),
7226 m_hex(hex)
7227 {}
7228
7229 virtual void invalidate()
7230 {
7231 value.clear();
7233 }
7234
7235 std::basic_string<T> value;
7236
7237 protected:
7238 virtual bool do_match(
7239 _In_reads_or_z_opt_(end) const T* text,
7240 _In_ size_t start = 0,
7241 _In_ size_t end = SIZE_MAX,
7242 _In_ int flags = match_default)
7243 {
7244 _Assume_(text || start >= end);
7245 this->interval.end = start;
7246 if (m_quote->match(text, this->interval.end, end, flags)) {
7247 this->interval.end = m_quote->interval.end;
7248 value.clear();
7249 for (;;) {
7250 if (m_quote->match(text, this->interval.end, end, flags)) {
7251 this->interval.start = start;
7252 this->interval.end = m_quote->interval.end;
7253 return true;
7254 }
7255 if (m_escape->match(text, this->interval.end, end, flags)) {
7256 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
7257 value += '"'; this->interval.end = m_quote->interval.end;
7258 continue;
7259 }
7260 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
7261 value += '/'; this->interval.end = m_sol->interval.end;
7262 continue;
7263 }
7264 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
7265 value += '\b'; this->interval.end = m_bs->interval.end;
7266 continue;
7267 }
7268 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
7269 value += '\f'; this->interval.end = m_ff->interval.end;
7270 continue;
7271 }
7272 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
7273 value += '\n'; this->interval.end = m_lf->interval.end;
7274 continue;
7275 }
7276 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
7277 value += '\r'; this->interval.end = m_cr->interval.end;
7278 continue;
7279 }
7280 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
7281 value += '\t'; this->interval.end = m_htab->interval.end;
7282 continue;
7283 }
7284 if (
7285 m_uni->match(text, m_escape->interval.end, end, flags) &&
7286 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
7287 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
7288 {
7289 _Assume_(m_hex->value <= 0xffff);
7290 if (sizeof(T) == 1) {
7291 if (m_hex->value > 0x7ff) {
7292 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
7293 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
7294 value += (T)(0x80 | (m_hex->value & 0x3f));
7295 }
7296 else if (m_hex->value > 0x7f) {
7297 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
7298 value += (T)(0x80 | (m_hex->value & 0x3f));
7299 }
7300 else
7301 value += (T)(m_hex->value & 0x7f);
7302 }
7303 else
7304 value += (T)m_hex->value;
7305 this->interval.end = m_hex->interval.end;
7306 continue;
7307 }
7308 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
7309 value += '\\'; this->interval.end = m_escape->interval.end;
7310 continue;
7311 }
7312 }
7313 if (m_chr->match(text, this->interval.end, end, flags)) {
7314 value.append(text + m_chr->interval.start, m_chr->interval.size());
7315 this->interval.end = m_chr->interval.end;
7316 continue;
7317 }
7318 break;
7319 }
7320 }
7321 value.clear();
7322 this->interval.invalidate();
7323 return false;
7324 }
7325
7326 std::shared_ptr<basic_parser<T>> m_quote;
7327 std::shared_ptr<basic_parser<T>> m_chr;
7328 std::shared_ptr<basic_parser<T>> m_escape;
7329 std::shared_ptr<basic_parser<T>> m_sol;
7330 std::shared_ptr<basic_parser<T>> m_bs;
7331 std::shared_ptr<basic_parser<T>> m_ff;
7332 std::shared_ptr<basic_parser<T>> m_lf;
7333 std::shared_ptr<basic_parser<T>> m_cr;
7334 std::shared_ptr<basic_parser<T>> m_htab;
7335 std::shared_ptr<basic_parser<T>> m_uni;
7336 std::shared_ptr<basic_integer16<T>> m_hex;
7337 };
7338
7341#ifdef _UNICODE
7342 using tjson_string = wjson_string;
7343#else
7344 using tjson_string = json_string;
7345#endif
7346
7350 template <class T>
7352 {
7353 public:
7354 virtual void invalidate()
7355 {
7356 this->content.invalidate();
7358 }
7359
7361
7362 protected:
7363 virtual bool do_match(
7364 _In_reads_or_z_opt_(end) const T* text,
7365 _In_ size_t start = 0,
7366 _In_ size_t end = SIZE_MAX,
7367 _In_ int flags = match_multiline)
7368 {
7369 _Unreferenced_(flags);
7370 _Assume_(text || start + 1 >= end);
7371 if (start + 1 < end &&
7372 text[start] == '/' &&
7373 text[start + 1] == '*')
7374 {
7375 // /*
7376 this->content.start = this->interval.end = start + 2;
7377 for (;;) {
7378 if (this->interval.end >= end || !text[this->interval.end])
7379 break;
7380 if (this->interval.end + 1 < end &&
7381 text[this->interval.end] == '*' &&
7382 text[this->interval.end + 1] == '/')
7383 {
7384 // /*...*/
7385 this->content.end = this->interval.end;
7386 this->interval.start = start;
7387 this->interval.end = this->interval.end + 2;
7388 return true;
7389 }
7390 this->interval.end++;
7391 }
7392 }
7393 this->content.invalidate();
7394 this->interval.invalidate();
7395 return false;
7396 }
7397 };
7398
7399 using css_comment = basic_css_comment<char>;
7400 using wcss_comment = basic_css_comment<wchar_t>;
7401#ifdef _UNICODE
7402 using tcss_comment = wcss_comment;
7403#else
7404 using tcss_comment = css_comment;
7405#endif
7406
7410 template <class T>
7411 class basic_css_cdo : public basic_parser<T>
7412 {
7413 protected:
7414 virtual bool do_match(
7415 _In_reads_or_z_opt_(end) const T* text,
7416 _In_ size_t start = 0,
7417 _In_ size_t end = SIZE_MAX,
7418 _In_ int flags = match_multiline)
7419 {
7420 _Unreferenced_(flags);
7421 _Assume_(text || start + 3 >= end);
7422 if (start + 3 < end &&
7423 text[start] == '<' &&
7424 text[start + 1] == '!' &&
7425 text[start + 2] == '-' &&
7426 text[start + 3] == '-')
7427 {
7428 this->interval.start = start;
7429 this->interval.end = start + 4;
7430 return true;
7431 }
7432 this->interval.invalidate();
7433 return false;
7434 }
7435 };
7436
7439#ifdef _UNICODE
7440 using tcss_cdo = wcss_cdo;
7441#else
7442 using tcss_cdo = css_cdo;
7443#endif
7444
7448 template <class T>
7449 class basic_css_cdc : public basic_parser<T>
7450 {
7451 protected:
7452 virtual bool do_match(
7453 _In_reads_or_z_opt_(end) const T* text,
7454 _In_ size_t start = 0,
7455 _In_ size_t end = SIZE_MAX,
7456 _In_ int flags = match_multiline)
7457 {
7458 _Unreferenced_(flags);
7459 _Assume_(text || start + 2 >= end);
7460 if (start + 2 < end &&
7461 text[start] == '-' &&
7462 text[start + 1] == '-' &&
7463 text[start + 2] == '>')
7464 {
7465 this->interval.start = start;
7466 this->interval.end = start + 3;
7467 return true;
7468 }
7469 this->interval.invalidate();
7470 return false;
7471 }
7472 };
7473
7476#ifdef _UNICODE
7477 using tcss_cdc = wcss_cdc;
7478#else
7479 using tcss_cdc = css_cdc;
7480#endif
7481
7485 template <class T>
7487 {
7488 public:
7489 virtual void invalidate()
7490 {
7491 this->content.invalidate();
7493 }
7494
7496
7497 protected:
7498 virtual bool do_match(
7499 _In_reads_or_z_opt_(end) const T* text,
7500 _In_ size_t start = 0,
7501 _In_ size_t end = SIZE_MAX,
7502 _In_ int flags = match_multiline)
7503 {
7504 _Unreferenced_(flags);
7505 this->interval.end = start;
7506 _Assume_(text || this->interval.end >= end);
7507 if (this->interval.end < end &&
7508 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7509 {
7510 // "Quoted...
7511 T quote = text[this->interval.end];
7512 this->content.start = ++this->interval.end;
7513 for (;;) {
7514 if (this->interval.end >= end || !text[this->interval.end])
7515 break;
7516 if (text[this->interval.end] == quote) {
7517 // End quote"
7518 this->content.end = this->interval.end;
7519 this->interval.start = start;
7520 this->interval.end++;
7521 return true;
7522 }
7523 if (this->interval.end + 1 < end &&
7524 text[this->interval.end] == '\\' &&
7525 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7526 {
7527 // Escaped quote
7528 this->interval.end = this->interval.end + 2;
7529 }
7530 else
7531 this->interval.end++;
7532 }
7533 }
7534
7535 this->content.invalidate();
7536 this->interval.invalidate();
7537 return false;
7538 }
7539 };
7540
7541 using css_string = basic_css_string<char>;
7542 using wcss_string = basic_css_string<wchar_t>;
7543#ifdef _UNICODE
7544 using tcss_string = wcss_string;
7545#else
7546 using tcss_string = css_string;
7547#endif
7548
7552 template <class T>
7553 class basic_css_uri : public basic_parser<T>
7554 {
7555 public:
7556 virtual void invalidate()
7557 {
7558 this->content.invalidate();
7560 }
7561
7563
7564 protected:
7565 virtual bool do_match(
7566 _In_reads_or_z_opt_(end) const T* text,
7567 _In_ size_t start = 0,
7568 _In_ size_t end = SIZE_MAX,
7569 _In_ int flags = match_multiline)
7570 {
7571 _Unreferenced_(flags);
7572 this->interval.end = start;
7573 _Assume_(text || this->interval.end + 3 >= end);
7574 if (this->interval.end + 3 < end &&
7575 (text[this->interval.end] == 'u' || text[this->interval.end] == 'U') &&
7576 (text[this->interval.end + 1] == 'r' || text[this->interval.end + 1] == 'R') &&
7577 (text[this->interval.end + 2] == 'l' || text[this->interval.end + 2] == 'L') &&
7578 text[this->interval.end + 3] == '(')
7579 {
7580 // url(
7581 this->interval.end = this->interval.end + 4;
7582
7583 // Skip whitespace.
7584 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7585 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7586
7587 if (this->interval.end < end &&
7588 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7589 {
7590 // url("Quoted...
7591 T quote = text[this->interval.end];
7592 this->content.start = ++this->interval.end;
7593 for (;;) {
7594 if (this->interval.end >= end || !text[this->interval.end])
7595 goto error;
7596 if (text[this->interval.end] == quote) {
7597 // End quote"
7598 this->content.end = this->interval.end;
7599 this->interval.end++;
7600 break;
7601 }
7602 if (this->interval.end + 1 < end &&
7603 text[this->interval.end] == '\\' &&
7604 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7605 {
7606 // Escaped quote
7607 this->interval.end = this->interval.end + 2;
7608 }
7609 else
7610 this->interval.end++;
7611 }
7612
7613 // Skip whitespace.
7614 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7615
7616 if (this->interval.end < end &&
7617 text[this->interval.end] == ')')
7618 {
7619 // url("...")
7620 this->interval.start = start;
7621 this->interval.end++;
7622 return true;
7623 }
7624 }
7625 else {
7626 // url(...
7627 this->content.start = content.end = this->interval.end;
7628 for (;;) {
7629 if (this->interval.end >= end || !text[this->interval.end])
7630 goto error;
7631 if (text[this->interval.end] == ')') {
7632 // url(...)
7633 this->interval.start = start;
7634 this->interval.end++;
7635 return true;
7636 }
7637 if (ctype.is(ctype.space, text[this->interval.end]))
7638 this->interval.end++;
7639 else
7640 this->content.end = ++this->interval.end;
7641 }
7642 }
7643 }
7644
7645 error:
7646 invalidate();
7647 return false;
7648 }
7649 };
7650
7651 using css_uri = basic_css_uri<char>;
7652 using wcss_uri = basic_css_uri<wchar_t>;
7653#ifdef _UNICODE
7654 using tcss_uri = wcss_uri;
7655#else
7656 using tcss_uri = css_uri;
7657#endif
7658
7662 template <class T>
7664 {
7665 public:
7666 virtual void invalidate()
7667 {
7668 this->content.invalidate();
7670 }
7671
7673
7674 protected:
7675 virtual bool do_match(
7676 _In_reads_or_z_opt_(end) const T* text,
7677 _In_ size_t start = 0,
7678 _In_ size_t end = SIZE_MAX,
7679 _In_ int flags = match_multiline)
7680 {
7681 _Unreferenced_(flags);
7682 this->interval.end = start;
7683 _Assume_(text || this->interval.end + 6 >= end);
7684 if (this->interval.end + 6 < end &&
7685 text[this->interval.end] == '@' &&
7686 (text[this->interval.end + 1] == 'i' || text[this->interval.end + 1] == 'I') &&
7687 (text[this->interval.end + 2] == 'm' || text[this->interval.end + 2] == 'M') &&
7688 (text[this->interval.end + 3] == 'p' || text[this->interval.end + 3] == 'P') &&
7689 (text[this->interval.end + 4] == 'o' || text[this->interval.end + 4] == 'O') &&
7690 (text[this->interval.end + 5] == 'r' || text[this->interval.end + 5] == 'R') &&
7691 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T'))
7692 {
7693 // @import...
7694 this->interval.end = this->interval.end + 7;
7695
7696 // Skip whitespace.
7697 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7698 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7699
7700 if (this->interval.end < end &&
7701 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7702 {
7703 // @import "Quoted
7704 T quote = text[this->interval.end];
7705 this->content.start = ++this->interval.end;
7706 for (;;) {
7707 if (this->interval.end >= end || !text[this->interval.end])
7708 goto error;
7709 if (text[this->interval.end] == quote) {
7710 // End quote"
7711 this->content.end = this->interval.end;
7712 this->interval.start = start;
7713 this->interval.end++;
7714 return true;
7715 }
7716 if (this->interval.end + 1 < end &&
7717 text[this->interval.end] == '\\' &&
7718 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7719 {
7720 // Escaped quote
7721 this->interval.end = this->interval.end + 2;
7722 }
7723 else
7724 this->interval.end++;
7725 }
7726 }
7727 }
7728
7729 error:
7730 invalidate();
7731 return false;
7732 }
7733 };
7734
7735 using css_import = basic_css_import<char>;
7736 using wcss_import = basic_css_import<wchar_t>;
7737#ifdef _UNICODE
7738 using tcss_import = wcss_import;
7739#else
7740 using tcss_import = css_import;
7741#endif
7742
7746 template <class T>
7748 {
7749 public:
7750 virtual void invalidate()
7751 {
7752 this->base_type.invalidate();
7753 this->sub_type.invalidate();
7754 this->charset.invalidate();
7756 }
7757
7761
7762 protected:
7763 virtual bool do_match(
7764 _In_reads_or_z_opt_(end) const T* text,
7765 _In_ size_t start = 0,
7766 _In_ size_t end = SIZE_MAX,
7767 _In_ int flags = match_multiline)
7768 {
7769 _Unreferenced_(flags);
7770 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7771
7772 this->interval.end = start;
7773 this->base_type.start = this->interval.end;
7774 for (;;) {
7775 _Assume_(text || this->interval.end >= end);
7776 if (this->interval.end >= end || !text[this->interval.end])
7777 break;
7778 if (text[this->interval.end] == '/' ||
7779 text[this->interval.end] == ';' ||
7780 ctype.is(ctype.space, text[this->interval.end]))
7781 break;
7782 this->interval.end++;
7783 }
7784 if (this->interval.end <= this->base_type.start)
7785 goto error;
7786 this->base_type.end = this->interval.end;
7787
7788 if (end <= this->interval.end || text[this->interval.end] != '/')
7789 goto error;
7790
7791 this->interval.end++;
7792 this->sub_type.start = this->interval.end;
7793 for (;;) {
7794 if (this->interval.end >= end || !text[this->interval.end])
7795 break;
7796 if (text[this->interval.end] == '/' ||
7797 text[this->interval.end] == ';' ||
7798 ctype.is(ctype.space, text[this->interval.end]))
7799 break;
7800 this->interval.end++;
7801 }
7802 if (this->interval.end <= this->sub_type.start)
7803 goto error;
7804
7805 this->sub_type.end = this->interval.end;
7806 this->charset.invalidate();
7807 if (this->interval.end < end && text[this->interval.end] == ';') {
7808 this->interval.end++;
7809
7810 // Skip whitespace.
7811 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7812
7813 if (this->interval.end + 7 < end &&
7814 (text[this->interval.end] == 'c' || text[this->interval.end] == 'C') &&
7815 (text[this->interval.end + 1] == 'h' || text[this->interval.end + 1] == 'H') &&
7816 (text[this->interval.end + 2] == 'a' || text[this->interval.end + 2] == 'A') &&
7817 (text[this->interval.end + 3] == 'r' || text[this->interval.end + 3] == 'R') &&
7818 (text[this->interval.end + 4] == 's' || text[this->interval.end + 4] == 'S') &&
7819 (text[this->interval.end + 5] == 'e' || text[this->interval.end + 5] == 'E') &&
7820 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T') &&
7821 text[this->interval.end + 7] == '=')
7822 {
7823 this->interval.end = this->interval.end + 8;
7824 if (this->interval.end < end &&
7825 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7826 {
7827 // "Quoted...
7828 T quote = text[this->interval.end];
7829 this->charset.start = ++this->interval.end;
7830 for (;;) {
7831 if (this->interval.end >= end || !text[this->interval.end]) {
7832 // No end quote!
7833 this->charset.invalidate();
7834 break;
7835 }
7836 if (text[this->interval.end] == quote) {
7837 // End quote"
7838 this->charset.end = this->interval.end;
7839 this->interval.end++;
7840 break;
7841 }
7842 this->interval.end++;
7843 }
7844 }
7845 else {
7846 // Nonquoted
7847 this->charset.start = this->interval.end;
7848 for (;;) {
7849 if (this->interval.end >= end || !text[this->interval.end] ||
7850 ctype.is(ctype.space, text[this->interval.end])) {
7851 this->charset.end = this->interval.end;
7852 break;
7853 }
7854 this->interval.end++;
7855 }
7856 }
7857 }
7858 }
7859 this->interval.start = start;
7860 return true;
7861
7862 error:
7863 invalidate();
7864 return false;
7865 }
7866 };
7867
// Convenience aliases: basic_mime_type instantiated for char and for wchar_t.
7868 using mime_type = basic_mime_type<char>;
7869 using wmime_type = basic_mime_type<wchar_t>;
// tmime_type resolves to the wchar_t alias when _UNICODE is defined and to the
// char alias otherwise.
7870#ifdef _UNICODE
7871 using tmime_type = wmime_type;
7872#else
7873 using tmime_type = mime_type;
7874#endif
7875
7879 template <class T>
7881 {
7882 protected:
// Scans an identifier (name of an element, attribute, ...) starting at text[start].
//
// text  - buffer to scan; may be null only when start >= end (see _Assume_ below).
// start - index of the first character to examine.
// end   - index one past the last character that may be examined; scanning also
//         stops at a zero character.
// flags - not used by this parser.
//
// Characters are consumed until one of the terminators below is met. On success
// `interval` is set to [start, index of the terminator) and true is returned.
7883 virtual bool do_match(
7884 _In_reads_or_z_opt_(end) const T* text,
7885 _In_ size_t start = 0,
7886 _In_ size_t end = SIZE_MAX,
7887 _In_ int flags = match_default)
7888 {
7889 _Unreferenced_(flags);
// Whitespace classification follows the parser's locale.
7890 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7891 this->interval.end = start;
7892 for (;;) {
7893 _Assume_(text || this->interval.end >= end);
// End of the range or a zero terminator was reached.
// NOTE(review): source line 7895 is absent from this listing. Judging by the
// unbalanced braces it presumably guards the "return true" below with a check
// that at least one character was consumed - confirm against the original file.
7894 if (this->interval.end >= end || !text[this->interval.end]) {
7896 this->interval.start = start;
7897 return true;
7898 }
7899 this->interval.invalidate();
7900 return false;
7901 }
// Terminators: '>', '=', the two-character sequence "/>", or a space character.
// This branch reports success without checking how many characters were consumed,
// so the resulting interval may be empty.
7902 if (text[this->interval.end] == '>' ||
7903 text[this->interval.end] == '=' ||
7904 (text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>') ||
7905 ctype.is(ctype.space, text[this->interval.end]))
7906 {
7907 this->interval.start = start;
7908 return true;
7909 }
7910 this->interval.end++;
7911 }
7912 }
7913 };
7914
// thtml_ident resolves to whtml_ident when _UNICODE is defined and to html_ident
// otherwise.
7917#ifdef _UNICODE
7918 using thtml_ident = whtml_ident;
7919#else
7920 using thtml_ident = html_ident;
7921#endif
7922
7926 template <class T>
7928 {
7929 public:
// Discards the result of the previous match: marks `content` as invalid.
// NOTE(review): source line 7933 is absent from this listing; presumably it
// forwards to the base class invalidate() - confirm against the original file.
7930 virtual void invalidate()
7931 {
7932 this->content.invalidate();
7934 }
7935
7937
7938 protected:
// Matches an attribute value, either quoted or unquoted, starting at text[start].
//
// text  - buffer to scan; may be null only when start >= end.
// start - index of the first character to examine.
// end   - index one past the last character that may be examined; scanning also
//         stops at a zero character.
// flags - not used by this parser.
//
// Quoted form:   the value opens with '"' or '\'' and must be closed by the same
//                character. `content` covers the text between the quotes and
//                `interval` additionally covers both quotes. A missing closing
//                quote invalidates `content` and `interval` and returns false.
// Unquoted form: characters are taken up to '>', "/>", a space character, a zero
//                character or `end`. `content` and `interval` cover the same
//                range. This form always succeeds, possibly with an empty range.
7939 virtual bool do_match(
7940 _In_reads_or_z_opt_(end) const T* text,
7941 _In_ size_t start = 0,
7942 _In_ size_t end = SIZE_MAX,
7943 _In_ int flags = match_default)
7944 {
7945 _Unreferenced_(flags);
7946 this->interval.end = start;
7947 _Assume_(text || this->interval.end >= end);
7948 if (this->interval.end < end &&
7949 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7950 {
7951 // "Quoted...
// Remember which quote opened the value; only the same character closes it.
7952 T quote = text[this->interval.end];
7953 this->content.start = ++this->interval.end;
7954 for (;;) {
7955 if (this->interval.end >= end || !text[this->interval.end]) {
7956 // No end quote!
7957 this->content.invalidate();
7958 this->interval.invalidate();
7959 return false;
7960 }
7961 if (text[this->interval.end] == quote) {
7962 // End quote"
// content stops before the closing quote; interval.end is moved past it.
7963 this->content.end = this->interval.end;
7964 this->interval.start = start;
7965 this->interval.end++;
7966 return true;
7967 }
7968 this->interval.end++;
7969 }
7970 }
7971
7972 // Nonquoted
7973 this->content.start = this->interval.end;
7974 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7975 for (;;) {
7976 _Assume_(text || this->interval.end >= end);
// Running out of input ends an unquoted value successfully.
7977 if (this->interval.end >= end || !text[this->interval.end]) {
7978 this->content.end = this->interval.end;
7979 this->interval.start = start;
7980 return true;
7981 }
// '>', "/>" or a space character ends the value; the terminator is not consumed.
7982 if (text[this->interval.end] == '>' ||
7983 (text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>') ||
7984 ctype.is(ctype.space, text[this->interval.end]))
7985 {
7986 this->content.end = this->interval.end;
7987 this->interval.start = start;
7988 return true;
7989 }
7990 this->interval.end++;
7991 }
7992 }
7993 };
7994
// Convenience aliases: basic_html_value instantiated for char and for wchar_t.
7995 using html_value = basic_html_value<char>;
7996 using whtml_value = basic_html_value<wchar_t>;
// thtml_value resolves to the wchar_t alias when _UNICODE is defined and to the
// char alias otherwise.
7997#ifdef _UNICODE
7998 using thtml_value = whtml_value;
7999#else
8000 using thtml_value = html_value;
8001#endif
8002
// Kind of sequence recognised in HTML source. The notes on individual values
// describe how basic_html_tag::do_match assigns them.
8006 enum class html_sequence_t {
// Not assigned by the tag parser in this part of the file; presumably plain text
// between tags - confirm against the code that uses it.
8007 text = 0,
// "<tag .../>": element opened and closed by the same tag.
8008 element,
// "<tag ...>"
8009 element_start,
// "</tag ...>"
8010 element_end,
// "<!...>"
8011 declaration,
// "<!--...-->"
8012 comment,
// "<?...>" or "<?...?>"
8013 instruction,
// PCDATA and CDATA are not assigned by the tag parser in this part of the file.
8014 PCDATA,
8015 CDATA,
8016
// State of a basic_html_tag after construction or invalidate().
8017 unknown = -1,
8018 };
8019
8027
8031 template <class T>
8033 {
8034 public:
// Constructs a tag parser whose `type` starts as html_sequence_t::unknown.
// NOTE(review): source line 8036 is absent from this listing; presumably it passes
// `locale` on to the base class and/or member parsers - confirm against the
// original file.
8035 basic_html_tag(_In_ const std::locale& locale = std::locale()) :
8037 type(html_sequence_t::unknown)
8038 {}
8039
// Discards the result of the previous match: resets `type` to unknown, marks
// `name` as invalid and empties `attributes`.
// NOTE(review): source line 8045 is absent from this listing; presumably it
// forwards to the base class invalidate() - confirm against the original file.
8040 virtual void invalidate()
8041 {
8042 this->type = html_sequence_t::unknown;
8043 this->name.invalidate();
8044 this->attributes.clear();
8046 }
8047
// Tag type found by the last match; html_sequence_t::unknown when there is none.
8048 html_sequence_t type;
// Tag attributes found by the last match, in the order they were encountered.
8050 std::vector<html_attribute> attributes;
8051
8052 protected:
// Matches one HTML tag starting at text[start], which must be '<'.
//
// text  - buffer to scan; may be null only when start >= end.
// start - index of the opening '<'.
// end   - index one past the last character that may be examined; scanning also
//         stops at a zero character.
// flags - passed on unchanged to the identifier and value sub-parsers.
//
// Recognised forms and the resulting `type`:
//   "</name ...>"          element_end
//   "<!--...-->"           comment      (returns at once; `name` covers the body)
//   "<!...>" / "<!...!>"   declaration  (`name` is set to an empty interval)
//   "<?...>" / "<?...?>"   instruction  (returns at once; `name` covers the body)
//   "<name ...>"           element_start
//   "<name .../>"          element
// For all forms except comment and instruction, everything up to the closing
// delimiter is parsed as a list of attributes stored in `attributes`.
//
// Returns true with `interval` covering the whole tag including its closing
// delimiter. On failure invalidate() is called and false is returned.
8053 virtual bool do_match(
8054 _In_reads_or_z_opt_(end) const T* text,
8055 _In_ size_t start = 0,
8056 _In_ size_t end = SIZE_MAX,
8057 _In_ int flags = match_multiline)
8058 {
8059 _Assume_(text || start >= end);
8060 if (start >= end || text[start] != '<')
8061 goto error;
8062 this->interval.end = start + 1;
8063 if (this->interval.end >= end || !text[this->interval.end])
8064 goto error;
8065 if (text[this->interval.end] == '/' &&
8066 this->m_ident.match(text, this->interval.end + 1, end, flags))
8067 {
8068 // </...
8069 this->type = html_sequence_t::element_end;
8070 this->name = this->m_ident.interval;
8071 this->interval.end = this->m_ident.interval.end;
8072 }
8073 else if (text[this->interval.end] == '!') {
8074 // <!...
8075 this->interval.end++;
8076 if (this->interval.end + 1 < end &&
8077 text[this->interval.end] == '-' &&
8078 text[this->interval.end + 1] == '-')
8079 {
8080 // <!--...
// A comment is consumed as a whole here; an unterminated comment is an error.
8081 this->name.start = this->interval.end = this->interval.end + 2;
8082 for (;;) {
8083 if (this->interval.end >= end || !text[this->interval.end])
8084 goto error;
8085 if (this->interval.end + 2 < end &&
8086 text[this->interval.end] == '-' &&
8087 text[this->interval.end + 1] == '-' &&
8088 text[this->interval.end + 2] == '>')
8089 {
8090 // <!--...-->
8091 this->type = html_sequence_t::comment;
8092 this->name.end = this->interval.end;
8093 this->attributes.clear();
8094 this->interval.start = start;
8095 this->interval.end = this->interval.end + 3;
8096 return true;
8097 }
8098 this->interval.end++;
8099 }
8100 }
// Any other "<!" sequence is a declaration; its name is left empty and its
// content is picked up by the attribute loop below.
8101 this->type = html_sequence_t::declaration;
8102 this->name.start = this->name.end = this->interval.end;
8103 }
8104 else if (text[this->interval.end] == '?') {
8105 // <?...
8106 this->name.start = ++this->interval.end;
8107 for (;;) {
8108 if (this->interval.end >= end || !text[this->interval.end])
8109 goto error;
8110 if (text[this->interval.end] == '>') {
8111 // <?...>
8112 this->type = html_sequence_t::instruction;
8113 this->name.end = this->interval.end;
8114 this->attributes.clear();
8115 this->interval.start = start;
8116 this->interval.end++;
8117 return true;
8118 }
8119 if (this->interval.end + 1 < end &&
8120 text[this->interval.end] == '?' &&
8121 text[this->interval.end + 1] == '>')
8122 {
8123 // <?...?>
8124 this->type = html_sequence_t::instruction;
8125 this->name.end = this->interval.end;
8126 this->attributes.clear();
8127 this->interval.start = start;
8128 this->interval.end = this->interval.end + 2;
8129 return true;
8130 }
8131 this->interval.end++;
8132 }
8133 }
8134 else if (this->m_ident.match(text, this->interval.end, end, flags)) {
8135 // <tag...
8136 this->type = html_sequence_t::element_start;
8137 this->name = this->m_ident.interval;
8138 this->interval.end = this->m_ident.interval.end;
8139 }
8140 else
8141 goto error;
8142
// Nested scope: presumably so that the "goto error" statements above do not jump
// past the initialisation of `ctype` in the same scope.
8143 {
8144 // Skip whitespace.
8145 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
8146 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8147
8148 this->attributes.clear();
// Each iteration either finds the closing delimiter (break), skips an embedded
// "--...--" comment inside a declaration, or consumes one attribute.
8149 for (;;) {
8150 if (this->type == html_sequence_t::element_start &&
8151 this->interval.end + 1 < end &&
8152 text[this->interval.end] == '/' &&
8153 text[this->interval.end + 1] == '>')
8154 {
8155 // <tag .../>
8156 this->type = html_sequence_t::element;
8157 this->interval.end = this->interval.end + 2;
8158 break;
8159 }
8160 if (this->interval.end < end &&
8161 text[this->interval.end] == '>')
8162 {
8163 // <tag ...>
8164 this->interval.end++;
8165 break;
8166 }
8167 if (this->type == html_sequence_t::declaration &&
8168 this->interval.end + 1 < end &&
8169 text[this->interval.end] == '!' &&
8170 text[this->interval.end + 1] == '>')
8171 {
8172 // "<!...!>".
8173 this->interval.end = this->interval.end + 2;
8174 break;
8175 }
8176 if (this->type == html_sequence_t::declaration &&
8177 this->interval.end + 1 < end &&
8178 text[this->interval.end] == '-' &&
8179 text[this->interval.end + 1] == '-')
8180 {
8181 // "<! ... --...".
8182 this->interval.end = this->interval.end + 2;
8183 for (;;) {
8184 if (this->interval.end >= end || !text[this->interval.end])
8185 goto error;
8186 if (this->interval.end + 1 < end &&
8187 text[this->interval.end] == '-' &&
8188 text[this->interval.end + 1] == '-')
8189 {
8190 // "<! ... --...--".
8191 this->interval.end = this->interval.end + 2;
8192 break;
8193 }
8194 this->interval.end++;
8195 }
8196
8197 // Skip whitespace.
8198 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8199 continue;
8200 }
8201
// Input ended before any closing delimiter was found: the tag is incomplete.
8202 if (this->interval.end >= end || !text[this->interval.end])
8203 goto error;
8204
8205 // Attributes follow...
8206 html_attribute* a = nullptr;
8207 if (this->m_ident.match(text, this->interval.end, end, flags)) {
// Record the attribute name now; `a` points at the new entry so that a value,
// if one follows, can be filled in below.
8208 this->attributes.push_back(std::move(html_attribute{ this->m_ident.interval }));
8209 a = &this->attributes.back();
8210 _Assume_(a);
8211 this->interval.end = this->m_ident.interval.end;
8212 }
8213 else {
8214 // What was that?! Skip.
8215 this->interval.end++;
8216 continue;
8217 }
8218
8219 // Skip whitespace.
8220 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8221
8222 if (this->interval.end < end && text[this->interval.end] == '=') {
8223 this->interval.end++;
8224
8225 // Skip whitespace.
8226 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8227
// If no value can be matched after '=', a->value keeps the state it was
// constructed with and parsing continues from the current position.
8228 if (this->m_value.match(text, this->interval.end, end, flags)) {
8229 // This attribute has value.
8230 a->value = this->m_value.content;
8231 this->interval.end = this->m_value.interval.end;
8232
8233 // Skip whitespace.
8234 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8235 }
8236 }
8237 else {
8238 // This attribute has no value.
8239 a->value.invalidate();
8240 }
8241 }
8242 }
8243
8244 this->interval.start = start;
8245 return true;
8246
8247 error:
8248 invalidate();
8249 return false;
8250 }
8251
// Sub-parser used by do_match for tag names and attribute names.
8252 basic_html_ident<T> m_ident;
// Sub-parser used by do_match for attribute values.
8253 basic_html_value<T> m_value;
8254 };
8255
// Convenience aliases: basic_html_tag instantiated for char and for wchar_t.
8256 using html_tag = basic_html_tag<char>;
8257 using whtml_tag = basic_html_tag<wchar_t>;
// thtml_tag resolves to the wchar_t alias when _UNICODE is defined and to the
// char alias otherwise.
8258#ifdef _UNICODE
8259 using thtml_tag = whtml_tag;
8260#else
8261 using thtml_tag = html_tag;
8262#endif
8263
8267 template <class T>
8269 {
8270 public:
// Discards the result of the previous match: marks `condition` as invalid.
// NOTE(review): source line 8274 is absent from this listing; presumably it
// forwards to the base class invalidate() - confirm against the original file.
8271 virtual void invalidate()
8272 {
8273 this->condition.invalidate();
8275 }
8276
// Position of the condition text in the source, as set by do_match.
8277 stdex::interval<size_t> condition;
8278
8279 protected:
// Matches the start of a conditional declaration, "<![condition[", at text[start].
//
// text  - buffer to scan; may be null only when start + 2 >= end.
// start - index of the opening '<'.
// end   - index one past the last character that may be examined; scanning also
//         stops at a zero character.
// flags - not used by this parser.
//
// After "<![" leading whitespace is skipped, then everything up to the next '['
// is taken as the condition. `condition` runs from the first character after the
// skipped whitespace to one past the last non-space character, so trailing
// whitespace is excluded and the condition may be empty.
//
// Returns true with `interval` covering the text through the second '['. If the
// prefix is missing or no second '[' is found, `condition` and `interval` are
// invalidated and false is returned.
8280 virtual bool do_match(
8281 _In_reads_or_z_opt_(end) const T* text,
8282 _In_ size_t start = 0,
8283 _In_ size_t end = SIZE_MAX,
8284 _In_ int flags = match_multiline)
8285 {
8286 _Unreferenced_(flags);
8287 _Assume_(text || start + 2 >= end);
8288 if (start + 2 < end &&
8289 text[start] == '<' &&
8290 text[start + 1] == '!' &&
8291 text[start + 2] == '[')
8292 {
8293 this->interval.end = start + 3;
8294
8295 // Skip whitespace.
8296 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
8297 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8298
8299 this->condition.start = this->condition.end = this->interval.end;
8300
8301 for (;;) {
8302 if (this->interval.end >= end || !text[this->interval.end])
8303 break;
8304 if (text[this->interval.end] == '[') {
8305 this->interval.start = start;
8306 this->interval.end++;
8307 return true;
8308 }
// Spaces advance the scan without extending the condition; any other character
// extends condition.end past itself. This is what trims trailing whitespace.
8309 if (ctype.is(ctype.space, text[this->interval.end]))
8310 this->interval.end++;
8311 else
8312 this->condition.end = ++this->interval.end;
8313 }
8314 }
8315
8316 this->condition.invalidate();
8317 this->interval.invalidate();
8318 return false;
8319 }
8320 };
8321
// Convenience aliases: basic_html_declaration_condition_start instantiated for
// char and for wchar_t.
8322 using html_declaration_condition_start = basic_html_declaration_condition_start<char>;
8323 using whtml_declaration_condition_start = basic_html_declaration_condition_start<wchar_t>;
// The t-prefixed alias resolves to the wchar_t alias when _UNICODE is defined and
// to the char alias otherwise.
8324#ifdef _UNICODE
8325 using thtml_declaration_condition_start = whtml_declaration_condition_start;
8326#else
8327 using thtml_declaration_condition_start = html_declaration_condition_start;
8328#endif
8329
8333 template <class T>
8335 {
8336 protected:
// Matches the end of a conditional declaration, the three characters "]]>", at
// text[start].
//
// text  - buffer to scan; may be null only when start + 2 >= end.
// start - index of the first ']'.
// end   - index one past the last character that may be examined.
// flags - not used by this parser.
//
// Returns true with `interval` = [start, start + 3) on a match; otherwise
// invalidates `interval` and returns false.
8337 virtual bool do_match(
8338 _In_reads_or_z_opt_(end) const T* text,
8339 _In_ size_t start = 0,
8340 _In_ size_t end = SIZE_MAX,
8341 _In_ int flags = match_multiline)
8342 {
8343 _Unreferenced_(flags);
8344 _Assume_(text || start + 2 >= end);
// All three characters must lie inside [start, end).
8345 if (start + 2 < end &&
8346 text[start] == ']' &&
8347 text[start + 1] == ']' &&
8348 text[start + 2] == '>')
8349 {
8350 this->interval.start = start;
8351 this->interval.end = start + 3;
8352 return true;
8353 }
8354 this->interval.invalidate();
8355 return false;
8356 }
8357 };
8358
8361#ifdef _UNICODE
8363#else
8365#endif
8366 }
8367}
8368
8369#undef ENUM_FLAG_OPERATOR
8370#undef ENUM_FLAGS
8371
8372#if defined(_MSC_VER)
8373#pragma warning(pop)
8374#elif defined(__GNUC__)
8375#pragma GCC diagnostic pop
8376#endif
locale_t helper class to free_locale when going out of scope.
Definition locale.hpp:74
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4360
Test for any code unit.
Definition parser.hpp:215
Test for beginning of line.
Definition parser.hpp:614
Test for any.
Definition parser.hpp:1057
Test for chemical formula.
Definition parser.hpp:5489
Test for Creditor Reference.
Definition parser.hpp:4924
T reference[22]
Normalized national reference number.
Definition parser.hpp:4946
T check_digits[3]
Two check digits.
Definition parser.hpp:4945
bool is_valid
Is reference valid per ISO 7064.
Definition parser.hpp:4947
Legacy CSS comment end -->
Definition parser.hpp:7450
Legacy CSS comment start <!--
Definition parser.hpp:7412
CSS comment.
Definition parser.hpp:7352
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7360
CSS import directive.
Definition parser.hpp:7664
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7672
CSS string.
Definition parser.hpp:7487
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7495
URI in CSS.
Definition parser.hpp:7554
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7562
Test for any code unit from a given string of code units.
Definition parser.hpp:719
Test for specific code unit.
Definition parser.hpp:287
Test for date.
Definition parser.hpp:3993
Test for valid DNS domain character.
Definition parser.hpp:2775
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2785
Test for DNS domain/hostname.
Definition parser.hpp:2875
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2939
Test for e-mail address.
Definition parser.hpp:3767
Test for emoticon.
Definition parser.hpp:3870
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3898
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3899
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3901
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3900
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3897
Test for end of line.
Definition parser.hpp:653
Test for fraction.
Definition parser.hpp:1685
End of condition ...]]>
Definition parser.hpp:8335
Start of condition <![condition[...
Definition parser.hpp:8269
virtual bool do_match(_In_reads_or_z_opt_(end) const T *text, size_t start=0, size_t end=SIZE_MAX, int flags=match_multiline)
condition position in source
Definition parser.hpp:8280
Contiguous sequence of characters representing name of element, attribute etc.
Definition parser.hpp:7881
Tag.
Definition parser.hpp:8033
std::vector< html_attribute > attributes
tag attributes
Definition parser.hpp:8050
html_sequence_t type
tag type
Definition parser.hpp:8048
stdex::interval< size_t > name
tag name position in source
Definition parser.hpp:8049
Optionally-quoted string representing value of an attribute.
Definition parser.hpp:7928
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7936
Test for International Bank Account Number.
Definition parser.hpp:4635
T bban[31]
Normalized Basic Bank Account Number.
Definition parser.hpp:4660
T country[3]
ISO 3166-1 alpha-2 country code.
Definition parser.hpp:4658
T check_digits[3]
Two check digits.
Definition parser.hpp:4659
bool is_valid
Is IBAN valid per ISO 7064.
Definition parser.hpp:4661
Test for decimal integer.
Definition parser.hpp:1295
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1380
bool has_separators
Did integer have any separators?
Definition parser.hpp:1401
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1400
Test for hexadecimal integer.
Definition parser.hpp:1460
Base class for integer testing.
Definition parser.hpp:1273
size_t value
Calculated value of the numeral.
Definition parser.hpp:1287
Test for IPv4 address.
Definition parser.hpp:2343
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2388
struct in_addr value
IPv4 address value.
Definition parser.hpp:2389
Test for IPv6 address.
Definition parser.hpp:2555
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2627
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2625
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2626
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2481
Test for repeating.
Definition parser.hpp:909
bool m_greedy
try to match as long sequence as possible
Definition parser.hpp:948
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:945
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:946
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:947
Test for JSON string.
Definition parser.hpp:7200
MIME content type.
Definition parser.hpp:7748
stdex::interval< size_t > base_type
basic type position in source
Definition parser.hpp:7758
stdex::interval< size_t > sub_type
sub-type position in source
Definition parser.hpp:7759
stdex::interval< size_t > charset
charset position in source
Definition parser.hpp:7760
Test for mixed numeral.
Definition parser.hpp:1920
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:1953
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1951
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1950
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1949
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:1952
Test for monetary numeral.
Definition parser.hpp:2214
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2247
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2252
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2250
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2253
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2251
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2248
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2249
"No-op" match
Definition parser.hpp:183
Base template for all parsers.
Definition parser.hpp:80
stdex::interval< size_t > interval
Region of the last match.
Definition parser.hpp:120
Test for permutation.
Definition parser.hpp:1197
Test for phone number.
Definition parser.hpp:4483
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4508
Test for any punctuation code unit.
Definition parser.hpp:460
Test for Roman numeral.
Definition parser.hpp:1569
Test for scientific numeral.
Definition parser.hpp:2045
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2091
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2095
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2089
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2090
double value
Calculated value of the numeral.
Definition parser.hpp:2099
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2097
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2094
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2096
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2098
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2093
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2092
Test for match score.
Definition parser.hpp:1748
Test for sequence.
Definition parser.hpp:1005
Definition parser.hpp:688
Test for SI Reference delimiter.
Definition parser.hpp:5118
Test for SI Reference part.
Definition parser.hpp:5072
Test for SI Reference.
Definition parser.hpp:5157
basic_si_reference_part< T > part3
Reference data part 3 (P3)
Definition parser.hpp:5186
basic_si_reference_part< T > part1
Reference data part 1 (P1)
Definition parser.hpp:5184
bool is_valid
Is reference valid.
Definition parser.hpp:5187
T model[3]
Reference model.
Definition parser.hpp:5183
basic_si_reference_part< T > part2
Reference data part 2 (P2)
Definition parser.hpp:5185
Test for signed numeral.
Definition parser.hpp:1834
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1860
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1859
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1858
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1861
Test for any space code unit.
Definition parser.hpp:380
Test for any space or punctuation code unit.
Definition parser.hpp:535
Test for any string.
Definition parser.hpp:1125
Test for given string.
Definition parser.hpp:814
Test for time.
Definition parser.hpp:4258
Test for valid URL password character.
Definition parser.hpp:3059
Test for valid URL path character.
Definition parser.hpp:3161
Test for URL path.
Definition parser.hpp:3271
Test for valid URL username character.
Definition parser.hpp:2958
Test for URL.
Definition parser.hpp:3411
Test for HTTP agent.
Definition parser.hpp:6744
Test for HTTP any type.
Definition parser.hpp:5887
Test for HTTP asterisk.
Definition parser.hpp:6515
Test for HTTP header.
Definition parser.hpp:7052
Test for HTTP language (RFC1766)
Definition parser.hpp:6383
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:5569
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5919
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5971
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5835
http_token name
Parameter name.
Definition parser.hpp:5844
http_value value
Parameter value.
Definition parser.hpp:5845
Test for HTTP protocol.
Definition parser.hpp:6819
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6841
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:5728
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:5737
Test for HTTP request.
Definition parser.hpp:6920
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:5605
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:5641
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:5674
Test for HTTP URL parameter.
Definition parser.hpp:6208
Test for HTTP URL path segment.
Definition parser.hpp:6120
Test for HTTP URL path.
Definition parser.hpp:6153
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:6161
Test for HTTP URL port.
Definition parser.hpp:6064
Test for HTTP URL server.
Definition parser.hpp:6027
Test for HTTP URL.
Definition parser.hpp:6285
Collection of HTTP values.
Definition parser.hpp:7156
Test for HTTP value (RFC2616: value)
Definition parser.hpp:5791
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5800
http_token token
Value when matched as token.
Definition parser.hpp:5801
Test for HTTP weight factor.
Definition parser.hpp:6446
float value
Calculated value of the weight factor.
Definition parser.hpp:6459
Test for HTTP weighted value.
Definition parser.hpp:6538
Base template for collection-holding parsers.
Definition parser.hpp:965
Test for any SGML code point.
Definition parser.hpp:248
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:771
Test for specific SGML code point.
Definition parser.hpp:336
Test for valid DNS domain SGML character.
Definition parser.hpp:2830
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2520
Test for any SGML punctuation code point.
Definition parser.hpp:501
Test for any SGML space code point.
Definition parser.hpp:423
Test for any SGML space or punctuation code point.
Definition parser.hpp:578
Test for SGML given string.
Definition parser.hpp:861
Test for valid URL password SGML character.
Definition parser.hpp:3112
Test for valid URL path SGML character.
Definition parser.hpp:3218
Test for valid URL username SGML character.
Definition parser.hpp:3010
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
void invalidate()
Invalidates interval.
Definition interval.hpp:59
T start
interval start
Definition interval.hpp:19
Tag attribute.
Definition parser.hpp:8023
stdex::interval< size_t > name
attribute name position in source
Definition parser.hpp:8024
stdex::interval< size_t > value
attribute value position in source
Definition parser.hpp:8025
Definition parser.hpp:7182