stdex
Additional custom or not Standard C++ covered algorithms
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023-2024 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "endian.hpp"
10#include "interval.hpp"
11#include "memory.hpp"
12#include "sgml.hpp"
13#include "string.hpp"
14#include <stdarg.h>
15#include <stdint.h>
16#include <math.h>
17#if defined(_WIN32)
18#include <winsock2.h>
19#if _MSC_VER >= 1300
20#include <ws2ipdef.h>
21#endif
22#include <ws2tcpip.h>
23#else
24#include <netinet/in.h>
25#endif
26#include <limits>
27#include <list>
28#include <locale>
29#include <memory>
30#include <set>
31#include <string_view>
32#include <string>
33
34#if defined(_MSC_VER)
35#pragma warning(push)
36#pragma warning(disable: 4100)
37#elif defined(__GNUC__)
38#pragma GCC diagnostic push
39#pragma GCC diagnostic ignored "-Wunknown-pragmas"
40#pragma GCC diagnostic ignored "-Wunused-parameter"
41#endif
42
// ENUM_FLAG_OPERATOR(T,X) defines, for the scoped enumeration T, the binary
// operator X (enum/enum, enum/underlying, underlying/enum) and the matching
// compound-assignment operator (enum/enum, enum/underlying).
// The compound operator token is built with ## so that e.g. `|` and `=` form
// the single token `|=`; without pasting, a conforming preprocessor leaves
// them as two separate tokens and the declaration does not parse.
#define ENUM_FLAG_OPERATOR(T,X) \
inline T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
inline T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T& operator X##= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
inline T& operator X##= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }

// ENUM_FLAGS(T, type) forward-declares `enum class T : type`, defines
// operator ~ and the |, ^ and & operator families for it, and ends with the
// enum head so that the enumerator list can follow the macro invocation.
#define ENUM_FLAGS(T, type) \
enum class T : type; \
inline T operator ~ (T t) { return static_cast<T>(~static_cast<std::underlying_type_t<T>>(t)); } \
ENUM_FLAG_OPERATOR(T,|) \
ENUM_FLAG_OPERATOR(T,^) \
ENUM_FLAG_OPERATOR(T,&) \
enum class T : type
56
57#if defined(_WIN32)
58#elif defined(__APPLE__)
59#define s6_words __u6_addr.__u6_addr16
60#else
61#define s6_words s6_addr16
62#endif
63
64namespace stdex
65{
66 namespace parser
67 {
71 constexpr int match_default = 0;
72 constexpr int match_case_insensitive = 0x1;
73 constexpr int match_multiline = 0x2;
74
78 template <class T>
80 {
81 public:
82 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
83 virtual ~basic_parser() {}
84
85 bool search(
86 _In_reads_or_z_opt_(end) const T* text,
87 _In_ size_t start = 0,
88 _In_ size_t end = SIZE_MAX,
89 _In_ int flags = match_default)
90 {
91 for (size_t i = start; i < end && text[i]; i++)
92 if (match(text, i, end, flags))
93 return true;
94 return false;
95 }
96
97 bool match(
98 _In_reads_or_z_opt_(end) const T* text,
99 _In_ size_t start = 0,
100 _In_ size_t end = SIZE_MAX,
101 _In_ int flags = match_default)
102 {
103 return do_match(text, start, end, flags);
104 }
105
106 bool match(
107 _In_ const std::basic_string_view<T, std::char_traits<T>> text,
108 _In_ size_t start = 0,
109 _In_ size_t end = SIZE_MAX,
110 _In_ int flags = match_default)
111 {
112 return match(text.data(), start, std::min<size_t>(end, text.size()), flags);
113 }
114
115 virtual void invalidate()
116 {
117 this->interval.invalidate();
118 }
119
121
122 protected:
123 virtual bool do_match(
124 _In_reads_or_z_opt_(end) const T* text,
125 _In_ size_t start = 0,
126 _In_ size_t end = SIZE_MAX,
127 _In_ int flags = match_default) = 0;
128
130 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
131 {
132 if (text[start] == '&') {
133 // Potential entity start
134 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
135 for (chr_end = start + 1;; chr_end++) {
136 if (chr_end >= end || text[chr_end] == 0) {
137 // Unterminated entity
138 break;
139 }
140 if (text[chr_end] == ';') {
141 // Entity end
142 size_t n = chr_end - start - 1;
143 if (n >= 2 && text[start + 1] == '#') {
144 // Numerical entity
145 utf32_t unicode;
146 if (text[start + 2] == 'x' || text[start + 2] == 'X')
147 unicode = static_cast<utf32_t>(strtou32(text + start + 3, n - 2, nullptr, 16));
148 else
149 unicode = static_cast<utf32_t>(strtou32(text + start + 2, n - 1, nullptr, 10));
150#ifdef _WIN32
151 if (unicode < 0x10000) {
152 buf[0] = (wchar_t)unicode;
153 buf[1] = 0;
154 }
155 else {
156 ucs4_to_surrogate_pair(buf, unicode);
157 buf[2] = 0;
158 }
159#else
160 buf[0] = (wchar_t)unicode;
161 buf[1] = 0;
162#endif
163 chr_end++;
164 return buf;
165 }
166 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
167 if (entity_w) {
168 chr_end++;
169 return entity_w;
170 }
171 // Unknown entity.
172 break;
173 }
174 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
175 // This char cannot possibly be a part of entity.
176 break;
177 }
178 }
179 }
180 buf[0] = text[start];
181 buf[1] = 0;
182 chr_end = start + 1;
183 return buf;
184 }
186
187 std::locale m_locale;
188 };
189
190 using parser = basic_parser<char>;
191 using wparser = basic_parser<wchar_t>;
192#ifdef _UNICODE
193 using tparser = wparser;
194#else
195 using tparser = parser;
196#endif
197 using sgml_parser = basic_parser<char>;
198
202 template <class T>
203 class basic_noop : public basic_parser<T>
204 {
205 protected:
206 virtual bool do_match(
207 _In_reads_or_z_opt_(end) const T* text,
208 _In_ size_t start = 0,
209 _In_ size_t end = SIZE_MAX,
210 _In_ int flags = match_default)
211 {
212 _Assume_(text || start >= end);
213 if (start < end && text[start]) {
214 this->interval.start = this->interval.end = start;
215 return true;
216 }
217 this->interval.invalidate();
218 return false;
219 }
220 };
221
222 using noop = basic_noop<char>;
224#ifdef _UNICODE
225 using tnoop = wnoop;
226#else
227 using tnoop = noop;
228#endif
230
234 template <class T>
235 class basic_any_cu : public basic_parser<T>
236 {
237 public:
238 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
239
240 protected:
241 virtual bool do_match(
242 _In_reads_or_z_opt_(end) const T* text,
243 _In_ size_t start = 0,
244 _In_ size_t end = SIZE_MAX,
245 _In_ int flags = match_default)
246 {
247 _Assume_(text || start >= end);
248 if (start < end && text[start]) {
249 this->interval.end = (this->interval.start = start) + 1;
250 return true;
251 }
252 this->interval.invalidate();
253 return false;
254 }
255 };
256
259#ifdef _UNICODE
260 using tany_cu = wany_cu;
261#else
262 using tany_cu = any_cu;
263#endif
264
268 class sgml_any_cp : public basic_any_cu<char>
269 {
270 public:
271 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
272
273 protected:
274 virtual bool do_match(
275 _In_reads_or_z_(end) const char* text,
276 _In_ size_t start = 0,
277 _In_ size_t end = SIZE_MAX,
278 _In_ int flags = match_default)
279 {
280 _Assume_(text || start >= end);
281 if (start < end && text[start]) {
282 if (text[start] == '&') {
283 // SGML entity
284 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
285 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
286 if (text[this->interval.end] == ';') {
287 this->interval.end++;
288 this->interval.start = start;
289 return true;
290 }
291 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
292 break;
293 // Unterminated entity
294 }
295 this->interval.end = (this->interval.start = start) + 1;
296 return true;
297 }
298 this->interval.invalidate();
299 return false;
300 }
301 };
302
306 template <class T>
307 class basic_cu : public basic_parser<T>
308 {
309 public:
310 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
312 m_chr(chr),
313 m_invert(invert)
314 {}
315
316 protected:
317 virtual bool do_match(
318 _In_reads_or_z_opt_(end) const T* text,
319 _In_ size_t start = 0,
320 _In_ size_t end = SIZE_MAX,
321 _In_ int flags = match_default)
322 {
323 _Assume_(text || start >= end);
324 if (start < end && text[start]) {
325 bool r;
326 if (flags & match_case_insensitive) {
327 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
328 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
329 }
330 else
331 r = text[start] == m_chr;
332 if ((r && !m_invert) || (!r && m_invert)) {
333 this->interval.end = (this->interval.start = start) + 1;
334 return true;
335 }
336 }
337 this->interval.invalidate();
338 return false;
339 }
340
341 T m_chr;
342 bool m_invert;
343 };
344
345 using cu = basic_cu<char>;
346 using wcu = basic_cu<wchar_t>;
347#ifdef _UNICODE
348 using tcu = wcu;
349#else
350 using tcu = cu;
351#endif
352
356 class sgml_cp : public sgml_parser
357 {
358 public:
359 sgml_cp(const char* chr, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
361 m_invert(invert)
362 {
363 _Assume_(chr || !count);
364 wchar_t buf[3];
365 size_t chr_end;
366 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
367 }
368
369 protected:
370 virtual bool do_match(
371 _In_reads_or_z_(end) const char* text,
372 _In_ size_t start = 0,
373 _In_ size_t end = SIZE_MAX,
374 _In_ int flags = match_default)
375 {
376 _Assume_(text || start >= end);
377 if (start < end && text[start]) {
378 wchar_t buf[3];
379 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
380 bool r = ((flags & match_case_insensitive) ?
381 stdex::strnicmp(chr, stdex::strlen(chr), m_chr.data(), m_chr.size(), m_locale) :
382 stdex::strncmp(chr, stdex::strlen(chr), m_chr.data(), m_chr.size())) == 0;
383 if ((r && !m_invert) || (!r && m_invert)) {
384 this->interval.start = start;
385 return true;
386 }
387 }
388 this->interval.invalidate();
389 return false;
390 }
391
392 std::wstring m_chr;
393 bool m_invert;
394 };
395
399 template <class T>
400 class basic_space_cu : public basic_parser<T>
401 {
402 public:
403 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
405 m_invert(invert)
406 {}
407
408 protected:
409 virtual bool do_match(
410 _In_reads_or_z_opt_(end) const T* text,
411 _In_ size_t start = 0,
412 _In_ size_t end = SIZE_MAX,
413 _In_ int flags = match_default)
414 {
415 _Assume_(text || start >= end);
416 if (start < end && text[start]) {
417 bool r =
418 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
419 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
420 if ((r && !m_invert) || (!r && m_invert)) {
421 this->interval.end = (this->interval.start = start) + 1;
422 return true;
423 }
424 }
425 this->interval.invalidate();
426 return false;
427 }
428
429 bool m_invert;
430 };
431
434#ifdef _UNICODE
435 using tspace_cu = wspace_cu;
436#else
437 using tspace_cu = space_cu;
438#endif
439
443 class sgml_space_cp : public basic_space_cu<char>
444 {
445 public:
446 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
448 {}
449
450 protected:
451 virtual bool do_match(
452 _In_reads_or_z_(end) const char* text,
453 _In_ size_t start = 0,
454 _In_ size_t end = SIZE_MAX,
455 _In_ int flags = match_default)
456 {
457 _Assume_(text || start >= end);
458 if (start < end && text[start]) {
459 wchar_t buf[3];
460 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
461 const wchar_t* chr_end = chr + stdex::strlen(chr);
462 bool r =
463 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
464 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
465 if ((r && !m_invert) || (!r && m_invert)) {
466 this->interval.start = start;
467 return true;
468 }
469 }
470
471 this->interval.invalidate();
472 return false;
473 }
474 };
475
479 template <class T>
480 class basic_punct_cu : public basic_parser<T>
481 {
482 public:
483 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
485 m_invert(invert)
486 {}
487
488 protected:
489 virtual bool do_match(
490 _In_reads_or_z_opt_(end) const T* text,
491 _In_ size_t start = 0,
492 _In_ size_t end = SIZE_MAX,
493 _In_ int flags = match_default)
494 {
495 _Assume_(text || start >= end);
496 if (start < end && text[start]) {
497 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
498 if ((r && !m_invert) || (!r && m_invert)) {
499 this->interval.end = (this->interval.start = start) + 1;
500 return true;
501 }
502 }
503 this->interval.invalidate();
504 return false;
505 }
506
507 bool m_invert;
508 };
509
512#ifdef _UNICODE
513 using tpunct_cu = wpunct_cu;
514#else
515 using tpunct_cu = punct_cu;
516#endif
517
521 class sgml_punct_cp : public basic_punct_cu<char>
522 {
523 public:
524 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
526 {}
527
528 protected:
529 virtual bool do_match(
530 _In_reads_or_z_(end) const char* text,
531 _In_ size_t start = 0,
532 _In_ size_t end = SIZE_MAX,
533 _In_ int flags = match_default)
534 {
535 _Assume_(text || start >= end);
536 if (start < end && text[start]) {
537 wchar_t buf[3];
538 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
539 const wchar_t* chr_end = chr + stdex::strlen(chr);
540 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
541 if ((r && !m_invert) || (!r && m_invert)) {
542 this->interval.start = start;
543 return true;
544 }
545 }
546 this->interval.invalidate();
547 return false;
548 }
549 };
550
554 template <class T>
556 {
557 public:
558 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
560 m_invert(invert)
561 {}
562
563 protected:
564 virtual bool do_match(
565 _In_reads_or_z_opt_(end) const T* text,
566 _In_ size_t start = 0,
567 _In_ size_t end = SIZE_MAX,
568 _In_ int flags = match_default)
569 {
570 _Assume_(text || start >= end);
571 if (start < end && text[start]) {
572 bool r =
573 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
574 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
575 if ((r && !m_invert) || (!r && m_invert)) {
576 this->interval.end = (this->interval.start = start) + 1;
577 return true;
578 }
579 }
580 this->interval.invalidate();
581 return false;
582 }
583
584 bool m_invert;
585 };
586
589#ifdef _UNICODE
591#else
593#endif
594
599 {
600 public:
601 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
603 {}
604
605 protected:
606 virtual bool do_match(
607 _In_reads_or_z_(end) const char* text,
608 _In_ size_t start = 0,
609 _In_ size_t end = SIZE_MAX,
610 _In_ int flags = match_default)
611 {
612 _Assume_(text || start >= end);
613 if (start < end && text[start]) {
614 wchar_t buf[3];
615 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
616 const wchar_t* chr_end = chr + stdex::strlen(chr);
617 bool r =
618 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
619 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
620 if ((r && !m_invert) || (!r && m_invert)) {
621 this->interval.start = start;
622 return true;
623 }
624 }
625 this->interval.invalidate();
626 return false;
627 }
628 };
629
633 template <class T>
634 class basic_bol : public basic_parser<T>
635 {
636 public:
637 basic_bol(bool invert = false) : m_invert(invert) {}
638
639 protected:
640 virtual bool do_match(
641 _In_reads_or_z_opt_(end) const T* text,
642 _In_ size_t start = 0,
643 _In_ size_t end = SIZE_MAX,
644 _In_ int flags = match_default)
645 {
646 _Assume_(text || !end);
647 _Assume_(text || start >= end);
648 bool r = start == 0 || (start <= end && stdex::islbreak(text[start - 1]));
649 if ((r && !m_invert) || (!r && m_invert)) {
650 this->interval.end = this->interval.start = start;
651 return true;
652 }
653 this->interval.invalidate();
654 return false;
655 }
656
657 bool m_invert;
658 };
659
660 using bol = basic_bol<char>;
661 using wbol = basic_bol<wchar_t>;
662#ifdef _UNICODE
663 using tbol = wbol;
664#else
665 using tbol = bol;
666#endif
668
672 template <class T>
673 class basic_eol : public basic_parser<T>
674 {
675 public:
676 basic_eol(bool invert = false) : m_invert(invert) {}
677
678 protected:
679 virtual bool do_match(
680 _In_reads_or_z_opt_(end) const T* text,
681 _In_ size_t start = 0,
682 _In_ size_t end = SIZE_MAX,
683 _In_ int flags = match_default)
684 {
685 _Assume_(text || start >= end);
686 bool r = start >= end || !text[start] || stdex::islbreak(text[start]);
687 if ((r && !m_invert) || (!r && m_invert)) {
688 this->interval.end = this->interval.start = start;
689 return true;
690 }
691 this->interval.invalidate();
692 return false;
693 }
694
695 bool m_invert;
696 };
697
698 using eol = basic_eol<char>;
699 using weol = basic_eol<wchar_t>;
700#ifdef _UNICODE
701 using teol = weol;
702#else
703 using teol = eol;
704#endif
706
707 template <class T>
708 class basic_set : public basic_parser<T>
709 {
710 public:
711 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
713 hit_offset(SIZE_MAX),
714 m_invert(invert)
715 {}
716
717 virtual void invalidate()
718 {
719 hit_offset = SIZE_MAX;
721 }
722
723 size_t hit_offset;
724
725 protected:
726 virtual bool do_match(
727 _In_reads_or_z_opt_(end) const T* text,
728 _In_ size_t start = 0,
729 _In_ size_t end = SIZE_MAX,
730 _In_ int flags = match_default) = 0;
731
732 bool m_invert;
733 };
734
738 template <class T>
739 class basic_cu_set : public basic_set<T>
740 {
741 public:
743 _In_reads_or_z_(count) const T* set,
744 _In_ size_t count = SIZE_MAX,
745 _In_ bool invert = false,
746 _In_ const std::locale& locale = std::locale()) :
748 {
749 if (set)
750 m_set.assign(set, set + stdex::strnlen(set, count));
751 }
752
753 protected:
754 virtual bool do_match(
755 _In_reads_or_z_opt_(end) const T* text,
756 _In_ size_t start = 0,
757 _In_ size_t end = SIZE_MAX,
758 _In_ int flags = match_default)
759 {
760 _Assume_(text || start >= end);
761 if (start < end && text[start]) {
762 const T* set = m_set.data();
763 size_t r = (flags & match_case_insensitive) ?
764 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
765 stdex::strnchr(set, m_set.size(), text[start]);
766 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
767 this->hit_offset = r;
768 this->interval.end = (this->interval.start = start) + 1;
769 return true;
770 }
771 }
772 this->hit_offset = SIZE_MAX;
773 this->interval.invalidate();
774 return false;
775 }
776
777 std::basic_string<T> m_set;
778 };
779
782#ifdef _UNICODE
783 using tcu_set = wcu_set;
784#else
785 using tcu_set = cu_set;
786#endif
787
791 class sgml_cp_set : public basic_set<char>
792 {
793 public:
794 sgml_cp_set(const char* set, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
796 {
797 if (set)
798 m_set = sgml2str(set, count);
799 }
800
801 protected:
802 virtual bool do_match(
803 _In_reads_or_z_(end) const char* text,
804 _In_ size_t start = 0,
805 _In_ size_t end = SIZE_MAX,
806 _In_ int flags = match_default)
807 {
808 _Assume_(text || start >= end);
809 if (start < end && text[start]) {
810 wchar_t buf[3];
811 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
812 const wchar_t* set = m_set.data();
813 size_t r = (flags & match_case_insensitive) ?
814 stdex::strnistr(set, m_set.size(), chr, m_locale) :
815 stdex::strnstr(set, m_set.size(), chr);
816 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
817 hit_offset = r;
818 this->interval.start = start;
819 return true;
820 }
821 }
822 hit_offset = SIZE_MAX;
823 this->interval.invalidate();
824 return false;
825 }
826
827 std::wstring m_set;
828 };
829
833 template <class T>
834 class basic_string : public basic_parser<T>
835 {
836 public:
838 _In_reads_or_z_(count) const T* str,
839 _In_ size_t count = SIZE_MAX,
840 _In_ const std::locale& locale = std::locale()) :
842 m_str(str, str + stdex::strnlen(str, count))
843 {}
844
845 protected:
846 virtual bool do_match(
847 _In_reads_or_z_opt_(end) const T* text,
848 _In_ size_t start = 0,
849 _In_ size_t end = SIZE_MAX,
850 _In_ int flags = match_default)
851 {
852 _Assume_(text || start >= end);
853 size_t
854 m = m_str.size(),
855 n = std::min<size_t>(end - start, m);
856 bool r = ((flags & match_case_insensitive) ?
857 stdex::strnicmp(text + start, n, m_str.data(), m, this->m_locale) :
858 stdex::strncmp(text + start, n, m_str.data(), m)) == 0;
859 if (r) {
860 this->interval.end = (this->interval.start = start) + n;
861 return true;
862 }
863 this->interval.invalidate();
864 return false;
865 }
866
867 std::basic_string<T> m_str;
868 };
869
872#ifdef _UNICODE
873 using tstring = wstring;
874#else
875 using tstring = string;
876#endif
877
882 {
883 public:
884 sgml_string(const char* str, size_t count = SIZE_MAX, _In_ const std::locale& locale = std::locale()) :
886 m_str(sgml2str(str, count))
887 {}
888
889 protected:
890 virtual bool do_match(
891 _In_reads_or_z_(end) const char* text,
892 _In_ size_t start = 0,
893 _In_ size_t end = SIZE_MAX,
894 _In_ int flags = match_default)
895 {
896 _Assume_(text || start >= end);
897 const wchar_t* str = m_str.data();
898 const bool case_insensitive = flags & match_case_insensitive ? true : false;
899 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
900 for (this->interval.end = start;;) {
901 if (!*str) {
902 this->interval.start = start;
903 return true;
904 }
905 if (this->interval.end >= end || !text[this->interval.end]) {
906 this->interval.invalidate();
907 return false;
908 }
909 wchar_t buf[3];
910 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
911 for (; *chr; ++str, ++chr) {
912 if (!*str ||
913 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
914 {
915 this->interval.invalidate();
916 return false;
917 }
918 }
919 }
920 }
921
922 std::wstring m_str;
923 };
924
928 template <class T>
930 {
931 public:
932 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = SIZE_MAX, bool greedy = true) :
933 m_el(el),
937 {}
938
939 protected:
940 virtual bool do_match(
941 _In_reads_or_z_opt_(end) const T* text,
942 _In_ size_t start = 0,
943 _In_ size_t end = SIZE_MAX,
944 _In_ int flags = match_default)
945 {
946 _Assume_(text || start >= end);
947 this->interval.start = this->interval.end = start;
948 for (size_t i = 0; ; i++) {
949 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
950 return true;
951 if (!m_el->match(text, this->interval.end, end, flags)) {
952 if (i >= m_min_iterations)
953 return true;
954 break;
955 }
956 if (m_el->interval.end == this->interval.end) {
957 // Element did match, but the matching interval was empty. Quit instead of spinning.
958 return true;
959 }
960 this->interval.end = m_el->interval.end;
961 }
962 this->interval.invalidate();
963 return false;
964 }
965
966 std::shared_ptr<basic_parser<T>> m_el;
969 bool m_greedy;
970 };
971
974#ifdef _UNICODE
975 using titerations = witerations;
976#else
977 using titerations = iterations;
978#endif
980
984 template <class T>
986 {
987 protected:
988 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
989
990 public:
992 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
993 _In_ size_t count,
994 _In_ const std::locale& locale = std::locale()) :
996 {
997 _Assume_(el || !count);
998 m_collection.reserve(count);
999 for (size_t i = 0; i < count; i++)
1000 m_collection.push_back(el[i]);
1001 }
1002
1004 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1005 _In_ const std::locale& locale = std::locale()) :
1007 m_collection(std::move(collection))
1008 {}
1009
1010 virtual void invalidate()
1011 {
1012 for (auto& el : m_collection)
1013 el->invalidate();
1015 }
1016
1017 protected:
1018 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
1019 };
1020
1024 template <class T>
1026 {
1027 public:
1029 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1030 _In_ size_t count = 0,
1031 _In_ const std::locale& locale = std::locale()) :
1033 {}
1034
1036 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1037 _In_ const std::locale& locale = std::locale()) :
1039 {}
1040
1041 protected:
1042 virtual bool do_match(
1043 _In_reads_or_z_opt_(end) const T* text,
1044 _In_ size_t start = 0,
1045 _In_ size_t end = SIZE_MAX,
1046 _In_ int flags = match_default)
1047 {
1048 _Assume_(text || start >= end);
1049 this->interval.end = start;
1050 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1051 if (!(*i)->match(text, this->interval.end, end, flags)) {
1052 for (++i; i != this->m_collection.end(); ++i)
1053 (*i)->invalidate();
1054 this->interval.invalidate();
1055 return false;
1056 }
1057 this->interval.end = (*i)->interval.end;
1058 }
1059 this->interval.start = start;
1060 return true;
1061 }
1062 };
1063
1066#ifdef _UNICODE
1067 using tsequence = wsequence;
1068#else
1069 using tsequence = sequence;
1070#endif
1072
1076 template <class T>
1078 {
1079 protected:
1080 basic_branch(_In_ const std::locale& locale) :
1082 hit_offset(SIZE_MAX)
1083 {}
1084
1085 public:
1087 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1088 _In_ size_t count = 0,
1089 _In_ const std::locale& locale = std::locale()) :
1091 hit_offset(SIZE_MAX)
1092 {}
1093
1095 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1096 _In_ const std::locale& locale = std::locale()) :
1098 hit_offset(SIZE_MAX)
1099 {}
1100
1101 virtual void invalidate()
1102 {
1103 hit_offset = SIZE_MAX;
1105 }
1106
1107 size_t hit_offset;
1108
1109 protected:
1110 virtual bool do_match(
1111 _In_reads_or_z_opt_(end) const T* text,
1112 _In_ size_t start = 0,
1113 _In_ size_t end = SIZE_MAX,
1114 _In_ int flags = match_default)
1115 {
1116 _Assume_(text || start >= end);
1117 hit_offset = 0;
1118 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1119 if ((*i)->match(text, start, end, flags)) {
1120 this->interval = (*i)->interval;
1121 for (++i; i != this->m_collection.end(); ++i)
1122 (*i)->invalidate();
1123 return true;
1124 }
1125 }
1126 hit_offset = SIZE_MAX;
1127 this->interval.invalidate();
1128 return false;
1129 }
1130 };
1131
1132 using branch = basic_branch<char>;
1134#ifdef _UNICODE
1135 using tbranch = wbranch;
1136#else
1137 using tbranch = branch;
1138#endif
1140
1144 template <class T, class T_parser = basic_string<T>>
1146 {
1147 public:
1149 _In_reads_(count) const T* str_z = nullptr,
1150 _In_ size_t count = 0,
1151 _In_ const std::locale& locale = std::locale()) :
1153 {
1154 build(str_z, count);
1155 }
1156
1157 basic_string_branch(_In_z_ const T* str, ...) :
1158 basic_branch<T>(std::locale())
1159 {
1160 va_list params;
1161 va_start(params, str);
1162 build(str, params);
1163 va_end(params);
1164 }
1165
1166 basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1168 {
1169 va_list params;
1170 va_start(params, str);
1171 build(str, params);
1172 va_end(params);
1173 }
1174
1175 protected:
1176 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1177 {
1178 _Assume_(str_z || !count);
1179 if (count) {
1180 size_t offset, n;
1181 for (
1182 offset = n = 0;
1183 offset < count && str_z[offset];
1184 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1185 this->m_collection.reserve(n);
1186 for (
1187 offset = 0;
1188 offset < count && str_z[offset];
1189 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1190 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1191 }
1192 }
1193
1194 void build(_In_z_ const T* str, _In_ va_list params)
1195 {
1196 const T* p;
1197 for (
1198 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, SIZE_MAX, this->m_locale)));
1199 (p = va_arg(params, const T*)) != nullptr;
1200 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, SIZE_MAX, this->m_locale))));
1201 }
1202 };
1203
1206#ifdef _UNICODE
1208#else
1210#endif
1212
1216 template <class T>
1218 {
1219 public:
1221 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1222 _In_ size_t count = 0,
1223 _In_ const std::locale& locale = std::locale()) :
1225 {}
1226
1228 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1229 _In_ const std::locale& locale = std::locale()) :
1231 {}
1232
1233 protected:
1234 virtual bool do_match(
1235 _In_reads_or_z_opt_(end) const T* text,
1236 _In_ size_t start = 0,
1237 _In_ size_t end = SIZE_MAX,
1238 _In_ int flags = match_default)
1239 {
1240 _Assume_(text || start >= end);
1241 for (auto& el : this->m_collection)
1242 el->invalidate();
1243 if (match_recursively(text, start, end, flags)) {
1244 this->interval.start = start;
1245 return true;
1246 }
1247 this->interval.invalidate();
1248 return false;
1249 }
1250
1251 bool match_recursively(
1252 _In_reads_or_z_opt_(end) const T* text,
1253 _In_ size_t start = 0,
1254 _In_ size_t end = SIZE_MAX,
1255 _In_ int flags = match_default)
1256 {
1257 bool all_matched = true;
1258 for (auto& el : this->m_collection) {
1259 if (!el->interval) {
1260 // Element was not matched in permutatuion yet.
1261 all_matched = false;
1262 if (el->match(text, start, end, flags)) {
1263 // Element matched for the first time.
1264 if (match_recursively(text, el->interval.end, end, flags)) {
1265 // Rest of the elements matched too.
1266 return true;
1267 }
1268 el->invalidate();
1269 }
1270 }
1271 }
1272 if (all_matched) {
1273 this->interval.end = start;
1274 return true;
1275 }
1276 return false;
1277 }
1278 };
1279
1282#ifdef _UNICODE
1283 using tpermutation = wpermutation;
1284#else
1285 using tpermutation = permutation;
1286#endif
1288
1292 template <class T>
1293 class basic_integer : public basic_parser<T>
1294 {
1295 public:
1296 basic_integer(_In_ const std::locale& locale = std::locale()) :
1298 value(0)
1299 {}
1300
1301 virtual void invalidate()
1302 {
1303 value = 0;
1305 }
1306
1307 public:
1308 size_t value;
1309 };
1310
1314 template <class T>
1316 {
1317 public:
1319 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1320 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1321 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1322 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1323 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1324 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1325 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1326 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1327 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1328 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1329 _In_ const std::locale& locale = std::locale()) :
1331 m_digit_0(digit_0),
1332 m_digit_1(digit_1),
1333 m_digit_2(digit_2),
1334 m_digit_3(digit_3),
1335 m_digit_4(digit_4),
1336 m_digit_5(digit_5),
1337 m_digit_6(digit_6),
1338 m_digit_7(digit_7),
1339 m_digit_8(digit_8),
1340 m_digit_9(digit_9)
1341 {}
1342
1343 protected:
1344 virtual bool do_match(
1345 _In_reads_or_z_opt_(end) const T* text,
1346 _In_ size_t start = 0,
1347 _In_ size_t end = SIZE_MAX,
1348 _In_ int flags = match_default)
1349 {
1350 _Assume_(text || start >= end);
1351 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1352 size_t dig;
1353 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1354 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1355 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1356 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1357 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1358 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1359 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1360 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1361 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1362 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1363 else break;
1364 this->value = this->value * 10 + dig;
1365 }
1367 this->interval.start = start;
1368 return true;
1369 }
1370 this->interval.invalidate();
1371 return false;
1372 }
1373
1374 std::shared_ptr<basic_parser<T>>
1375 m_digit_0,
1376 m_digit_1,
1377 m_digit_2,
1378 m_digit_3,
1379 m_digit_4,
1380 m_digit_5,
1381 m_digit_6,
1382 m_digit_7,
1383 m_digit_8,
1384 m_digit_9;
1385 };
1386
1389#ifdef _UNICODE
1390 using tinteger10 = winteger10;
1391#else
1392 using tinteger10 = integer10;
1393#endif
1395
1399 template <class T>
1401 {
1402 public:
1404 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1405 _In_ const std::shared_ptr<basic_set<T>>& separator,
1406 _In_ const std::locale& locale = std::locale()) :
1408 digit_count(0),
1409 has_separators(false),
1410 m_digits(digits),
1411 m_separator(separator)
1412 {}
1413
1414 virtual void invalidate()
1415 {
1416 digit_count = 0;
1417 has_separators = false;
1419 }
1420
1423
1424 protected:
/// Matches a decimal integer whose digits may be grouped by a thousands separator.
///
/// m_digits must match at `start`. When that leading group spans at most three
/// text positions, the loop keeps consuming "<separator><group of exactly three
/// positions>" pairs, requiring every separator to report the same hit_offset.
///
/// \param[in] text   Text to parse; may be null only when start >= end (see _Assume_)
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop matching at
/// \param[in] flags  Matching flags, forwarded unchanged to m_digits and m_separator
///
/// \returns true when the leading digit group matched (value, digit_count,
///          has_separators and interval are set); false otherwise (value is
///          reset to 0 and interval is invalidated)
1425 virtual bool do_match(
1426 _In_reads_or_z_opt_(end) const T* text,
1427 _In_ size_t start = 0,
1428 _In_ size_t end = SIZE_MAX,
1429 _In_ int flags = match_default)
1430 {
1431 _Assume_(text || start >= end);
1432 if (m_digits->match(text, start, end, flags)) {
1433 // Leading part match.
1434 this->value = m_digits->value;
1435 digit_count = m_digits->interval.size();
1436 has_separators = false;
1437 this->interval.start = start;
1438 this->interval.end = m_digits->interval.end;
// A leading group longer than three positions is taken as-is; no grouping is attempted.
1439 if (m_digits->interval.size() <= 3) {
1440 // Maybe separated with thousand separators?
// SIZE_MAX means "no separator accepted yet"; afterwards it holds the hit_offset of the accepted separator.
1441 size_t hit_offset = SIZE_MAX;
1442 while (m_separator->match(text, this->interval.end, end, flags) &&
1443 (hit_offset == SIZE_MAX || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1444 m_digits->match(text, m_separator->interval.end, end, flags) &&
1445 m_digits->interval.size() == 3)
1446 {
1447 // Thousand separator and three-digit integer followed.
1448 this->value = this->value * 1000 + m_digits->value;
1449 digit_count += 3;
1450 has_separators = true;
1451 this->interval.end = m_digits->interval.end;
1452 hit_offset = m_separator->hit_offset;
1453 }
// When the loop condition fails, interval.end still points past the last accepted group,
// so a trailing separator or a group of the wrong length is not part of the match.
1454 }
1455
1456 return true;
1457 }
1458 this->value = 0;
1459 this->interval.invalidate();
1460 return false;
1461 }
1462
1463 std::shared_ptr<basic_integer10<T>> m_digits;
1464 std::shared_ptr<basic_set<T>> m_separator;
1465 };
1466
// Instantiations of basic_integer10ts for char and wchar_t text.
1467 using integer10ts = basic_integer10ts<char>;
1468 using winteger10ts = basic_integer10ts<wchar_t>;
// tinteger10ts resolves to the wchar_t variant when _UNICODE is defined, to the char variant otherwise.
1469#ifdef _UNICODE
1470 using tinteger10ts = winteger10ts;
1471#else
1472 using tinteger10ts = integer10ts;
1473#endif
// Same instantiation as integer10ts (char), published under an SGML-specific name.
1474 using sgml_integer10ts = basic_integer10ts<char>;
1475
1479 template <class T>
1481 {
1482 public:
1484 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1485 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1486 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1487 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1488 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1489 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1490 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1491 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1492 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1493 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1494 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1495 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1496 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1497 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1498 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1499 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1500 _In_ const std::locale& locale = std::locale()) :
1502 m_digit_0(digit_0),
1503 m_digit_1(digit_1),
1504 m_digit_2(digit_2),
1505 m_digit_3(digit_3),
1506 m_digit_4(digit_4),
1507 m_digit_5(digit_5),
1508 m_digit_6(digit_6),
1509 m_digit_7(digit_7),
1510 m_digit_8(digit_8),
1511 m_digit_9(digit_9),
1512 m_digit_10(digit_10),
1513 m_digit_11(digit_11),
1514 m_digit_12(digit_12),
1515 m_digit_13(digit_13),
1516 m_digit_14(digit_14),
1517 m_digit_15(digit_15)
1518 {}
1519
1520 protected:
1521 virtual bool do_match(
1522 _In_reads_or_z_opt_(end) const T* text,
1523 _In_ size_t start = 0,
1524 _In_ size_t end = SIZE_MAX,
1525 _In_ int flags = match_default)
1526 {
1527 _Assume_(text || start >= end);
1528 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1529 size_t dig;
1530 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1531 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1532 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1533 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1534 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1535 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1536 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1537 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1538 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1539 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1540 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1541 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1542 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1543 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1544 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1545 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
1546 else break;
1547 this->value = this->value * 16 + dig;
1548 }
1550 this->interval.start = start;
1551 return true;
1552 }
1553 this->interval.invalidate();
1554 return false;
1555 }
1556
1557 std::shared_ptr<basic_parser<T>>
1558 m_digit_0,
1559 m_digit_1,
1560 m_digit_2,
1561 m_digit_3,
1562 m_digit_4,
1563 m_digit_5,
1564 m_digit_6,
1565 m_digit_7,
1566 m_digit_8,
1567 m_digit_9,
1568 m_digit_10,
1569 m_digit_11,
1570 m_digit_12,
1571 m_digit_13,
1572 m_digit_14,
1573 m_digit_15;
1574 };
1575
1578#ifdef _UNICODE
1579 using tinteger16 = winteger16;
1580#else
1581 using tinteger16 = integer16;
1582#endif
1584
1588 template <class T>
1590 {
1591 public:
1593 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1594 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1595 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1596 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1597 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1598 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1599 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1600 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1601 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1602 _In_ const std::locale& locale = std::locale()) :
1604 m_digit_1(digit_1),
1605 m_digit_5(digit_5),
1606 m_digit_10(digit_10),
1607 m_digit_50(digit_50),
1608 m_digit_100(digit_100),
1609 m_digit_500(digit_500),
1610 m_digit_1000(digit_1000),
1611 m_digit_5000(digit_5000),
1612 m_digit_10000(digit_10000)
1613 {}
1614
1615 protected:
1616 virtual bool do_match(
1617 _In_reads_or_z_opt_(end) const T* text,
1618 _In_ size_t start = 0,
1619 _In_ size_t end = SIZE_MAX,
1620 _In_ int flags = match_default)
1621 {
1622 _Assume_(text || start >= end);
1623 size_t
1625 end2;
1626
1627 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
1628 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1629 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1630 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1631 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1632 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1633 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1634 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1635 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1636 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1637 else break;
1638
1639 // Store first digit.
1640 if (dig[4] == SIZE_MAX) dig[4] = dig[0];
1641
1642 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1643 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1644 break;
1645 }
1646 if (dig[0] <= dig[1]) {
1647 // Digit is less or equal previous one: add.
1648 this->value += dig[0];
1649 }
1650 else if (
1651 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1652 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1653 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1654 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1655 {
1656 // Digit is up to two orders bigger than previous one: subtract. But...
1657 if (dig[2] < dig[0]) {
1658 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1659 break;
1660 }
1661 this->value -= dig[1]; // Cancel addition in the previous step.
1662 dig[0] -= dig[1]; // Combine last two digits.
1663 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1664 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1665 this->value += dig[0]; // Add combined value.
1666 }
1667 else {
1668 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1669 break;
1670 }
1671 }
1672 if (this->value) {
1673 this->interval.start = start;
1674 return true;
1675 }
1676 this->interval.invalidate();
1677 return false;
1678 }
1679
1680 std::shared_ptr<basic_parser<T>>
1681 m_digit_1,
1682 m_digit_5,
1683 m_digit_10,
1684 m_digit_50,
1685 m_digit_100,
1686 m_digit_500,
1687 m_digit_1000,
1688 m_digit_5000,
1689 m_digit_10000;
1690 };
1691
1694#ifdef _UNICODE
1696#else
1698#endif
1700
1704 template <class T>
1706 {
1707 public:
1709 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1710 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1711 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1712 _In_ const std::locale& locale = std::locale()) :
1714 numerator(_numerator),
1715 fraction_line(_fraction_line),
1716 denominator(_denominator)
1717 {}
1718
1719 virtual void invalidate()
1720 {
1721 numerator->invalidate();
1722 fraction_line->invalidate();
1723 denominator->invalidate();
1725 }
1726
1727 std::shared_ptr<basic_parser<T>> numerator;
1728 std::shared_ptr<basic_parser<T>> fraction_line;
1729 std::shared_ptr<basic_parser<T>> denominator;
1730
1731 protected:
/// Matches a fraction: numerator, fraction line and denominator, back to back.
///
/// Each sub-parser is started where the previous one's interval ended; nothing
/// is skipped between them.
///
/// \param[in] text   Text to parse; may be null only when start >= end (see _Assume_)
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop matching at
/// \param[in] flags  Matching flags, forwarded unchanged to the sub-parsers
///
/// \returns true when all three parts matched (interval spans from start to the
///          end of the denominator); false otherwise
1732 virtual bool do_match(
1733 _In_reads_or_z_opt_(end) const T* text,
1734 _In_ size_t start = 0,
1735 _In_ size_t end = SIZE_MAX,
1736 _In_ int flags = match_default)
1737 {
1738 _Assume_(text || start >= end);
// The && chain short-circuits: a later part is only tried once the earlier ones matched.
1739 if (numerator->match(text, start, end, flags) &&
1740 fraction_line->match(text, numerator->interval.end, end, flags) &&
1741 denominator->match(text, fraction_line->interval.end, end, flags))
1742 {
1743 this->interval.start = start;
1744 this->interval.end = denominator->interval.end;
1745 return true;
1746 }
// A partial match (e.g. numerator only) must not leave stale results in the public sub-parsers.
1747 numerator->invalidate();
1748 fraction_line->invalidate();
1749 denominator->invalidate();
1750 this->interval.invalidate();
1751 return false;
1752 }
1753 };
1754
1757#ifdef _UNICODE
1758 using tfraction = wfraction;
1759#else
1760 using tfraction = fraction;
1761#endif
1763
1767 template <class T>
1768 class basic_score : public basic_parser<T>
1769 {
1770 public:
1772 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1773 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1774 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1775 _In_ const std::shared_ptr<basic_parser<T>>& space,
1776 _In_ const std::locale& locale = std::locale()) :
1778 home(_home),
1779 separator(_separator),
1780 guest(_guest),
1781 m_space(space)
1782 {}
1783
1784 virtual void invalidate()
1785 {
1786 home->invalidate();
1787 separator->invalidate();
1788 guest->invalidate();
1790 }
1791
1792 std::shared_ptr<basic_parser<T>> home;
1793 std::shared_ptr<basic_parser<T>> separator;
1794 std::shared_ptr<basic_parser<T>> guest;
1795
1796 protected:
/// Matches a score: home, separator and guest, with any number of m_space
/// matches (including none) allowed on either side of the separator.
///
/// \param[in] text   Text to parse; may be null only when start >= end (see _Assume_)
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop matching at
/// \param[in] flags  Matching flags; passed as-is to home, separator and guest,
///                   and with match_multiline cleared to m_space
///
/// \returns true when home, separator and guest all matched (interval spans from
///          start to the end of guest); false otherwise, with home, separator,
///          guest and the own interval invalidated
1797 virtual bool do_match(
1798 _In_reads_or_z_opt_(end) const T* text,
1799 _In_ size_t start = 0,
1800 _In_ size_t end = SIZE_MAX,
1801 _In_ int flags = match_default)
1802 {
1803 _Assume_(text || start >= end);
// interval.end serves as the running cursor while the parts are matched one after another.
1804 this->interval.end = start;
1805
1806 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1807
1808 if (home->match(text, this->interval.end, end, flags))
1809 this->interval.end = home->interval.end;
1810 else
// "end" here is the cleanup label below, not the `end` parameter (labels have their own namespace).
1811 goto end;
1812
// Empty-bodied loop: advance the cursor over every consecutive m_space match.
1813 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1814
1815 if (separator->match(text, this->interval.end, end, flags))
1816 this->interval.end = separator->interval.end;
1817 else
1818 goto end;
1819
// Same space skipping after the separator.
1820 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1821
1822 if (guest->match(text, this->interval.end, end, flags))
1823 this->interval.end = guest->interval.end;
1824 else
1825 goto end;
1826
1827 this->interval.start = start;
1828 return true;
1829
// Shared failure path: clear the public sub-parsers so no partial result remains visible.
1830 end:
1831 home->invalidate();
1832 separator->invalidate();
1833 guest->invalidate();
1834 this->interval.invalidate();
1835 return false;
1836 }
1837
1838 std::shared_ptr<basic_parser<T>> m_space;
1839 };
1840
1841 using score = basic_score<char>;
1843#ifdef _UNICODE
1844 using tscore = wscore;
1845#else
1846 using tscore = score;
1847#endif
1849
1853 template <class T>
1855 {
1856 public:
1858 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1859 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1860 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1861 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1862 _In_ const std::locale& locale = std::locale()) :
1868 {}
1869
1870 virtual void invalidate()
1871 {
1872 if (positive_sign) positive_sign->invalidate();
1873 if (negative_sign) negative_sign->invalidate();
1874 if (special_sign) special_sign->invalidate();
1875 number->invalidate();
1877 }
1878
1879 std::shared_ptr<basic_parser<T>> positive_sign;
1880 std::shared_ptr<basic_parser<T>> negative_sign;
1881 std::shared_ptr<basic_parser<T>> special_sign;
1882 std::shared_ptr<basic_parser<T>> number;
1883
1884 protected:
/// Matches an optional sign followed by a mandatory number.
///
/// The sign parsers are tried in the order positive, negative, special; the
/// first one that matches wins. A null sign parser is skipped. The sign parsers
/// that did not win are invalidated.
///
/// \param[in] text   Text to parse; may be null only when start >= end (see _Assume_)
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop matching at
/// \param[in] flags  Matching flags, forwarded unchanged to the sub-parsers
///
/// \returns true when number matched right after the optional sign (interval
///          spans from start to the end of number); false otherwise, with all
///          sub-parsers and the own interval invalidated
1885 virtual bool do_match(
1886 _In_reads_or_z_opt_(end) const T* text,
1887 _In_ size_t start = 0,
1888 _In_ size_t end = SIZE_MAX,
1889 _In_ int flags = match_default)
1890 {
1891 _Assume_(text || start >= end);
// interval.end is the cursor; it stays at start when no sign is present.
1892 this->interval.end = start;
1893 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1894 this->interval.end = positive_sign->interval.end;
1895 if (negative_sign) negative_sign->invalidate();
1896 if (special_sign) special_sign->invalidate();
1897 }
1898 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1899 this->interval.end = negative_sign->interval.end;
1900 if (positive_sign) positive_sign->invalidate();
1901 if (special_sign) special_sign->invalidate();
1902 }
1903 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1904 this->interval.end = special_sign->interval.end;
1905 if (positive_sign) positive_sign->invalidate();
1906 if (negative_sign) negative_sign->invalidate();
1907 }
1908 else {
// No sign at all: still a candidate, the number alone may match.
1909 if (positive_sign) positive_sign->invalidate();
1910 if (negative_sign) negative_sign->invalidate();
1911 if (special_sign) special_sign->invalidate();
1912 }
1913 if (number->match(text, this->interval.end, end, flags)) {
1914 this->interval.start = start;
1915 this->interval.end = number->interval.end;
1916 return true;
1917 }
// A sign without a number is not a numeral: discard the sign match as well.
1918 if (positive_sign) positive_sign->invalidate();
1919 if (negative_sign) negative_sign->invalidate();
1920 if (special_sign) special_sign->invalidate();
1921 number->invalidate();
1922 this->interval.invalidate();
1923 return false;
1924 }
1925 };
1926
// Instantiations of basic_signed_numeral for char and wchar_t text.
1927 using signed_numeral = basic_signed_numeral<char>;
1928 using wsigned_numeral = basic_signed_numeral<wchar_t>;
// tsigned_numeral resolves to the wchar_t variant when _UNICODE is defined, to the char variant otherwise.
1929#ifdef _UNICODE
1930 using tsigned_numeral = wsigned_numeral;
1931#else
1932 using tsigned_numeral = signed_numeral;
1933#endif
// Same instantiation as signed_numeral (char), published under an SGML-specific name.
1934 using sgml_signed_numeral = basic_signed_numeral<char>;
1935
1939 template <class T>
1941 {
1942 public:
1944 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1945 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1946 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1947 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1948 _In_ const std::shared_ptr<basic_parser<T>>& space,
1949 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1950 _In_ const std::locale& locale = std::locale()) :
1957 m_space(space)
1958 {}
1959
1960 virtual void invalidate()
1961 {
1962 if (positive_sign) positive_sign->invalidate();
1963 if (negative_sign) negative_sign->invalidate();
1964 if (special_sign) special_sign->invalidate();
1965 integer->invalidate();
1966 fraction->invalidate();
1968 }
1969
1970 std::shared_ptr<basic_parser<T>> positive_sign;
1971 std::shared_ptr<basic_parser<T>> negative_sign;
1972 std::shared_ptr<basic_parser<T>> special_sign;
1973 std::shared_ptr<basic_parser<T>> integer;
1974 std::shared_ptr<basic_parser<T>> fraction;
1975
1976 protected:
/// Matches an optional sign followed by a mixed numeral.
///
/// After the sign (positive, negative, special tried in that order; null sign
/// parsers are skipped) three forms are attempted, in this order:
///   1. integer, one or more m_space matches, fraction
///   2. fraction alone
///   3. integer alone
/// In form 1, if the integer and at least one space match but no fraction
/// follows, the method still succeeds with the integer only; the spaces are not
/// part of the resulting interval.
///
/// \param[in] text   Text to parse; may be null only when start >= end (see _Assume_)
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop matching at
/// \param[in] flags  Matching flags; passed as-is to the sign, integer and fraction
///                   parsers, and with match_multiline cleared to m_space
///
/// \returns true when one of the forms matched; false otherwise, with all
///          sub-parsers and the own interval invalidated
1977 virtual bool do_match(
1978 _In_reads_or_z_opt_(end) const T* text,
1979 _In_ size_t start = 0,
1980 _In_ size_t end = SIZE_MAX,
1981 _In_ int flags = match_default)
1982 {
1983 _Assume_(text || start >= end);
// interval.end is the cursor; after the sign block it marks where the numeral itself starts.
1984 this->interval.end = start;
1985
1986 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1987 this->interval.end = positive_sign->interval.end;
1988 if (negative_sign) negative_sign->invalidate();
1989 if (special_sign) special_sign->invalidate();
1990 }
1991 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1992 this->interval.end = negative_sign->interval.end;
1993 if (positive_sign) positive_sign->invalidate();
1994 if (special_sign) special_sign->invalidate();
1995 }
1996 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1997 this->interval.end = special_sign->interval.end;
1998 if (positive_sign) positive_sign->invalidate();
1999 if (negative_sign) negative_sign->invalidate();
2000 }
2001 else {
2002 if (positive_sign) positive_sign->invalidate();
2003 if (negative_sign) negative_sign->invalidate();
2004 if (special_sign) special_sign->invalidate();
2005 }
2006
2007 // Check for <integer> <fraction>
2008 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
2009 if (integer->match(text, this->interval.end, end, flags) &&
2010 m_space->match(text, integer->interval.end, end, space_match_flags))
2011 {
// Empty-bodied loop: starting after the first space, skip every further consecutive m_space match.
2012 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
2013 if (fraction->match(text, this->interval.end, end, flags)) {
2014 this->interval.start = start;
2015 this->interval.end = fraction->interval.end;
2016 return true;
2017 }
// Integer followed by space(s) but no fraction: accept the integer and drop the skipped spaces.
2018 fraction->invalidate();
2019 this->interval.start = start;
2020 this->interval.end = integer->interval.end;
2021 return true;
2022 }
2023
// Reaching here, interval.end was not moved by the attempt above: it is only assigned inside the branch.
2024 // Check for <fraction>
2025 if (fraction->match(text, this->interval.end, end, flags)) {
2026 integer->invalidate();
2027 this->interval.start = start;
2028 this->interval.end = fraction->interval.end;
2029 return true;
2030 }
2031
2032 // Check for <integer>
2033 if (integer->match(text, this->interval.end, end, flags)) {
2034 fraction->invalidate();
2035 this->interval.start = start;
2036 this->interval.end = integer->interval.end;
2037 return true;
2038 }
2039
// Nothing numeric after the optional sign: discard the sign match too.
2040 if (positive_sign) positive_sign->invalidate();
2041 if (negative_sign) negative_sign->invalidate();
2042 if (special_sign) special_sign->invalidate();
2043 integer->invalidate();
2044 fraction->invalidate();
2045 this->interval.invalidate();
2046 return false;
2047 }
2048
2049 std::shared_ptr<basic_parser<T>> m_space;
2050 };
2051
// Instantiations of basic_mixed_numeral for char and wchar_t text.
2052 using mixed_numeral = basic_mixed_numeral<char>;
2053 using wmixed_numeral = basic_mixed_numeral<wchar_t>;
// tmixed_numeral resolves to the wchar_t variant when _UNICODE is defined, to the char variant otherwise.
2054#ifdef _UNICODE
2055 using tmixed_numeral = wmixed_numeral;
2056#else
2057 using tmixed_numeral = mixed_numeral;
2058#endif
// Same instantiation as mixed_numeral (char), published under an SGML-specific name.
2059 using sgml_mixed_numeral = basic_mixed_numeral<char>;
2060
2064 template <class T>
2066 {
2067 public:
2069 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2070 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2071 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2072 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2073 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2074 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2075 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2076 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2077 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2078 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2079 _In_ const std::locale& locale = std::locale()) :
2091 value(std::numeric_limits<double>::quiet_NaN())
2092 {}
2093
2094 virtual void invalidate()
2095 {
2096 if (positive_sign) positive_sign->invalidate();
2097 if (negative_sign) negative_sign->invalidate();
2098 if (special_sign) special_sign->invalidate();
2099 integer->invalidate();
2100 decimal_separator->invalidate();
2101 decimal->invalidate();
2102 if (exponent_symbol) exponent_symbol->invalidate();
2103 if (positive_exp_sign) positive_exp_sign->invalidate();
2104 if (negative_exp_sign) negative_exp_sign->invalidate();
2105 if (exponent) exponent->invalidate();
2106 value = std::numeric_limits<double>::quiet_NaN();
2108 }
2109
2110 std::shared_ptr<basic_parser<T>> positive_sign;
2111 std::shared_ptr<basic_parser<T>> negative_sign;
2112 std::shared_ptr<basic_parser<T>> special_sign;
2113 std::shared_ptr<basic_integer<T>> integer;
2114 std::shared_ptr<basic_parser<T>> decimal_separator;
2115 std::shared_ptr<basic_integer<T>> decimal;
2116 std::shared_ptr<basic_parser<T>> exponent_symbol;
2117 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2118 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2119 std::shared_ptr<basic_integer<T>> exponent;
2120 double value;
2121
2122 protected:
/// Matches a number in scientific notation and computes its value as a double.
///
/// Layout, in order: optional sign (positive, negative, special tried in that
/// order; null sign parsers are skipped), optional integer part, optional
/// "decimal separator + decimal part", optional exponent (exponent symbol,
/// optional exponent sign, exponent digits). At least one of the integer and
/// decimal parts must be non-empty.
///
/// \param[in] text   Text to parse; may be null only when start >= end (see _Assume_)
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop matching at
/// \param[in] flags  Matching flags, forwarded unchanged to the sub-parsers
///
/// \returns true when an integer and/or decimal part matched (interval and value
///          are set); false otherwise, with all sub-parsers and the own interval
///          invalidated
2123 virtual bool do_match(
2124 _In_reads_or_z_opt_(end) const T* text,
2125 _In_ size_t start = 0,
2126 _In_ size_t end = SIZE_MAX,
2127 _In_ int flags = match_default)
2128 {
2129 _Assume_(text || start >= end);
// interval.end is the cursor that advances as each part is accepted.
2130 this->interval.end = start;
2131
2132 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2133 this->interval.end = positive_sign->interval.end;
2134 if (negative_sign) negative_sign->invalidate();
2135 if (special_sign) special_sign->invalidate();
2136 }
2137 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2138 this->interval.end = negative_sign->interval.end;
2139 if (positive_sign) positive_sign->invalidate();
2140 if (special_sign) special_sign->invalidate();
2141 }
2142 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2143 this->interval.end = special_sign->interval.end;
2144 if (positive_sign) positive_sign->invalidate();
2145 if (negative_sign) negative_sign->invalidate();
2146 }
2147 else {
2148 if (positive_sign) positive_sign->invalidate();
2149 if (negative_sign) negative_sign->invalidate();
2150 if (special_sign) special_sign->invalidate();
2151 }
2152
// The integer part is optional here; whether anything numeric matched is checked further below.
2153 if (integer->match(text, this->interval.end, end, flags))
2154 this->interval.end = integer->interval.end;
2155
// The separator counts only together with a decimal part; a lone separator is rolled back.
2156 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2157 decimal->match(text, decimal_separator->interval.end, end, flags))
2158 this->interval.end = decimal->interval.end;
2159 else {
2160 decimal_separator->invalidate();
2161 decimal->invalidate();
2162 }
2163
2164 if (integer->interval.empty() &&
2165 decimal->interval.empty())
2166 {
2167 // No integer part, no decimal part.
2168 if (positive_sign) positive_sign->invalidate();
2169 if (negative_sign) negative_sign->invalidate();
2170 if (special_sign) special_sign->invalidate();
2171 integer->invalidate();
2172 decimal_separator->invalidate();
2173 decimal->invalidate();
2174 if (exponent_symbol) exponent_symbol->invalidate();
2175 if (positive_exp_sign) positive_exp_sign->invalidate();
2176 if (negative_exp_sign) negative_exp_sign->invalidate();
2177 if (exponent) exponent->invalidate();
2178 this->interval.invalidate();
2179 return false;
2180 }
2181
// Exponent, first form: symbol followed by either "positive sign + digits" or digits with no sign.
2182 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2183 ((positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2184 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags)) ||
2185 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2186 {
2187 this->interval.end = exponent->interval.end;
2188 if (negative_exp_sign) negative_exp_sign->invalidate();
2189 }
// Exponent, second form: symbol, negative sign, digits. The symbol is matched again from the same cursor.
2190 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2191 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2192 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2193 {
2194 this->interval.end = exponent->interval.end;
2195 if (positive_exp_sign) positive_exp_sign->invalidate();
2196 }
2197 else {
// No usable exponent: the number ends at the mantissa and every exponent sub-parser is cleared.
2198 if (exponent_symbol) exponent_symbol->invalidate();
2199 if (positive_exp_sign) positive_exp_sign->invalidate();
2200 if (negative_exp_sign) negative_exp_sign->invalidate();
2201 if (exponent) exponent->invalidate();
2202 }
2203
// Assemble the value: integer part first, then the decimal part scaled by 10^-(size of its interval).
// NOTE(review): this treats the decimal interval size as the number of decimal digits - confirm for
// digit parsers that can consume more than one text position per digit.
2204 value = (double)integer->value;
2205 if (decimal->interval)
2206 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
// Only the negative sign changes the value; positive and special signs leave it as is.
2207 if (negative_sign && negative_sign->interval)
2208 value = -value;
2209 if (exponent && exponent->interval) {
2210 double e = (double)exponent->value;
2211 if (negative_exp_sign && negative_exp_sign->interval)
2212 e = -e;
2213 value *= pow(10.0, e);
2214 }
2215
2216 this->interval.start = start;
2217 return true;
2218 }
2219 };
2220
// Instantiations of basic_scientific_numeral for char and wchar_t text.
2221 using scientific_numeral = basic_scientific_numeral<char>;
2222 using wscientific_numeral = basic_scientific_numeral<wchar_t>;
// tscientific_numeral resolves to the wchar_t variant when _UNICODE is defined, to the char variant otherwise.
2223#ifdef _UNICODE
2224 using tscientific_numeral = wscientific_numeral;
2225#else
2226 using tscientific_numeral = scientific_numeral;
2227#endif
// Same instantiation as scientific_numeral (char), published under an SGML-specific name.
2228 using sgml_scientific_numeral = basic_scientific_numeral<char>;
2229
2233 template <class T>
2235 {
2236 public:
2238 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2239 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2240 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2241 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2242 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2243 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2244 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2245 _In_ const std::locale& locale = std::locale()) :
2254 {}
2255
2256 virtual void invalidate()
2257 {
2258 if (positive_sign) positive_sign->invalidate();
2259 if (negative_sign) negative_sign->invalidate();
2260 if (special_sign) special_sign->invalidate();
2261 currency->invalidate();
2262 integer->invalidate();
2263 decimal_separator->invalidate();
2264 decimal->invalidate();
2266 }
2267
2268 std::shared_ptr<basic_parser<T>> positive_sign;
2269 std::shared_ptr<basic_parser<T>> negative_sign;
2270 std::shared_ptr<basic_parser<T>> special_sign;
2271 std::shared_ptr<basic_parser<T>> currency;
2272 std::shared_ptr<basic_parser<T>> integer;
2273 std::shared_ptr<basic_parser<T>> decimal_separator;
2274 std::shared_ptr<basic_parser<T>> decimal;
2275
2276 protected:
/// Matches a monetary amount: optional sign, mandatory currency, then an integer
/// part and/or a "decimal separator + decimal part".
///
/// The sign parsers are tried in the order positive, negative, special; the
/// first one that matches wins and the others are invalidated. A null sign
/// parser is skipped, the same way the rest of this class already treats the
/// sign parsers as optional.
///
/// \param[in] text   Text to parse; may be null only when start >= end (see _Assume_)
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop matching at
/// \param[in] flags  Matching flags, forwarded unchanged to the sub-parsers
///
/// \returns true when the currency matched and at least one of the integer and
///          decimal parts is non-empty (interval spans from start to the end of
///          the last accepted part); false otherwise, with the sub-parsers and
///          the own interval invalidated
2277 virtual bool do_match(
2278 _In_reads_or_z_opt_(end) const T* text,
2279 _In_ size_t start = 0,
2280 _In_ size_t end = SIZE_MAX,
2281 _In_ int flags = match_default)
2282 {
2283 _Assume_(text || start >= end);
// interval.end is the cursor that advances as each part is accepted.
2284 this->interval.end = start;
2285
// Each sign parser is null-checked before use: an unset sign must be skipped, not dereferenced.
2286 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2287 this->interval.end = positive_sign->interval.end;
2288 if (negative_sign) negative_sign->invalidate();
2289 if (special_sign) special_sign->invalidate();
2290 }
2291 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2292 this->interval.end = negative_sign->interval.end;
2293 if (positive_sign) positive_sign->invalidate();
2294 if (special_sign) special_sign->invalidate();
2295 }
2296 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2297 this->interval.end = special_sign->interval.end;
2298 if (positive_sign) positive_sign->invalidate();
2299 if (negative_sign) negative_sign->invalidate();
2300 }
2301 else {
// No sign present (or no sign parser configured): continue with the currency.
2302 if (positive_sign) positive_sign->invalidate();
2303 if (negative_sign) negative_sign->invalidate();
2304 if (special_sign) special_sign->invalidate();
2305 }
2306
// The currency is mandatory; without it nothing else is attempted.
2307 if (currency->match(text, this->interval.end, end, flags))
2308 this->interval.end = currency->interval.end;
2309 else {
2310 if (positive_sign) positive_sign->invalidate();
2311 if (negative_sign) negative_sign->invalidate();
2312 if (special_sign) special_sign->invalidate();
2313 integer->invalidate();
2314 decimal_separator->invalidate();
2315 decimal->invalidate();
2316 this->interval.invalidate();
2317 return false;
2318 }
2319
2320 if (integer->match(text, this->interval.end, end, flags))
2321 this->interval.end = integer->interval.end;
// The separator counts only together with a decimal part; a lone separator is rolled back.
2322 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2323 decimal->match(text, decimal_separator->interval.end, end, flags))
2324 this->interval.end = decimal->interval.end;
2325 else {
2326 decimal_separator->invalidate();
2327 decimal->invalidate();
2328 }
2329
2330 if (integer->interval.empty() &&
2331 decimal->interval.empty())
2332 {
2333 // No integer part, no decimal part.
2334 if (positive_sign) positive_sign->invalidate();
2335 if (negative_sign) negative_sign->invalidate();
2336 if (special_sign) special_sign->invalidate();
2337 currency->invalidate();
2338 integer->invalidate();
2339 decimal_separator->invalidate();
2340 decimal->invalidate();
2341 this->interval.invalidate();
2342 return false;
2343 }
2344
2345 this->interval.start = start;
2346 return true;
2347 }
2348 };
2349
// Aliases of basic_monetary_numeral for char and wchar_t. The t-prefixed alias
// selects the wchar_t variant when _UNICODE is defined and the char variant
// otherwise. The sgml_ alias is the plain char instantiation.
2350 using monetary_numeral = basic_monetary_numeral<char>;
2351 using wmonetary_numeral = basic_monetary_numeral<wchar_t>;
2352#ifdef _UNICODE
2353 using tmonetary_numeral = wmonetary_numeral;
2354#else
2355 using tmonetary_numeral = monetary_numeral;
2356#endif
2357 using sgml_monetary_numeral = basic_monetary_numeral<char>;
2358
2362 template <class T>
2364 {
2365 public:
2367 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2368 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2369 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2370 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2371 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2372 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2373 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2374 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2375 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2376 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2377 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2378 _In_ const std::locale& locale = std::locale()) :
2380 m_digit_0(digit_0),
2381 m_digit_1(digit_1),
2382 m_digit_2(digit_2),
2383 m_digit_3(digit_3),
2384 m_digit_4(digit_4),
2385 m_digit_5(digit_5),
2386 m_digit_6(digit_6),
2387 m_digit_7(digit_7),
2388 m_digit_8(digit_8),
2389 m_digit_9(digit_9),
2390 m_separator(separator)
2391 {
2392 value.s_addr = 0;
2393 }
2394
// Resets the four component intervals to start = 1, end = 0 (the same pair
// this file uses elsewhere to reset an interval) and clears the parsed address.
2395 virtual void invalidate()
2396 {
2397 components[0].start = 1;
2398 components[0].end = 0;
2399 components[1].start = 1;
2400 components[1].end = 0;
2401 components[2].start = 1;
2402 components[2].end = 0;
2403 components[3].start = 1;
2404 components[3].end = 0;
2405 value.s_addr = 0;
// NOTE(review): listing line 2406 is missing here; presumably it resets this
// parser's own state through the base class - confirm against the header.
2407 }
2408
2411
2412 protected:
// Matches four decimal components separated by m_separator and accumulates
// them into value.s_addr.
//
// text  - text to match against; may be null only when start >= end
// start - offset at which matching begins
// end   - exclusive upper bound of the range to examine
// flags - matching flags, forwarded unchanged to every sub-parser
//
// Returns true with this->interval spanning the address and components[0..3]
// spanning the individual numbers. On any failure invalidate() is called and
// false is returned.
2413 virtual bool do_match(
2414 _In_reads_or_z_opt_(end) const T* text,
2415 _In_ size_t start = 0,
2416 _In_ size_t end = SIZE_MAX,
2417 _In_ int flags = match_default)
2418 {
2419 _Assume_(text || start >= end);
2420 this->interval.end = start;
2421 value.s_addr = 0;
2422
2423 size_t i;
2424 for (i = 0; i < 4; i++) {
// Every component except the first must be preceded by a separator.
2425 if (i) {
2426 if (m_separator->match(text, this->interval.end, end, flags))
2427 this->interval.end = m_separator->interval.end;
2428 else
2429 goto error;
2430 }
2431
2432 components[i].start = this->interval.end;
2433 bool is_empty = true;
2434 size_t x;
// Accumulate digits into x. The digit parsers are tried in order 0..9; a digit
// that would push the component above 255 is left unconsumed and ends the
// component.
2435 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2436 size_t dig, digit_end;
2437 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2438 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2439 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2440 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2441 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2442 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2443 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2444 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2445 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2446 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2447 else break;
2448 size_t x_n = x * 10 + dig;
2449 if (x_n <= 255) {
2450 x = x_n;
2451 this->interval.end = digit_end;
2452 is_empty = false;
2453 }
2454 else
2455 break;
2456 }
// A component with no digits at all makes the whole address invalid.
2457 if (is_empty)
2458 goto error;
2459 components[i].end = this->interval.end;
// Shift earlier components up so the first one ends in the most significant
// byte of the accumulated integer.
2460 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2461 }
// NOTE(review): the loop above leaves only with i == 4 or through goto error,
// so this check cannot fire; it looks like a leftover safeguard.
2462 if (i < 4)
2463 goto error;
2464
// HE2BE is defined outside this listing; presumably it converts the
// accumulated host-order value to big-endian in place - confirm.
2465 HE2BE(reinterpret_cast<uint32_t&>(value.s_addr));
2466 this->interval.start = start;
2467 return true;
2468
2469 error:
2470 invalidate();
2471 return false;
2472 }
2473
// Parsers recognising the decimal digits 0 to 9; do_match() tries them in
// this order and maps m_digit_N to the value N.
2474 std::shared_ptr<basic_parser<T>>
2475 m_digit_0,
2476 m_digit_1,
2477 m_digit_2,
2478 m_digit_3,
2479 m_digit_4,
2480 m_digit_5,
2481 m_digit_6,
2482 m_digit_7,
2483 m_digit_8,
2484 m_digit_9;
// Parser for the separator required between consecutive components.
2485 std::shared_ptr<basic_parser<T>> m_separator;
2486 };
2487
// Aliases of basic_ipv4_address for char and wchar_t. The t-prefixed alias
// selects the wchar_t variant when _UNICODE is defined and the char variant
// otherwise. The sgml_ alias is the plain char instantiation.
2488 using ipv4_address = basic_ipv4_address<char>;
2489 using wipv4_address = basic_ipv4_address<wchar_t>;
2490#ifdef _UNICODE
2491 using tipv4_address = wipv4_address;
2492#else
2493 using tipv4_address = ipv4_address;
2494#endif
2495 using sgml_ipv4_address = basic_ipv4_address<char>;
2496
2500 template <class T>
2502 {
2503 public:
// Constructs the parser; the locale is passed on to basic_parser<T>.
2504 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2505
2506 protected:
// Matches one character allowed in an IPv6 scope ID: '-', '_', ':' or any
// character the parser's locale classifies as alphanumeric.
//
// Returns true with this->interval = [start, start + 1) on a match; otherwise
// invalidates this->interval and returns false. Nothing is matched at or past
// `end`, or at a terminating zero.
2507 virtual bool do_match(
2508 _In_reads_or_z_opt_(end) const T* text,
2509 _In_ size_t start = 0,
2510 _In_ size_t end = SIZE_MAX,
2511 _In_ int flags = match_default)
2512 {
2513 _Assume_(text || start >= end);
2514 if (start < end && text[start]) {
2515 if (text[start] == '-' ||
2516 text[start] == '_' ||
2517 text[start] == ':' ||
2518 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2519 {
2520 this->interval.end = (this->interval.start = start) + 1;
2521 return true;
2522 }
2523 }
2524 this->interval.invalidate();
2525 return false;
2526 }
2527 };
2528
2531#ifdef _UNICODE
2533#else
2535#endif
2536
2541 {
2542 public:
// Constructs the parser; the locale is passed on to sgml_parser.
2543 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2544
2545 protected:
// SGML-aware variant of the IPv6 scope ID character test. The next unit of
// input is obtained through next_sgml_cp() as a zero-terminated wide string
// and accepted when it is exactly one of '-', '_', ':' or when every character
// of it is alphanumeric in the parser's locale.
//
// Returns true with this->interval.start = start; this->interval.end is the
// value next_sgml_cp() stored through its fourth argument. Otherwise
// this->interval is invalidated and false is returned.
2546 virtual bool do_match(
2547 _In_reads_or_z_(end) const char* text,
2548 _In_ size_t start = 0,
2549 _In_ size_t end = SIZE_MAX,
2550 _In_ int flags = match_default)
2551 {
2552 _Assume_(text || start >= end);
2553 if (start < end && text[start]) {
// buf is scratch storage handed to next_sgml_cp(); presumably the returned
// pointer may refer to it - confirm against sgml.hpp.
2554 wchar_t buf[3];
2555 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2556 const wchar_t* chr_end = chr + stdex::strlen(chr);
// scan_not() returning chr_end means no non-alphanumeric character was found.
2557 if (((chr[0] == L'-' ||
2558 chr[0] == L'_' ||
2559 chr[0] == L':') && chr[1] == 0) ||
2560 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2561 {
2562 this->interval.start = start;
2563 return true;
2564 }
2565 }
2566 this->interval.invalidate();
2567 return false;
2568 }
2569 };
2570
2574 template <class T>
2576 {
2577 public:
2579 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2580 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2581 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2582 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2583 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2584 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2585 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2586 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2587 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2588 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2589 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2590 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2591 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2592 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2593 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2594 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2595 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2596 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2597 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2598 _In_ const std::locale& locale = std::locale()) :
2600 m_digit_0(digit_0),
2601 m_digit_1(digit_1),
2602 m_digit_2(digit_2),
2603 m_digit_3(digit_3),
2604 m_digit_4(digit_4),
2605 m_digit_5(digit_5),
2606 m_digit_6(digit_6),
2607 m_digit_7(digit_7),
2608 m_digit_8(digit_8),
2609 m_digit_9(digit_9),
2610 m_digit_10(digit_10),
2611 m_digit_11(digit_11),
2612 m_digit_12(digit_12),
2613 m_digit_13(digit_13),
2614 m_digit_14(digit_14),
2615 m_digit_15(digit_15),
2616 m_separator(separator),
2617 m_scope_id_separator(scope_id_separator),
2619 {
2620 memset(&value, 0, sizeof(value));
2621 }
2622
// Resets the eight component intervals to start = 1, end = 0, zeroes the
// parsed address and invalidates the scope ID parser when one is set.
2623 virtual void invalidate()
2624 {
2625 components[0].start = 1;
2626 components[0].end = 0;
2627 components[1].start = 1;
2628 components[1].end = 0;
2629 components[2].start = 1;
2630 components[2].end = 0;
2631 components[3].start = 1;
2632 components[3].end = 0;
2633 components[4].start = 1;
2634 components[4].end = 0;
2635 components[5].start = 1;
2636 components[5].end = 0;
2637 components[6].start = 1;
2638 components[6].end = 0;
2639 components[7].start = 1;
2640 components[7].end = 0;
2641 memset(&value, 0, sizeof(value));
2642 if (scope_id) scope_id->invalidate();
// NOTE(review): listing line 2643 is missing here; presumably it resets this
// parser's own state through the base class - confirm against the header.
2644 }
2645
2648 std::shared_ptr<basic_parser<T>> scope_id;
2649
2650 protected:
// Matches up to eight hexadecimal components separated by m_separator, with at
// most one doubled separator ("zero compaction"), optionally followed by a
// scope ID introduced by m_scope_id_separator.
//
// text  - text to match against; may be null only when start >= end
// start - offset at which matching begins
// end   - exclusive upper bound of the range to examine
// flags - matching flags, forwarded unchanged to every sub-parser
//
// Returns true with this->interval spanning the address (and the scope ID when
// one matched). On failure invalidate() is called and false is returned.
2651 virtual bool do_match(
2652 _In_reads_or_z_opt_(end) const T* text,
2653 _In_ size_t start = 0,
2654 _In_ size_t end = SIZE_MAX,
2655 _In_ int flags = match_default)
2656 {
2657 _Assume_(text || start >= end);
2658 this->interval.end = start;
2659 memset(&value, 0, sizeof(value));
2660
// compaction_i is the component index at which the doubled separator was seen;
// SIZE_MAX means none has been seen yet.
2661 size_t i, compaction_i = SIZE_MAX, compaction_start = start;
2662 for (i = 0; i < 8; i++) {
2663 bool is_empty = true;
2664
2665 if (m_separator->match(text, this->interval.end, end, flags)) {
2666 // : found
2667 this->interval.end = m_separator->interval.end;
2668 if (m_separator->match(text, this->interval.end, end, flags)) {
2669 // :: found
2670 if (compaction_i == SIZE_MAX) {
2671 // Zero compaction start
2672 compaction_i = i;
2673 compaction_start = m_separator->interval.start;
2674 this->interval.end = m_separator->interval.end;
2675 }
2676 else {
2677 // More than one zero compaction
2678 break;
2679 }
2680 }
2681 else if (!i) {
2682 // Leading : found
2683 goto error;
2684 }
2685 }
2686 else if (i) {
2687 // : missing
2688 break;
2689 }
2690
2691 components[i].start = this->interval.end;
2692 size_t x;
// Accumulate hexadecimal digits into x. The digit parsers are tried in order
// 0..15; a digit that would push the component above 0xffff is left unconsumed
// and ends the component.
2693 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2694 size_t dig, digit_end;
2695 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2696 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2697 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2698 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2699 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2700 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2701 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2702 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2703 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2704 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2705 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2706 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2707 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2708 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2709 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2710 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2711 else break;
2712 size_t x_n = x * 16 + dig;
2713 if (x_n <= 0xffff) {
2714 x = x_n;
2715 this->interval.end = digit_end;
2716 is_empty = false;
2717 }
2718 else
2719 break;
2720 }
2721 if (is_empty) {
2722 if (compaction_i != SIZE_MAX) {
2723 // Zero compaction active: no sweat.
2724 break;
2725 }
2726 goto error;
2727 }
2728 components[i].end = this->interval.end;
// NOTE(review): x is never written into value.s6_words[i] in the visible
// lines, so the word converted below still holds the zero left by memset().
// An assignment of (uint16_t)x before the conversion appears to be missing -
// confirm against the current upstream header.
2729 HE2BE(reinterpret_cast<uint16_t&>(this->value.s6_words[i]));
2730 }
2731
2732 if (compaction_i != SIZE_MAX) {
2733 // Align components right due to zero compaction.
// NOTE(review): listing lines 2737 and 2742 are missing from this block; the
// statement starting at 2741 continues on the missing line 2742. Presumably
// they move and reset the matching `components` entries - confirm.
2734 size_t j, k;
2735 for (j = 8, k = i; k > compaction_i;) {
2736 this->value.s6_words[--j] = this->value.s6_words[--k];
2738 }
2739 for (; j > compaction_i;) {
2740 this->value.s6_words[--j] = 0;
2741 components[j].start =
2743 }
2744 }
2745 else if (i < 8)
2746 goto error;
2747
// The scope ID is optional: it is consumed only when both the separator and
// the scope ID parser are set and match; otherwise the scope ID parser, if
// any, is invalidated and the address alone is the match.
2748 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2749 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2750 this->interval.end = scope_id->interval.end;
2751 else if (scope_id)
2752 scope_id->invalidate();
2753
2754 this->interval.start = start;
2755 return true;
2756
2757 error:
2758 invalidate();
2759 return false;
2760 }
2761
// Parsers recognising the hexadecimal digits 0 to 15; do_match() tries them in
// this order and maps m_digit_N to the value N.
2762 std::shared_ptr<basic_parser<T>>
2763 m_digit_0,
2764 m_digit_1,
2765 m_digit_2,
2766 m_digit_3,
2767 m_digit_4,
2768 m_digit_5,
2769 m_digit_6,
2770 m_digit_7,
2771 m_digit_8,
2772 m_digit_9,
2773 m_digit_10,
2774 m_digit_11,
2775 m_digit_12,
2776 m_digit_13,
2777 m_digit_14,
2778 m_digit_15;
// Component separator and the optional (null-checked) scope ID separator.
2779 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2780 };
2781
// Aliases of basic_ipv6_address for char and wchar_t. The t-prefixed alias
// selects the wchar_t variant when _UNICODE is defined and the char variant
// otherwise. The sgml_ alias is the plain char instantiation.
2782 using ipv6_address = basic_ipv6_address<char>;
2783 using wipv6_address = basic_ipv6_address<wchar_t>;
2784#ifdef _UNICODE
2785 using tipv6_address = wipv6_address;
2786#else
2787 using tipv6_address = ipv6_address;
2788#endif
2789 using sgml_ipv6_address = basic_ipv6_address<char>;
2790
2794 template <class T>
2796 {
2797 public:
2799 _In_ bool allow_idn,
2800 _In_ const std::locale& locale = std::locale()) :
2802 m_allow_idn(allow_idn),
2803 allow_on_edge(true)
2804 {}
2805
2807
2808 protected:
// Matches one character of a DNS label and records in allow_on_edge whether
// that character may stand at the start or end of a label.
//
// ASCII letters and digits set allow_on_edge to true, '-' sets it to false.
// When m_allow_idn is set, any other character the locale classifies as
// alphanumeric is accepted too, with allow_on_edge = true.
//
// Returns true with this->interval = [start, start + 1) on a match; otherwise
// invalidates this->interval and returns false.
2809 virtual bool do_match(
2810 _In_reads_or_z_opt_(end) const T* text,
2811 _In_ size_t start = 0,
2812 _In_ size_t end = SIZE_MAX,
2813 _In_ int flags = match_default)
2814 {
2815 _Assume_(text || start >= end);
2816 if (start < end && text[start]) {
2817 if (('A' <= text[start] && text[start] <= 'Z') ||
2818 ('a' <= text[start] && text[start] <= 'z') ||
2819 ('0' <= text[start] && text[start] <= '9'))
2820 allow_on_edge = true;
2821 else if (text[start] == '-')
2822 allow_on_edge = false;
2823 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2824 allow_on_edge = true;
2825 else {
2826 this->interval.invalidate();
2827 return false;
2828 }
2829 this->interval.end = (this->interval.start = start) + 1;
2830 return true;
2831 }
2832 this->interval.invalidate();
2833 return false;
2834 }
2835
2836 bool m_allow_idn;
2837 };
2838
// Aliases of basic_dns_domain_char for char and wchar_t. The t-prefixed alias
// selects the wchar_t variant when _UNICODE is defined and the char variant
// otherwise.
2839 using dns_domain_char = basic_dns_domain_char<char>;
2840 using wdns_domain_char = basic_dns_domain_char<wchar_t>;
2841#ifdef _UNICODE
2842 using tdns_domain_char = wdns_domain_char;
2843#else
2844 using tdns_domain_char = dns_domain_char;
2845#endif
2846
2851 {
2852 public:
2854 _In_ bool allow_idn,
2855 _In_ const std::locale& locale = std::locale()) :
2857 {}
2858
2859 protected:
// SGML-aware variant of the DNS label character test. The next unit of input
// is obtained through next_sgml_cp() as a zero-terminated wide string.
//
// A single ASCII letter or digit sets allow_on_edge to true, a single '-' sets
// it to false. When m_allow_idn is set, a unit consisting only of characters
// the locale classifies as alphanumeric is accepted with allow_on_edge = true.
//
// Returns true with this->interval.start = start; this->interval.end is the
// value next_sgml_cp() stored through its fourth argument. Otherwise
// this->interval is invalidated and false is returned.
2860 virtual bool do_match(
2861 _In_reads_or_z_(end) const char* text,
2862 _In_ size_t start = 0,
2863 _In_ size_t end = SIZE_MAX,
2864 _In_ int flags = match_default)
2865 {
2866 _Assume_(text || start >= end);
2867 if (start < end && text[start]) {
2868 wchar_t buf[3];
2869 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2870 const wchar_t* chr_end = chr + stdex::strlen(chr);
2871 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2872 ('a' <= chr[0] && chr[0] <= 'z') ||
2873 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2874 allow_on_edge = true;
2875 else if (chr[0] == '-' && chr[1] == 0)
2876 allow_on_edge = false;
// scan_not() returning chr_end means no non-alphanumeric character was found.
2877 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2878 allow_on_edge = true;
2879 else {
2880 this->interval.invalidate();
2881 return false;
2882 }
2883 this->interval.start = start;
2884 return true;
2885 }
2886 this->interval.invalidate();
2887 return false;
2888 }
2889 };
2890
2894 template <class T>
2896 {
2897 public:
2899 _In_ bool allow_absolute,
2900 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2901 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2902 _In_ const std::locale& locale = std::locale()) :
2905 m_domain_char(domain_char),
2906 m_separator(separator)
2907 {}
2908
2909 protected:
// Matches a DNS name: up to 127 labels made of m_domain_char characters and
// joined by m_separator.
//
// A label must start with a character for which m_domain_char reports
// allow_on_edge, and this->interval.end is only advanced past characters for
// which it does, so a trailing non-edge character (such as '-') is scanned but
// left out of the match.
//
// Returns true with this->interval starting at `start` once at least one label
// was found; otherwise invalidates this->interval and returns false.
2910 virtual bool do_match(
2911 _In_reads_or_z_opt_(end) const T* text,
2912 _In_ size_t start = 0,
2913 _In_ size_t end = SIZE_MAX,
2914 _In_ int flags = match_default)
2915 {
2916 _Assume_(text || start >= end);
// i is the scan position; count is the number of labels started so far.
2917 size_t i = start, count;
2918 for (count = 0; i < end && text[i] && count < 127; count++) {
2919 if (m_domain_char->match(text, i, end, flags) &&
2920 m_domain_char->allow_on_edge)
2921 {
2922 // Domain start
2923 this->interval.end = i = m_domain_char->interval.end;
2924 while (i < end && text[i]) {
// A separator ends the label only when the previous character may stand on a
// label edge.
2925 if (m_domain_char->allow_on_edge &&
2926 m_separator->match(text, i, end, flags))
2927 {
2928 // Domain end
// With m_allow_absolute the separator becomes part of the match; without it
// the match ends before the separator while scanning resumes after it.
2929 if (m_allow_absolute)
2930 this->interval.end = i = m_separator->interval.end;
2931 else {
2932 this->interval.end = i;
2933 i = m_separator->interval.end;
2934 }
2935 break;
2936 }
2937 if (m_domain_char->match(text, i, end, flags)) {
2938 if (m_domain_char->allow_on_edge)
2939 this->interval.end = i = m_domain_char->interval.end;
2940 else
2941 i = m_domain_char->interval.end;
2942 }
2943 else {
// Neither a separator nor a label character: the name ends here.
2944 this->interval.start = start;
2945 return true;
2946 }
2947 }
2948 }
2949 else
2950 break;
2951 }
2952 if (count) {
2953 this->interval.start = start;
2954 return true;
2955 }
2956 this->interval.invalidate();
2957 return false;
2958 }
2959
2961 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2962 std::shared_ptr<basic_parser<T>> m_separator;
2963 };
2964
2967#ifdef _UNICODE
2968 using tdns_name = wdns_name;
2969#else
2970 using tdns_name = dns_name;
2971#endif
2973
2977 template <class T>
2979 {
2980 public:
// Constructs the parser; the locale is passed on to basic_parser<T>.
2981 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2982
2983 protected:
// Matches one character allowed in a URL user name: one of
// - . _ ~ % ! $ & ' * + , ; = or any character the parser's locale classifies
// as alphanumeric. Parentheses are deliberately left out (commented out below).
//
// Returns true with this->interval = [start, start + 1) on a match; otherwise
// invalidates this->interval and returns false.
2984 virtual bool do_match(
2985 _In_reads_or_z_opt_(end) const T* text,
2986 _In_ size_t start = 0,
2987 _In_ size_t end = SIZE_MAX,
2988 _In_ int flags = match_default)
2989 {
2990 _Assume_(text || start >= end);
2991 if (start < end && text[start]) {
2992 if (text[start] == '-' ||
2993 text[start] == '.' ||
2994 text[start] == '_' ||
2995 text[start] == '~' ||
2996 text[start] == '%' ||
2997 text[start] == '!' ||
2998 text[start] == '$' ||
2999 text[start] == '&' ||
3000 text[start] == '\'' ||
3001 //text[start] == '(' ||
3002 //text[start] == ')' ||
3003 text[start] == '*' ||
3004 text[start] == '+' ||
3005 text[start] == ',' ||
3006 text[start] == ';' ||
3007 text[start] == '=' ||
3008 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3009 {
3010 this->interval.end = (this->interval.start = start) + 1;
3011 return true;
3012 }
3013 }
3014 this->interval.invalidate();
3015 return false;
3016 }
3017 };
3018
3021#ifdef _UNICODE
3023#else
3025#endif
3026
3031 {
3032 public:
// Constructs the parser; the locale is passed on to basic_url_username_char<char>.
3033 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3034
3035 protected:
// SGML-aware variant of the URL user name character test. The next unit of
// input is obtained through next_sgml_cp() as a zero-terminated wide string
// and accepted when it is exactly one of - . _ ~ % ! $ & ' * + , ; = or when
// every character of it is alphanumeric in the parser's locale.
//
// Returns true with this->interval.start = start; this->interval.end is the
// value next_sgml_cp() stored through its fourth argument. Otherwise
// this->interval is invalidated and false is returned.
3036 virtual bool do_match(
3037 _In_reads_or_z_(end) const char* text,
3038 _In_ size_t start = 0,
3039 _In_ size_t end = SIZE_MAX,
3040 _In_ int flags = match_default)
3041 {
3042 _Assume_(text || start >= end);
3043 if (start < end && text[start]) {
3044 wchar_t buf[3];
3045 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3046 const wchar_t* chr_end = chr + stdex::strlen(chr);
3047 if (((chr[0] == L'-' ||
3048 chr[0] == L'.' ||
3049 chr[0] == L'_' ||
3050 chr[0] == L'~' ||
3051 chr[0] == L'%' ||
3052 chr[0] == L'!' ||
3053 chr[0] == L'$' ||
3054 chr[0] == L'&' ||
3055 chr[0] == L'\'' ||
3056 //chr[0] == L'(' ||
3057 //chr[0] == L')' ||
3058 chr[0] == L'*' ||
3059 chr[0] == L'+' ||
3060 chr[0] == L',' ||
3061 chr[0] == L';' ||
3062 chr[0] == L'=') && chr[1] == 0) ||
// scan_not() returning chr_end means no non-alphanumeric character was found.
3063 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3064 {
3065 this->interval.start = start;
3066 return true;
3067 }
3068 }
3069
3070 this->interval.invalidate();
3071 return false;
3072 }
3073 };
3074
3078 template <class T>
3080 {
3081 public:
// Constructs the parser; the locale is passed on to basic_parser<T>.
3082 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3083
3084 protected:
// Matches one character allowed in a URL password: one of
// - . _ ~ % ! $ & ' ( ) * + , ; = : or any character the parser's locale
// classifies as alphanumeric.
//
// Returns true with this->interval = [start, start + 1) on a match; otherwise
// invalidates this->interval and returns false.
3085 virtual bool do_match(
3086 _In_reads_or_z_opt_(end) const T* text,
3087 _In_ size_t start = 0,
3088 _In_ size_t end = SIZE_MAX,
3089 _In_ int flags = match_default)
3090 {
3091 _Assume_(text || start >= end);
3092 if (start < end && text[start]) {
3093 if (text[start] == '-' ||
3094 text[start] == '.' ||
3095 text[start] == '_' ||
3096 text[start] == '~' ||
3097 text[start] == '%' ||
3098 text[start] == '!' ||
3099 text[start] == '$' ||
3100 text[start] == '&' ||
3101 text[start] == '\'' ||
3102 text[start] == '(' ||
3103 text[start] == ')' ||
3104 text[start] == '*' ||
3105 text[start] == '+' ||
3106 text[start] == ',' ||
3107 text[start] == ';' ||
3108 text[start] == '=' ||
3109 text[start] == ':' ||
3110 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3111 {
3112 this->interval.end = (this->interval.start = start) + 1;
3113 return true;
3114 }
3115 }
3116 this->interval.invalidate();
3117 return false;
3118 }
3119 };
3120
3123#ifdef _UNICODE
3125#else
3127#endif
3128
3133 {
3134 public:
// Constructs the parser; the locale is passed on to basic_url_password_char<char>.
3135 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3136
3137 protected:
// SGML-aware variant of the URL password character test. The next unit of
// input is obtained through next_sgml_cp() as a zero-terminated wide string
// and accepted when it is exactly one of - . _ ~ % ! $ & ' ( ) * + , ; = : or
// when every character of it is alphanumeric in the parser's locale.
//
// Returns true with this->interval.start = start; this->interval.end is the
// value next_sgml_cp() stored through its fourth argument. Otherwise
// this->interval is invalidated and false is returned.
3138 virtual bool do_match(
3139 _In_reads_or_z_(end) const char* text,
3140 _In_ size_t start = 0,
3141 _In_ size_t end = SIZE_MAX,
3142 _In_ int flags = match_default)
3143 {
3144 _Assume_(text || start >= end);
3145 if (start < end && text[start]) {
3146 wchar_t buf[3];
3147 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3148 const wchar_t* chr_end = chr + stdex::strlen(chr);
3149 if (((chr[0] == L'-' ||
3150 chr[0] == L'.' ||
3151 chr[0] == L'_' ||
3152 chr[0] == L'~' ||
3153 chr[0] == L'%' ||
3154 chr[0] == L'!' ||
3155 chr[0] == L'$' ||
3156 chr[0] == L'&' ||
3157 chr[0] == L'\'' ||
3158 chr[0] == L'(' ||
3159 chr[0] == L')' ||
3160 chr[0] == L'*' ||
3161 chr[0] == L'+' ||
3162 chr[0] == L',' ||
3163 chr[0] == L';' ||
3164 chr[0] == L'=' ||
3165 chr[0] == L':') && chr[1] == 0) ||
// scan_not() returning chr_end means no non-alphanumeric character was found.
3166 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3167 {
3168 this->interval.start = start;
3169 return true;
3170 }
3171 }
3172 this->interval.invalidate();
3173 return false;
3174 }
3175 };
3176
3180 template <class T>
3182 {
3183 public:
// Constructs the parser; the locale is passed on to basic_parser<T>.
3184 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3185
3186 protected:
// Matches one character allowed in a URL path: one of
// / - . _ ~ % ! $ & ' ( ) * + , ; = : @ ? # or any character the parser's
// locale classifies as alphanumeric.
//
// Returns true with this->interval = [start, start + 1) on a match; otherwise
// invalidates this->interval and returns false.
3187 virtual bool do_match(
3188 _In_reads_or_z_opt_(end) const T* text,
3189 _In_ size_t start = 0,
3190 _In_ size_t end = SIZE_MAX,
3191 _In_ int flags = match_default)
3192 {
3193 _Assume_(text || start >= end);
3194 if (start < end && text[start]) {
3195 if (text[start] == '/' ||
3196 text[start] == '-' ||
3197 text[start] == '.' ||
3198 text[start] == '_' ||
3199 text[start] == '~' ||
3200 text[start] == '%' ||
3201 text[start] == '!' ||
3202 text[start] == '$' ||
3203 text[start] == '&' ||
3204 text[start] == '\'' ||
3205 text[start] == '(' ||
3206 text[start] == ')' ||
3207 text[start] == '*' ||
3208 text[start] == '+' ||
3209 text[start] == ',' ||
3210 text[start] == ';' ||
3211 text[start] == '=' ||
3212 text[start] == ':' ||
3213 text[start] == '@' ||
3214 text[start] == '?' ||
3215 text[start] == '#' ||
3216 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3217 {
3218 this->interval.end = (this->interval.start = start) + 1;
3219 return true;
3220 }
3221 }
3222 this->interval.invalidate();
3223 return false;
3224 }
3225 };
3226
3229#ifdef _UNICODE
3231#else
3233#endif
3234
3239 {
3240 public:
// Constructs the parser; the locale is passed on to basic_url_path_char<char>.
3241 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3242
3243 protected:
// SGML-aware variant of the URL path character test. The next unit of input is
// obtained through next_sgml_cp() as a zero-terminated wide string and
// accepted when it is exactly one of / - . _ ~ % ! $ & ' ( ) * + , ; = : @ ? #
// or when every character of it is alphanumeric in the parser's locale.
//
// Returns true with this->interval.start = start; this->interval.end is the
// value next_sgml_cp() stored through its fourth argument. Otherwise
// this->interval is invalidated and false is returned.
3244 virtual bool do_match(
3245 _In_reads_or_z_(end) const char* text,
3246 _In_ size_t start = 0,
3247 _In_ size_t end = SIZE_MAX,
3248 _In_ int flags = match_default)
3249 {
3250 _Assume_(text || start >= end);
3251 if (start < end && text[start]) {
3252 wchar_t buf[3];
3253 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3254 const wchar_t* chr_end = chr + stdex::strlen(chr);
3255 if (((chr[0] == L'/' ||
3256 chr[0] == L'-' ||
3257 chr[0] == L'.' ||
3258 chr[0] == L'_' ||
3259 chr[0] == L'~' ||
3260 chr[0] == L'%' ||
3261 chr[0] == L'!' ||
3262 chr[0] == L'$' ||
3263 chr[0] == L'&' ||
3264 chr[0] == L'\'' ||
3265 chr[0] == L'(' ||
3266 chr[0] == L')' ||
3267 chr[0] == L'*' ||
3268 chr[0] == L'+' ||
3269 chr[0] == L',' ||
3270 chr[0] == L';' ||
3271 chr[0] == L'=' ||
3272 chr[0] == L':' ||
3273 chr[0] == L'@' ||
3274 chr[0] == L'?' ||
3275 chr[0] == L'#') && chr[1] == 0) ||
// scan_not() returning chr_end means no non-alphanumeric character was found.
3276 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3277 {
3278 this->interval.start = start;
3279 return true;
3280 }
3281 }
3282 this->interval.invalidate();
3283 return false;
3284 }
3285 };
3286
3290 template <class T>
3292 {
3293 public:
3295 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3296 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3297 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3298 _In_ const std::locale& locale = std::locale()) :
3300 m_path_char(path_char),
3301 m_query_start(query_start),
3302 m_bookmark_start(bookmark_start)
3303 {}
3304
// Resets the path, query and bookmark intervals to start = 1, end = 0.
3305 virtual void invalidate()
3306 {
3307 path.start = 1;
3308 path.end = 0;
3309 query.start = 1;
3310 query.end = 0;
3311 bookmark.start = 1;
3312 bookmark.end = 0;
// NOTE(review): listing line 3313 is missing here; presumably it resets this
// parser's own state through the base class - confirm against the header.
3314 }
3315
3318 stdex::interval<size_t> bookmark;
3319
3320 protected:
// Matches the path part of a URL and splits it into path, query and bookmark.
//
// Characters are consumed with m_path_char. A match of m_query_start ends the
// path and opens the query; a match of m_bookmark_start ends the path or the
// query and opens the bookmark. Once a query or a bookmark has been opened the
// function always returns true, even when that part turns out to be empty.
//
// text  - text to match against; may be null only when start >= end
// start - offset at which matching begins
// end   - exclusive upper bound of the range to examine
// flags - matching flags, forwarded unchanged to every sub-parser
3321 virtual bool do_match(
3322 _In_reads_or_z_opt_(end) const T* text,
3323 _In_ size_t start = 0,
3324 _In_ size_t end = SIZE_MAX,
3325 _In_ int flags = match_default)
3326 {
3327 _Assume_(text || start >= end);
3328
// Start with an open path and with query and bookmark reset.
3329 this->interval.end = start;
3330 path.start = start;
3331 query.start = 1;
3332 query.end = 0;
3333 bookmark.start = 1;
3334 bookmark.end = 0;
3335
3336 for (;;) {
3337 if (this->interval.end >= end || !text[this->interval.end])
3338 break;
3339 if (m_query_start->match(text, this->interval.end, end, flags)) {
// Query marker: the path ends before it, the query starts after it.
3340 path.end = this->interval.end;
3341 query.start = this->interval.end = m_query_start->interval.end;
3342 for (;;) {
3343 if (this->interval.end >= end || !text[this->interval.end]) {
3344 query.end = this->interval.end;
3345 break;
3346 }
3347 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
// Bookmark marker inside the query: close the query, then consume the bookmark.
3348 query.end = this->interval.end;
3349 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3350 for (;;) {
3351 if (this->interval.end >= end || !text[this->interval.end]) {
3352 bookmark.end = this->interval.end;
3353 break;
3354 }
3355 if (m_path_char->match(text, this->interval.end, end, flags))
3356 this->interval.end = m_path_char->interval.end;
3357 else {
3358 bookmark.end = this->interval.end;
3359 break;
3360 }
3361 }
3362 this->interval.start = start;
3363 return true;
3364 }
3365 if (m_path_char->match(text, this->interval.end, end, flags))
3366 this->interval.end = m_path_char->interval.end;
3367 else {
3368 query.end = this->interval.end;
3369 break;
3370 }
3371 }
3372 this->interval.start = start;
3373 return true;
3374 }
3375 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
// Bookmark marker directly after the path (no query).
3376 path.end = this->interval.end;
3377 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3378 for (;;) {
3379 if (this->interval.end >= end || !text[this->interval.end]) {
3380 bookmark.end = this->interval.end;
3381 break;
3382 }
3383 if (m_path_char->match(text, this->interval.end, end, flags))
3384 this->interval.end = m_path_char->interval.end;
3385 else {
3386 bookmark.end = this->interval.end;
3387 break;
3388 }
3389 }
3390 this->interval.start = start;
3391 return true;
3392 }
3393 if (m_path_char->match(text, this->interval.end, end, flags))
3394 this->interval.end = m_path_char->interval.end;
3395 else
3396 break;
3397 }
3398
// NOTE(review): listing line 3399 is missing here; it holds the condition that
// opens the block closed at 3403, presumably a check that at least one
// character was consumed - confirm against the header.
3400 path.end = this->interval.end;
3401 this->interval.start = start;
3402 return true;
3403 }
3404
// Failure: path and bookmark are reset here; query still holds the reset
// values assigned at the top of the function.
3405 path.start = 1;
3406 path.end = 0;
3407 bookmark.start = 1;
3408 bookmark.end = 0;
3409 this->interval.invalidate();
3410 return false;
3411 }
3412
// Parser for a single path character, and parsers for the markers that open
// the query and the bookmark parts.
3413 std::shared_ptr<basic_parser<T>> m_path_char;
3414 std::shared_ptr<basic_parser<T>> m_query_start;
3415 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3416 };
3417
3420#ifdef _UNICODE
3421 using turl_path = wurl_path;
3422#else
3423 using turl_path = url_path;
3424#endif
3426
3430 template <class T>
3431 class basic_url : public basic_parser<T>
3432 {
3433 public:
// Stores the component parsers used to match a URL.
// Parameters with a leading underscore initialise the member of the same name
// without it; colon, slash, at, ip_lbracket and ip_rbracket initialise the
// corresponding m_-prefixed members.
3434 basic_url(
3435 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3436 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3437 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3438 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3439 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3440 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3441 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3442 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3443 _In_ const std::shared_ptr<basic_parser<T>>& at,
3444 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3445 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3446 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3447 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3448 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3449 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3450 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3451 _In_ const std::locale& locale = std::locale()) :
// NOTE(review): listing line 3452 is missing here; presumably it is the
// base-class initialiser receiving `locale` - confirm against the header.
3453 http_scheme(_http_scheme),
3454 ftp_scheme(_ftp_scheme),
3455 mailto_scheme(_mailto_scheme),
3456 file_scheme(_file_scheme),
3457 m_colon(colon),
3458 m_slash(slash),
3459 username(_username),
3460 password(_password),
3461 m_at(at),
3462 m_ip_lbracket(ip_lbracket),
3463 m_ip_rbracket(ip_rbracket),
3464 ipv4_host(_ipv4_host),
3465 ipv6_host(_ipv6_host),
3466 dns_host(_dns_host),
3467 port(_port),
3468 path(_path)
3469 {}
3470
3471 virtual void invalidate()
3472 {
3473 http_scheme->invalidate();
3474 ftp_scheme->invalidate();
3475 mailto_scheme->invalidate();
3476 file_scheme->invalidate();
3477 username->invalidate();
3478 password->invalidate();
3479 ipv4_host->invalidate();
3480 ipv6_host->invalidate();
3481 dns_host->invalidate();
3482 port->invalidate();
3483 path->invalidate();
3485 }
3486
3487 std::shared_ptr<basic_parser<T>> http_scheme;
3488 std::shared_ptr<basic_parser<T>> ftp_scheme;
3489 std::shared_ptr<basic_parser<T>> mailto_scheme;
3490 std::shared_ptr<basic_parser<T>> file_scheme;
3491 std::shared_ptr<basic_parser<T>> username;
3492 std::shared_ptr<basic_parser<T>> password;
3493 std::shared_ptr<basic_parser<T>> ipv4_host;
3494 std::shared_ptr<basic_parser<T>> ipv6_host;
3495 std::shared_ptr<basic_parser<T>> dns_host;
3496 std::shared_ptr<basic_parser<T>> port;
3497 std::shared_ptr<basic_parser<T>> path;
3498
3499 protected:
3500 virtual bool do_match(
3501 _In_reads_or_z_opt_(end) const T* text,
3502 _In_ size_t start = 0,
3503 _In_ size_t end = SIZE_MAX,
3504 _In_ int flags = match_default)
3505 {
3506 _Assume_(text || start >= end);
3507
3508 this->interval.end = start;
3509
3510 if (http_scheme->match(text, this->interval.end, end, flags) &&
3511 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3512 m_slash->match(text, m_colon->interval.end, end, flags) &&
3513 m_slash->match(text, m_slash->interval.end, end, flags))
3514 {
3515 // http://
3516 this->interval.end = m_slash->interval.end;
3517 ftp_scheme->invalidate();
3518 mailto_scheme->invalidate();
3519 file_scheme->invalidate();
3520 }
3521 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3522 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3523 m_slash->match(text, m_colon->interval.end, end, flags) &&
3524 m_slash->match(text, m_slash->interval.end, end, flags))
3525 {
3526 // ftp://
3527 this->interval.end = m_slash->interval.end;
3528 http_scheme->invalidate();
3529 mailto_scheme->invalidate();
3530 file_scheme->invalidate();
3531 }
3532 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3533 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3534 {
3535 // mailto:
3536 this->interval.end = m_colon->interval.end;
3537 http_scheme->invalidate();
3538 ftp_scheme->invalidate();
3539 file_scheme->invalidate();
3540 }
3541 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3542 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3543 m_slash->match(text, m_colon->interval.end, end, flags) &&
3544 m_slash->match(text, m_slash->interval.end, end, flags))
3545 {
3546 // file://
3547 this->interval.end = m_slash->interval.end;
3548 http_scheme->invalidate();
3549 ftp_scheme->invalidate();
3550 mailto_scheme->invalidate();
3551 }
3552 else {
3553 // Default to http:
3554 http_scheme->invalidate();
3555 ftp_scheme->invalidate();
3556 mailto_scheme->invalidate();
3557 file_scheme->invalidate();
3558 }
3559
3560 if (ftp_scheme->interval) {
3561 if (username->match(text, this->interval.end, end, flags)) {
3562 if (m_colon->match(text, username->interval.end, end, flags) &&
3563 password->match(text, m_colon->interval.end, end, flags) &&
3564 m_at->match(text, password->interval.end, end, flags))
3565 {
3566 // Username and password
3567 this->interval.end = m_at->interval.end;
3568 }
3569 else if (m_at->match(text, this->interval.end, end, flags)) {
3570 // Username only
3571 this->interval.end = m_at->interval.end;
3572 password->invalidate();
3573 }
3574 else {
3575 username->invalidate();
3576 password->invalidate();
3577 }
3578 }
3579 else {
3580 username->invalidate();
3581 password->invalidate();
3582 }
3583
3584 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3585 // Host is IPv4
3586 this->interval.end = ipv4_host->interval.end;
3587 ipv6_host->invalidate();
3588 dns_host->invalidate();
3589 }
3590 else if (
3591 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3592 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3593 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3594 {
3595 // Host is IPv6
3596 this->interval.end = m_ip_rbracket->interval.end;
3597 ipv4_host->invalidate();
3598 dns_host->invalidate();
3599 }
3600 else if (dns_host->match(text, this->interval.end, end, flags)) {
3601 // Host is hostname
3602 this->interval.end = dns_host->interval.end;
3603 ipv4_host->invalidate();
3604 ipv6_host->invalidate();
3605 }
3606 else {
3607 invalidate();
3608 return false;
3609 }
3610
3611 if (m_colon->match(text, this->interval.end, end, flags) &&
3612 port->match(text, m_colon->interval.end, end, flags))
3613 {
3614 // Port
3615 this->interval.end = port->interval.end;
3616 }
3617 else
3618 port->invalidate();
3619
3620 if (path->match(text, this->interval.end, end, flags)) {
3621 // Path
3622 this->interval.end = path->interval.end;
3623 }
3624
3625 this->interval.start = start;
3626 return true;
3627 }
3628
3629 if (mailto_scheme->interval) {
3630 if (username->match(text, this->interval.end, end, flags) &&
3631 m_at->match(text, username->interval.end, end, flags))
3632 {
3633 // Username
3634 this->interval.end = m_at->interval.end;
3635 }
3636 else {
3637 invalidate();
3638 return false;
3639 }
3640
3641 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3642 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3643 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3644 {
3645 // Host is IPv4
3646 this->interval.end = m_ip_rbracket->interval.end;
3647 ipv6_host->invalidate();
3648 dns_host->invalidate();
3649 }
3650 else if (
3651 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3652 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3653 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3654 {
3655 // Host is IPv6
3656 this->interval.end = m_ip_rbracket->interval.end;
3657 ipv4_host->invalidate();
3658 dns_host->invalidate();
3659 }
3660 else if (dns_host->match(text, this->interval.end, end, flags)) {
3661 // Host is hostname
3662 this->interval.end = dns_host->interval.end;
3663 ipv4_host->invalidate();
3664 ipv6_host->invalidate();
3665 }
3666 else {
3667 invalidate();
3668 return false;
3669 }
3670
3671 password->invalidate();
3672 port->invalidate();
3673 path->invalidate();
3674 this->interval.start = start;
3675 return true;
3676 }
3677
3678 if (file_scheme->interval) {
3679 if (path->match(text, this->interval.end, end, flags)) {
3680 // Path
3681 this->interval.end = path->interval.end;
3682 }
3683
3684 username->invalidate();
3685 password->invalidate();
3686 ipv4_host->invalidate();
3687 ipv6_host->invalidate();
3688 dns_host->invalidate();
3689 port->invalidate();
3690 this->interval.start = start;
3691 return true;
3692 }
3693
3694 // "http://" found or defaulted to
3695
3696 // If "http://" explicit, test for username&password.
3697 if (http_scheme->interval &&
3698 username->match(text, this->interval.end, end, flags))
3699 {
3700 if (m_colon->match(text, username->interval.end, end, flags) &&
3701 password->match(text, m_colon->interval.end, end, flags) &&
3702 m_at->match(text, password->interval.end, end, flags))
3703 {
3704 // Username and password
3705 this->interval.end = m_at->interval.end;
3706 }
3707 else if (m_at->match(text, username->interval.end, end, flags)) {
3708 // Username only
3709 this->interval.end = m_at->interval.end;
3710 password->invalidate();
3711 }
3712 else {
3713 username->invalidate();
3714 password->invalidate();
3715 }
3716 }
3717 else {
3718 username->invalidate();
3719 password->invalidate();
3720 }
3721
3722 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3723 // Host is IPv4
3724 this->interval.end = ipv4_host->interval.end;
3725 ipv6_host->invalidate();
3726 dns_host->invalidate();
3727 }
3728 else if (
3729 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3730 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3731 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3732 {
3733 // Host is IPv6
3734 this->interval.end = m_ip_rbracket->interval.end;
3735 ipv4_host->invalidate();
3736 dns_host->invalidate();
3737 }
3738 else if (dns_host->match(text, this->interval.end, end, flags)) {
3739 // Host is hostname
3740 this->interval.end = dns_host->interval.end;
3741 ipv4_host->invalidate();
3742 ipv6_host->invalidate();
3743 }
3744 else {
3745 invalidate();
3746 return false;
3747 }
3748
3749 if (m_colon->match(text, this->interval.end, end, flags) &&
3750 port->match(text, m_colon->interval.end, end, flags))
3751 {
3752 // Port
3753 this->interval.end = port->interval.end;
3754 }
3755 else
3756 port->invalidate();
3757
3758 if (path->match(text, this->interval.end, end, flags)) {
3759 // Path
3760 this->interval.end = path->interval.end;
3761 }
3762
3763 this->interval.start = start;
3764 return true;
3765 }
3766
3767 std::shared_ptr<basic_parser<T>> m_colon;
3768 std::shared_ptr<basic_parser<T>> m_slash;
3769 std::shared_ptr<basic_parser<T>> m_at;
3770 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3771 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3772 };
3773
// Character-type specific aliases of basic_url.
// turl resolves to the wchar_t variant when _UNICODE is defined and to the char variant otherwise.
// sgml_url is the same instantiation as url (basic_url<char>).
3774 using url = basic_url<char>;
3775 using wurl = basic_url<wchar_t>;
3776#ifdef _UNICODE
3777 using turl = wurl;
3778#else
3779 using turl = url;
3780#endif
3781 using sgml_url = basic_url<char>;
3782
/// Tests for `username@host`, where the host is a bracketed IPv4 address, a bracketed IPv6 address
/// or a DNS host name (see do_match()).
///
/// NOTE(review): this listing skips source lines 3787, 3790, 3799 and 3815. They presumably hold the
/// class head, the constructor name, the base-class initializer and the base-class invalidate()
/// call — confirm against the real header.
3786 template <class T>
3788 {
3789 public:
/// Constructor parameters (the constructor name line is not visible in this listing):
///
/// \param[in] _username    Parser for the part before the separator
/// \param[in] at           Parser for the separator between username and host
/// \param[in] ip_lbracket  Parser for the opening bracket of a bracketed IP address
/// \param[in] ip_rbracket  Parser for the closing bracket of a bracketed IP address
/// \param[in] _ipv4_host   Parser for an IPv4 host
/// \param[in] _ipv6_host   Parser for an IPv6 host
/// \param[in] _dns_host    Parser for a DNS host name
/// \param[in] locale       Locale; not used by any visible line
3791 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3792 _In_ const std::shared_ptr<basic_parser<T>>& at,
3793 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3794 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3795 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3796 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3797 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3798 _In_ const std::locale& locale = std::locale()) :
3800 username(_username),
3801 m_at(at),
3802 m_ip_lbracket(ip_lbracket),
3803 m_ip_rbracket(ip_rbracket),
3804 ipv4_host(_ipv4_host),
3805 ipv6_host(_ipv6_host),
3806 dns_host(_dns_host)
3807 {}
3808
/// Invalidates the username and the three host sub-parsers.
3809 virtual void invalidate()
3810 {
3811 username->invalidate();
3812 ipv4_host->invalidate();
3813 ipv6_host->invalidate();
3814 dns_host->invalidate();
3816 }
3817
3818 std::shared_ptr<basic_parser<T>> username; ///< Username parser
3819 std::shared_ptr<basic_parser<T>> ipv4_host; ///< IPv4 host parser
3820 std::shared_ptr<basic_parser<T>> ipv6_host; ///< IPv6 host parser
3821 std::shared_ptr<basic_parser<T>> dns_host; ///< DNS host name parser
3822
3823 protected:
/// Matches `username@host` beginning at \p start.
///
/// \param[in] text   Text to parse; may be null only when start >= end
/// \param[in] start  Index in \p text at which the address must begin
/// \param[in] end    Upper bound of the range of \p text to examine
/// \param[in] flags  Matching flags, forwarded unchanged to every sub-parser
///
/// \returns true with this->interval covering the address on success; false after invalidate() otherwise.
3824 virtual bool do_match(
3825 _In_reads_or_z_opt_(end) const T* text,
3826 _In_ size_t start = 0,
3827 _In_ size_t end = SIZE_MAX,
3828 _In_ int flags = match_default)
3829 {
3830 _Assume_(text || start >= end);
3831
3832 if (username->match(text, start, end, flags) &&
3833 m_at->match(text, username->interval.end, end, flags))
3834 {
3835 // Username@
// Both IP forms must be enclosed in brackets; a bare host is only tried as a DNS name.
3836 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3837 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3838 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3839 {
3840 // Host is IPv4
3841 this->interval.end = m_ip_rbracket->interval.end;
3842 ipv6_host->invalidate();
3843 dns_host->invalidate();
3844 }
3845 else if (
3846 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3847 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3848 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3849 {
3850 // Host is IPv6
3851 this->interval.end = m_ip_rbracket->interval.end;
3852 ipv4_host->invalidate();
3853 dns_host->invalidate();
3854 }
3855 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3856 // Host is hostname
3857 this->interval.end = dns_host->interval.end;
3858 ipv4_host->invalidate();
3859 ipv6_host->invalidate();
3860 }
3861 else
3862 goto error;
3863 this->interval.start = start;
3864 return true;
3865 }
3866
// Shared failure exit: reached when "username@" is missing or no host form matched.
3867 error:
3868 invalidate();
3869 return false;
3870 }
3871
3872 std::shared_ptr<basic_parser<T>> m_at; ///< Parser for the separator between username and host
3873 std::shared_ptr<basic_parser<T>> m_ip_lbracket; ///< Opening bracket of a bracketed IP address
3874 std::shared_ptr<basic_parser<T>> m_ip_rbracket; ///< Closing bracket of a bracketed IP address
3875 };
3876
// NOTE(review): both branches of this conditional are empty in this listing; source lines
// 3877-3878, 3880 and 3882 are skipped and presumably hold the alias declarations for the
// class above — confirm against the real header.
3879#ifdef _UNICODE
3881#else
3883#endif
3885
/// Tests for an emoticon, either as one predefined token or assembled from
/// optional apex, eyes, optional nose and a (possibly repeated) mouth — see do_match().
///
/// NOTE(review): this listing skips source lines 3890, 3893, 3900-3901, 3915, 3953 and 3964.
/// They presumably hold the class head, the constructor name, the base-class and `emoticon`
/// initializers, the base-class invalidate() call and the `start_mouth` declarations —
/// confirm against the real header.
3889 template <class T>
3891 {
3892 public:
/// Constructor parameters (the constructor name line is not visible in this listing):
///
/// \param[in] _emoticon  Parser for a complete emoticon; may be null (it is null-checked before use)
/// \param[in] _apex      Parser for the apex; may be null
/// \param[in] _eyes      Parser for the eyes; dereferenced without a null check
/// \param[in] _nose      Parser for the nose; may be null
/// \param[in] _mouth     Set parser for the mouth; dereferenced without a null check
/// \param[in] locale     Locale; not used by any visible line
3894 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3895 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3896 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3897 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3898 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3899 _In_ const std::locale& locale = std::locale()) :
3902 apex(_apex),
3903 eyes(_eyes),
3904 nose(_nose),
3905 mouth(_mouth)
3906 {}
3907
/// Invalidates all sub-parsers that are set.
3908 virtual void invalidate()
3909 {
3910 if (emoticon) emoticon->invalidate();
3911 if (apex) apex->invalidate();
3912 eyes->invalidate();
3913 if (nose) nose->invalidate();
3914 mouth->invalidate();
3916 }
3917
3918 std::shared_ptr<basic_parser<T>> emoticon; ///< Complete-emoticon parser (optional)
3919 std::shared_ptr<basic_parser<T>> apex; ///< Apex parser (optional)
3920 std::shared_ptr<basic_parser<T>> eyes; ///< Eyes parser
3921 std::shared_ptr<basic_parser<T>> nose; ///< Nose parser (optional)
3922 std::shared_ptr<basic_set<T>> mouth; ///< Mouth parser; after a match its interval spans all repeated mouth characters
3923
3924 protected:
/// Matches an emoticon beginning at \p start.
///
/// \param[in] text   Text to parse; may be null only when start >= end
/// \param[in] start  Index in \p text at which the emoticon must begin
/// \param[in] end    Upper bound of the range of \p text to examine
/// \param[in] flags  Matching flags, forwarded unchanged to every sub-parser
///
/// \returns true with this->interval covering the emoticon on success; false with all
/// sub-parsers and this->interval invalidated otherwise.
3925 virtual bool do_match(
3926 _In_reads_or_z_opt_(end) const T* text,
3927 _In_ size_t start = 0,
3928 _In_ size_t end = SIZE_MAX,
3929 _In_ int flags = match_default)
3930 {
3931 _Assume_(text || start >= end);
3932
// A complete-emoticon match takes precedence over the composed form.
3933 if (emoticon && emoticon->match(text, start, end, flags)) {
3934 if (apex) apex->invalidate();
3935 eyes->invalidate();
3936 if (nose) nose->invalidate();
3937 mouth->invalidate();
3938 this->interval.start = start;
3939 this->interval.end = emoticon->interval.end;
3940 return true;
3941 }
3942
3943 this->interval.end = start;
3944
// The apex is optional: when it does not match, the eyes are tried at start.
3945 if (apex && apex->match(text, this->interval.end, end, flags))
3946 this->interval.end = apex->interval.end;
3947
3948 if (eyes->match(text, this->interval.end, end, flags)) {
// First try eyes + nose + mouth, then fall back to eyes + mouth.
3949 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3950 mouth->match(text, nose->interval.end, end, flags))
3951 {
// NOTE(review): start_mouth is used below but declared on skipped source line 3953;
// presumably it captures mouth->interval.start of the first mouth match — confirm.
3952 size_t
3954 hit_offset = mouth->hit_offset;
3955 // Mouth may repeat :-)))))))
// Keep consuming while the mouth set reports the same hit_offset as the first hit
// (presumably the same mouth character — confirm against basic_set).
3956 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
// The loop's last (failed or different) match changed mouth->interval; restore it to span all repeats.
3957 mouth->interval.start = start_mouth;
3958 mouth->interval.end = this->interval.end;
3959 this->interval.start = start;
3960 return true;
3961 }
3962 if (mouth->match(text, eyes->interval.end, end, flags)) {
// NOTE(review): as above, start_mouth is declared on skipped source line 3964.
3963 size_t
3965 hit_offset = mouth->hit_offset;
3966 // Mouth may repeat :-)))))))
3967 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3968 if (nose) nose->invalidate();
3969 mouth->interval.start = start_mouth;
3970 mouth->interval.end = this->interval.end;
3971 this->interval.start = start;
3972 return true;
3973 }
3974 }
3975
3976 if (emoticon) emoticon->invalidate();
3977 if (apex) apex->invalidate();
3978 eyes->invalidate();
3979 if (nose) nose->invalidate();
3980 mouth->invalidate();
3981 this->interval.invalidate();
3982 return false;
3983 }
3984 };
3985
// Character-type specific aliases of basic_emoticon.
// temoticon resolves to the wchar_t variant when _UNICODE is defined and to the char variant otherwise.
// sgml_emoticon is the same instantiation as emoticon (basic_emoticon<char>).
3986 using emoticon = basic_emoticon<char>;
3987 using wemoticon = basic_emoticon<wchar_t>;
3988#ifdef _UNICODE
3989 using temoticon = wemoticon;
3990#else
3991 using temoticon = emoticon;
3992#endif
3993 using sgml_emoticon = basic_emoticon<char>;
3994
/// Date layouts understood by basic_date.
///
/// Each value is a distinct bit, so several may be OR-ed together to form the
/// format mask basic_date tests against. The component order named by each value is the order in
/// which basic_date::do_match() matches the day, month and year sub-parsers.
3998 enum date_format_t {
3999 date_format_none = 0, ///< No format; basic_date::format holds this when nothing matched
4000 date_format_dmy = 0x1, ///< day, month, year
4001 date_format_mdy = 0x2, ///< month, day, year
4002 date_format_ymd = 0x4, ///< year, month, day
4003 date_format_ym = 0x8, ///< year, month
4004 date_format_my = 0x10, ///< month, year
4005 date_format_dm = 0x20, ///< day, month
4006 date_format_md = 0x40, ///< month, day
4007 };
4008
/// Tests for a date in one of the layouts selected by a date_format_t mask.
///
/// do_match() tries the enabled layouts in a fixed order (dmy, mdy, ymd, ym, my, dm, md) and
/// stops at the first that matches; `format` then tells which one it was.
///
/// NOTE(review): this listing skips source lines 4024 and 4040. They presumably hold the base-class
/// initializer (the `locale` argument is otherwise unused) and the base-class invalidate() call —
/// confirm against the real header.
4012 template <class T>
4013 class basic_date : public basic_parser<T>
4014 {
4015 public:
/// Constructs the date parser.
///
/// \param[in] format_mask  Bitwise combination of date_format_t values selecting the layouts to try
/// \param[in] _day         Day parser. invalidate() tolerates null, but do_match() dereferences it
///                         for every enabled layout that contains a day
/// \param[in] _month       Month parser; dereferenced by every layout
/// \param[in] _year        Year parser; dereferenced by every enabled layout that contains a year
/// \param[in] separator    Set parser for the separator between components
/// \param[in] space        Parser for one unit of whitespace allowed around separators
/// \param[in] locale       Locale; not used by any visible line
4016 basic_date(
4017 _In_ int format_mask,
4018 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4019 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4020 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4021 _In_ const std::shared_ptr<basic_set<T>>& separator,
4022 _In_ const std::shared_ptr<basic_parser<T>>& space,
4023 _In_ const std::locale& locale = std::locale()) :
4025 format(date_format_none),
4026 m_format_mask(format_mask),
4027 day(_day),
4028 month(_month),
4029 year(_year),
4030 m_separator(separator),
4031 m_space(space)
4032 {}
4033
/// Invalidates the day, month and year sub-parsers that are set and resets `format`.
4034 virtual void invalidate()
4035 {
4036 if (day) day->invalidate();
4037 if (month) month->invalidate();
4038 if (year) year->invalidate();
4039 format = date_format_none;
4041 }
4042
4043 date_format_t format; ///< Layout of the last successful match; date_format_none otherwise
4044 std::shared_ptr<basic_integer<T>> day; ///< Day parser
4045 std::shared_ptr<basic_integer<T>> month; ///< Month parser
4046 std::shared_ptr<basic_integer<T>> year; ///< Year parser
4047
4048 protected:
/// Matches a date beginning at \p start.
///
/// In every layout, any number of m_space matches may surround a separator. In the
/// three-component layouts both separators must report the same hit_offset.
///
/// \param[in] text   Text to parse; may be null only when start >= end
/// \param[in] start  Index in \p text at which the date must begin
/// \param[in] end    Upper bound of the range of \p text to examine
/// \param[in] flags  Matching flags; forwarded to the sub-parsers, with match_multiline cleared for m_space
///
/// \returns true with this->interval and `format` set on success; false with the sub-parsers,
/// `format` and this->interval invalidated otherwise.
4049 virtual bool do_match(
4050 _In_reads_or_z_opt_(end) const T* text,
4051 _In_ size_t start = 0,
4052 _In_ size_t end = SIZE_MAX,
4053 _In_ int flags = match_default)
4054 {
4055 _Assume_(text || start >= end);
4056
4057 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
// Layout: day <sep> month <sep> year
4058 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4059 if (day->match(text, start, end, flags)) {
// Each empty-bodied for loop below skips whitespace, advancing this->interval.end.
4060 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4061 if (m_separator->match(text, this->interval.end, end, flags)) {
// Remember which separator was hit so the second one can be required to be the same.
4062 size_t hit_offset = m_separator->hit_offset;
4063 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4064 if (month->match(text, this->interval.end, end, flags)) {
4065 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4066 if (m_separator->match(text, this->interval.end, end, flags) &&
4067 m_separator->hit_offset == hit_offset) // Both separators must match.
4068 {
4069 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4070 if (year->match(text, this->interval.end, end, flags) &&
4071 is_valid(day->value, month->value))
4072 {
4073 this->interval.start = start;
4074 this->interval.end = year->interval.end;
4075 format = date_format_dmy;
4076 return true;
4077 }
4078 }
4079 }
4080 }
4081 }
4082 }
4083
// Layout: month <sep> day <sep> year
4084 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4085 if (month->match(text, start, end, flags)) {
4086 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4087 if (m_separator->match(text, this->interval.end, end, flags)) {
4088 size_t hit_offset = m_separator->hit_offset;
4089 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4090 if (day->match(text, this->interval.end, end, flags)) {
4091 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4092 if (m_separator->match(text, this->interval.end, end, flags) &&
4093 m_separator->hit_offset == hit_offset) // Both separators must match.
4094 {
4095 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4096 if (year->match(text, this->interval.end, end, flags) &&
4097 is_valid(day->value, month->value))
4098 {
4099 this->interval.start = start;
4100 this->interval.end = year->interval.end;
4101 format = date_format_mdy;
4102 return true;
4103 }
4104 }
4105 }
4106 }
4107 }
4108 }
4109
// Layout: year <sep> month <sep> day
4110 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4111 if (year->match(text, start, end, flags)) {
4112 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4113 if (m_separator->match(text, this->interval.end, end, flags)) {
4114 size_t hit_offset = m_separator->hit_offset;
4115 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4116 if (month->match(text, this->interval.end, end, flags)) {
4117 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4118 if (m_separator->match(text, this->interval.end, end, flags) &&
4119 m_separator->hit_offset == hit_offset) // Both separators must match.
4120 {
4121 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4122 if (day->match(text, this->interval.end, end, flags) &&
4123 is_valid(day->value, month->value))
4124 {
4125 this->interval.start = start;
4126 this->interval.end = day->interval.end;
4127 format = date_format_ymd;
4128 return true;
4129 }
4130 }
4131 }
4132 }
4133 }
4134 }
4135
// Layout: year <sep> month (no day; only the month is validated)
4136 if ((m_format_mask & date_format_ym) == date_format_ym) {
4137 if (year->match(text, start, end, flags)) {
4138 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4139 if (m_separator->match(text, this->interval.end, end, flags)) {
4140 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4141 if (month->match(text, this->interval.end, end, flags) &&
4142 is_valid(SIZE_MAX, month->value))
4143 {
4144 if (day) day->invalidate();
4145 this->interval.start = start;
4146 this->interval.end = month->interval.end;
4147 format = date_format_ym;
4148 return true;
4149 }
4150 }
4151 }
4152 }
4153
// Layout: month <sep> year (no day; only the month is validated)
4154 if ((m_format_mask & date_format_my) == date_format_my) {
4155 if (month->match(text, start, end, flags)) {
4156 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4157 if (m_separator->match(text, this->interval.end, end, flags)) {
4158 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4159 if (year->match(text, this->interval.end, end, flags) &&
4160 is_valid(SIZE_MAX, month->value))
4161 {
4162 if (day) day->invalidate();
4163 this->interval.start = start;
4164 this->interval.end = year->interval.end;
4165 format = date_format_my;
4166 return true;
4167 }
4168 }
4169 }
4170 }
4171
// Layout: day <sep> month, optionally followed by the same separator again
4172 if ((m_format_mask & date_format_dm) == date_format_dm) {
4173 if (day->match(text, start, end, flags)) {
4174 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4175 if (m_separator->match(text, this->interval.end, end, flags)) {
4176 size_t hit_offset = m_separator->hit_offset;
4177 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4178 if (month->match(text, this->interval.end, end, flags) &&
4179 is_valid(day->value, month->value))
4180 {
4181 if (year) year->invalidate();
4182 this->interval.start = start;
// A trailing separator equal to the first is included in the match; otherwise the
// match ends at the month and any skipped whitespace is given back.
4183 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4184 if (m_separator->match(text, this->interval.end, end, flags) &&
4185 m_separator->hit_offset == hit_offset) // Both separators must match.
4186 this->interval.end = m_separator->interval.end;
4187 else
4188 this->interval.end = month->interval.end;
4189 format = date_format_dm;
4190 return true;
4191 }
4192 }
4193 }
4194 }
4195
// Layout: month <sep> day, optionally followed by the same separator again
4196 if ((m_format_mask & date_format_md) == date_format_md) {
4197 if (month->match(text, start, end, flags)) {
4198 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4199 if (m_separator->match(text, this->interval.end, end, flags)) {
4200 size_t hit_offset = m_separator->hit_offset;
4201 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4202 if (day->match(text, this->interval.end, end, flags) &&
4203 is_valid(day->value, month->value))
4204 {
4205 if (year) year->invalidate();
4206 this->interval.start = start;
4207 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4208 if (m_separator->match(text, this->interval.end, end, flags) &&
4209 m_separator->hit_offset == hit_offset) // Both separators must match.
4210 this->interval.end = m_separator->interval.end;
4211 else
4212 this->interval.end = day->interval.end;
4213 format = date_format_md;
4214 return true;
4215 }
4216 }
4217 }
4218 }
4219
// No enabled layout matched.
4220 if (day) day->invalidate();
4221 if (month) month->invalidate();
4222 if (year) year->invalidate();
4223 format = date_format_none;
4224 this->interval.invalidate();
4225 return false;
4226 }
4227
/// Checks that a day/month pair can exist in some year.
///
/// The year is not considered, so 29 February is always accepted.
///
/// \param[in] day    Day of month (1-based), or SIZE_MAX to validate the month only
/// \param[in] month  Month (1 = January ... 12 = December), or SIZE_MAX to validate the day only
///
/// \returns true when the month is in 1..12 and the day is within that month's length
4228 static bool is_valid(size_t day, size_t month)
4229 {
4230 if (month == SIZE_MAX) {
4231 // Default to January. This allows validating day only, as January has all 31 days.
4232 month = 1;
4233 }
4234 if (day == SIZE_MAX) {
4235 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4236 day = 1;
4237 }
4238
4239 switch (month) {
4240 case 1:
4241 case 3:
4242 case 5:
4243 case 7:
4244 case 8:
4245 case 10:
4246 case 12:
4247 return 1 <= day && day <= 31;
4248 case 2:
4249 return 1 <= day && day <= 29;
4250 case 4:
4251 case 6:
4252 case 9:
4253 case 11:
4254 return 1 <= day && day <= 30;
4255 default:
4256 return false;
4257 }
4258 }
4259
4260 int m_format_mask; ///< Bitwise combination of date_format_t values to try
4261 std::shared_ptr<basic_set<T>> m_separator; ///< Component separator parser
4262 std::shared_ptr<basic_parser<T>> m_space; ///< Whitespace parser
4263 };
4264
// Character-type specific aliases of basic_date.
// tdate resolves to the wchar_t variant when _UNICODE is defined and to the char variant otherwise.
4265 using date = basic_date<char>;
4266 using wdate = basic_date<wchar_t>;
4267#ifdef _UNICODE
4268 using tdate = wdate;
4269#else
4270 using tdate = date;
4271#endif
4273
/// Tests for a time of day: hour and minute, optionally followed by seconds and milliseconds.
///
/// NOTE(review): this listing skips source lines 4289 and 4304. They presumably hold the base-class
/// initializer (the `locale` argument is otherwise unused) and the base-class invalidate() call —
/// confirm against the real header.
4277 template <class T>
4278 class basic_time : public basic_parser<T>
4279 {
4280 public:
/// Constructs the time parser.
///
/// \param[in] _hour                  Hour parser; dereferenced without a null check
/// \param[in] _minute                Minute parser; dereferenced without a null check
/// \param[in] _second                Second parser; may be null to disable seconds
/// \param[in] _millisecond           Millisecond parser; may be null to disable milliseconds
/// \param[in] separator              Set parser for the separator between hour, minute and second
/// \param[in] millisecond_separator  Parser for the separator before milliseconds; may be null
/// \param[in] locale                 Locale; not used by any visible line
4281 basic_time(
4282 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4283 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4284 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4285 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4286 _In_ const std::shared_ptr<basic_set<T>>& separator,
4287 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4288 _In_ const std::locale& locale = std::locale()) :
4290 hour(_hour),
4291 minute(_minute),
4292 second(_second),
4293 millisecond(_millisecond),
4294 m_separator(separator),
4295 m_millisecond_separator(millisecond_separator)
4296 {}
4297
/// Invalidates the hour and minute parsers and, when set, the second and millisecond parsers.
4298 virtual void invalidate()
4299 {
4300 hour->invalidate();
4301 minute->invalidate();
4302 if (second) second->invalidate();
4303 if (millisecond) millisecond->invalidate();
4305 }
4306
4307 std::shared_ptr<basic_integer10<T>> hour; ///< Hour parser
4308 std::shared_ptr<basic_integer10<T>> minute; ///< Minute parser
4309 std::shared_ptr<basic_integer10<T>> second; ///< Second parser (optional)
4310 std::shared_ptr<basic_integer10<T>> millisecond; ///< Millisecond parser (optional)
4311
4312 protected:
/// Matches a time beginning at \p start.
///
/// Minutes and seconds must be below 60 and milliseconds below 1000; the hour value is not
/// range-checked here.
///
/// \param[in] text   Text to parse; may be null only when start >= end
/// \param[in] start  Index in \p text at which the time must begin
/// \param[in] end    Upper bound of the range of \p text to examine
/// \param[in] flags  Matching flags, forwarded unchanged to every sub-parser
///
/// \returns true with this->interval covering the longest accepted form on success; false with
/// all sub-parsers and this->interval invalidated otherwise.
4313 virtual bool do_match(
4314 _In_reads_or_z_opt_(end) const T* text,
4315 _In_ size_t start = 0,
4316 _In_ size_t end = SIZE_MAX,
4317 _In_ int flags = match_default)
4318 {
4319 _Assume_(text || start >= end);
4320
4321 if (hour->match(text, start, end, flags) &&
4322 m_separator->match(text, hour->interval.end, end, flags) &&
4323 minute->match(text, m_separator->interval.end, end, flags) &&
4324 minute->value < 60)
4325 {
4326 // hh::mm
// Remember which separator was hit so the one before the seconds can be required to be the same.
4327 size_t hit_offset = m_separator->hit_offset;
4328 if (m_separator->match(text, minute->interval.end, end, flags) &&
4329 m_separator->hit_offset == hit_offset && // Both separators must match.
4330 second && second->match(text, m_separator->interval.end, end, flags) &&
4331 second->value < 60)
4332 {
4333 // hh::mm:ss
4334 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4335 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4336 millisecond->value < 1000)
4337 {
4338 // hh::mm:ss.mmmm
4339 this->interval.end = millisecond->interval.end;
4340 }
4341 else {
4342 if (millisecond) millisecond->invalidate();
4343 this->interval.end = second->interval.end;
4344 }
4345 }
4346 else {
// No acceptable seconds: the match ends right after the minutes.
4347 if (second) second->invalidate();
4348 if (millisecond) millisecond->invalidate();
4349 this->interval.end = minute->interval.end;
4350 }
4351 this->interval.start = start;
4352 return true;
4353 }
4354
4355 hour->invalidate();
4356 minute->invalidate();
4357 if (second) second->invalidate();
4358 if (millisecond) millisecond->invalidate();
4359 this->interval.invalidate();
4360 return false;
4361 }
4362
4363 std::shared_ptr<basic_set<T>> m_separator; ///< Separator between hour, minute and second
4364 std::shared_ptr<basic_parser<T>> m_millisecond_separator; ///< Separator before milliseconds (optional)
4365 };
4366
// Character-type specific aliases of basic_time.
// ttime resolves to the wchar_t variant when _UNICODE is defined and to the char variant otherwise.
4367 using time = basic_time<char>;
4368 using wtime = basic_time<wchar_t>;
4369#ifdef _UNICODE
4370 using ttime = wtime;
4371#else
4372 using ttime = time;
4373#endif
4375
/// Tests for an angle written as degrees, minutes and seconds, each part optional,
/// with an optional decimal part at the end (see do_match()).
///
/// NOTE(review): this listing skips source lines 4383, 4392 and 4411. They presumably hold the
/// constructor name, the base-class initializer (the `locale` argument is otherwise unused) and
/// the base-class invalidate() call — confirm against the real header.
4379 template <class T>
4380 class basic_angle : public basic_parser<T>
4381 {
4382 public:
/// Constructor parameters (the constructor name line is not visible in this listing):
///
/// \param[in] _degree            Degree parser; dereferenced without a null check
/// \param[in] _degree_separator  Parser for the degree mark; dereferenced without a null check
/// \param[in] _minute            Minute parser; dereferenced without a null check
/// \param[in] _minute_separator  Parser for the minute mark; dereferenced without a null check
/// \param[in] _second            Second parser; may be null
/// \param[in] _second_separator  Parser for the second mark; may be null
/// \param[in] _decimal           Parser for the trailing decimal part; may be null
/// \param[in] locale             Locale; not used by any visible line
4384 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4385 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4386 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4387 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4388 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4389 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4390 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4391 _In_ const std::locale& locale = std::locale()) :
4393 degree(_degree),
4394 degree_separator(_degree_separator),
4395 minute(_minute),
4396 minute_separator(_minute_separator),
4397 second(_second),
4398 second_separator(_second_separator),
4399 decimal(_decimal)
4400 {}
4401
/// Invalidates all sub-parsers that are set.
4402 virtual void invalidate()
4403 {
4404 degree->invalidate();
4405 degree_separator->invalidate();
4406 minute->invalidate();
4407 minute_separator->invalidate();
4408 if (second) second->invalidate();
4409 if (second_separator) second_separator->invalidate();
4410 if (decimal) decimal->invalidate();
4412 }
4413
4414 std::shared_ptr<basic_integer10<T>> degree; ///< Degree parser
4415 std::shared_ptr<basic_parser<T>> degree_separator; ///< Degree mark parser
4416 std::shared_ptr<basic_integer10<T>> minute; ///< Minute parser
4417 std::shared_ptr<basic_parser<T>> minute_separator; ///< Minute mark parser
4418 std::shared_ptr<basic_integer10<T>> second; ///< Second parser (optional)
4419 std::shared_ptr<basic_parser<T>> second_separator; ///< Second mark parser (optional)
4420 std::shared_ptr<basic_parser<T>> decimal; ///< Decimal part parser (optional)
4421
4422 protected:
/// Matches an angle beginning at \p start.
///
/// Degrees and minutes count only when followed by their mark; the mark after seconds is
/// optional. Minutes and seconds must be below 60. At least one of the three parts must match.
///
/// \param[in] text   Text to parse; may be null only when start >= end
/// \param[in] start  Index in \p text at which the angle must begin
/// \param[in] end    Upper bound of the range of \p text to examine
/// \param[in] flags  Matching flags, forwarded unchanged to every sub-parser
///
/// \returns true with this->interval covering the angle on success; false with
/// this->interval invalidated otherwise.
4423 virtual bool do_match(
4424 _In_reads_or_z_opt_(end) const T* text,
4425 _In_ size_t start = 0,
4426 _In_ size_t end = SIZE_MAX,
4427 _In_ int flags = match_default)
4428 {
4429 _Assume_(text || start >= end);
4430
// this->interval.end is the running cursor; a part that fails leaves it where it was.
4431 this->interval.end = start;
4432
4433 if (degree->match(text, this->interval.end, end, flags) &&
4434 degree_separator->match(text, degree->interval.end, end, flags))
4435 {
4436 // Degrees
4437 this->interval.end = degree_separator->interval.end;
4438 }
4439 else {
4440 degree->invalidate();
4441 degree_separator->invalidate();
4442 }
4443
4444 if (minute->match(text, this->interval.end, end, flags) &&
4445 minute->value < 60 &&
4446 minute_separator->match(text, minute->interval.end, end, flags))
4447 {
4448 // Minutes
4449 this->interval.end = minute_separator->interval.end;
4450 }
4451 else {
4452 minute->invalidate();
4453 minute_separator->invalidate();
4454 }
4455
4456 if (second && second->match(text, this->interval.end, end, flags) &&
4457 second->value < 60)
4458 {
4459 // Seconds
4460 this->interval.end = second->interval.end;
4461 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4462 this->interval.end = second_separator->interval.end;
4463 else
4464 if (second_separator) second_separator->invalidate();
4465 }
4466 else {
4467 if (second) second->invalidate();
4468 if (second_separator) second_separator->invalidate();
4469 }
4470
// A part counts as present when its interval is non-empty (start < end).
4471 if (degree->interval.start < degree->interval.end ||
4472 minute->interval.start < minute->interval.end ||
4473 (second && second->interval.start < second->interval.end))
4474 {
4475 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4476 // Decimals
4477 this->interval.end = decimal->interval.end;
4478 }
4479 else if (decimal)
4480 decimal->invalidate();
4481 this->interval.start = start;
4482 return true;
4483 }
4484 if (decimal) decimal->invalidate();
4485 this->interval.invalidate();
4486 return false;
4487 }
4488 };
4489
// Character-type aliases for basic_angle. `wangle` is used below but its
// definition is not visible in this listing (source line 4491 is skipped).
// NOTE(review): the _UNICODE-dependent alias is named RRegElKot, which does not
// follow the t-prefixed pattern of tphone_number / tiban / tcreditor_reference;
// it looks like a leftover name - confirm with callers before renaming.
4490 using angle = basic_angle<char>;
4492#ifdef _UNICODE
4493 using RRegElKot = wangle;
4494#else
4495 using RRegElKot = angle;
4496#endif
4498
// Parser for a phone number built from caller-supplied sub-parsers for digits,
// a leading plus sign, parentheses, separators and spaces. The normalized
// result is collected in `value`.
// NOTE(review): this listing skips source lines 4503, 4506, 4514, 4526 and
// 4610, so the class header, the constructor's name line, one member
// initializer, the last statement of invalidate() and one line inside
// do_match() are not visible here. The class name basic_phone_number is taken
// from the aliases that follow the class.
4502 template <class T>
4504 {
4505 public:
// Constructor parameter list and member initializers: each sub-parser is
// stored in the matching m_* member. `locale` is not used on any visible line;
// presumably it is forwarded to the base class on the skipped line 4514 -
// TODO confirm.
4507 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4508 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4509 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4510 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4511 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4512 _In_ const std::shared_ptr<basic_parser<T>>& space,
4513 _In_ const std::locale& locale = std::locale()) :
4515 m_digit(digit),
4516 m_plus_sign(plus_sign),
4517 m_lparenthesis(lparenthesis),
4518 m_rparenthesis(rparenthesis),
4519 m_separator(separator),
4520 m_space(space)
4521 {}
4522
// Clears the collected number.
4523 virtual void invalidate()
4524 {
4525 value.clear();
4527 }
4528
// Matched number: plus sign, digits and parentheses as they appear in the
// text. Separators and spaces are consumed but never appended.
4529 std::basic_string<T> value;
4530
4531 protected:
// Tries to match a phone number in text[start, end).
// Scans piece by piece (digit, left/right parenthesis, separator, space) until
// none of them matches, remembering the last "safe" end: the position after
// the most recent digit outside parentheses or after a right parenthesis.
// Succeeds only if at least one digit outside parentheses was seen; then
// `value` is cut back to the safe size and this->interval = [start, safe end).
// On failure `value` is cleared, this->interval invalidated, false returned.
4532 virtual bool do_match(
4533 _In_reads_or_z_opt_(end) const T* text,
4534 _In_ size_t start = 0,
4535 _In_ size_t end = SIZE_MAX,
4536 _In_ int flags = match_default)
4537 {
4538 _Assume_(text || start >= end);
4539
4540 size_t safe_digit_end = start, safe_value_size = 0;
4541 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4542 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4543
4544 this->interval.end = start;
4545 value.clear();
// NOTE(review): both parenthesis parsers are dereferenced here without a null
// check, although the branches below test them for null before use. A null
// lparenthesis/rparenthesis would crash on these two lines - confirm whether
// null is a supported argument and guard these calls if it is.
4546 m_lparenthesis->invalidate();
4547 m_rparenthesis->invalidate();
4548
// Optional leading plus sign; it is kept in `value` and counts as safe content.
4549 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4550 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4551 safe_value_size = value.size();
4552 this->interval.end = m_plus_sign->interval.end;
4553 }
4554
4555 for (;;) {
4556 _Assume_(text || this->interval.end >= end);
4557 if (this->interval.end >= end || !text[this->interval.end])
4558 break;
4559 if (m_digit->match(text, this->interval.end, end, flags)) {
4560 // Digit
4561 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4562 this->interval.end = m_digit->interval.end;
// Digits inside an open parenthesis are not safe yet: they only become part
// of the result once the matching right parenthesis is seen.
4563 if (!in_parentheses) {
4564 safe_digit_end = this->interval.end;
4565 safe_value_size = value.size();
4566 has_digits = true;
4567 }
4568 after_digit = true;
4569 after_parentheses = false;
4570 }
4571 else if (
4572 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4573 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4574 m_lparenthesis->match(text, this->interval.end, end, flags))
4575 {
4576 // Left parenthesis
4577 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4578 this->interval.end = m_lparenthesis->interval.end;
4579 in_parentheses = true;
4580 after_digit = false;
4581 after_parentheses = false;
4582 }
4583 else if (
4584 in_parentheses && // After left parenthesis
4585 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4586 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4587 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4588 {
4589 // Right parenthesis
4590 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4591 this->interval.end = m_rparenthesis->interval.end;
// Closing the parenthesis commits everything collected inside it. Note that
// has_digits is not set here, so a number made only of a parenthesized group
// still fails the final check.
4592 safe_digit_end = this->interval.end;
4593 safe_value_size = value.size();
4594 in_parentheses = false;
4595 after_digit = false;
4596 after_parentheses = true;
4597 }
4598 else if (
4599 after_digit &&
4600 !in_parentheses && // No separators inside parentheses
4601 !after_parentheses && // No separators following right parenthesis
4602 m_separator && m_separator->match(text, this->interval.end, end, flags))
4603 {
4604 // Separator
4605 this->interval.end = m_separator->interval.end;
4606 after_digit = false;
4607 after_parentheses = false;
4608 }
// NOTE(review): source line 4610 is skipped in this listing, so the space
// branch below may have one more condition than is shown here.
4609 else if (
4611 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4612 {
4613 // Space
4614 this->interval.end = m_space->interval.end;
4615 after_digit = false;
4616 after_parentheses = false;
4617 }
4618 else
4619 break;
4620 }
// Trailing separators, spaces and any unclosed parenthesized part are dropped
// by rolling back to the last safe position.
4621 if (has_digits) {
4622 value.erase(safe_value_size);
4623 this->interval.start = start;
4624 this->interval.end = safe_digit_end;
4625 return true;
4626 }
4627 value.clear();
4628 this->interval.invalidate();
4629 return false;
4630 }
4631
// Sub-parsers supplied at construction. m_plus_sign, m_separator and m_space
// are null-checked before every use in do_match(); m_digit is always
// dereferenced.
4632 std::shared_ptr<basic_parser<T>> m_digit;
4633 std::shared_ptr<basic_parser<T>> m_plus_sign;
4634 std::shared_ptr<basic_set<T>> m_lparenthesis;
4635 std::shared_ptr<basic_set<T>> m_rparenthesis;
4636 std::shared_ptr<basic_parser<T>> m_separator;
4637 std::shared_ptr<basic_parser<T>> m_space;
4638 };
4639
// Character-type aliases for basic_phone_number: narrow, wide, a
// _UNICODE-selected t-variant, and an SGML variant that is also instantiated
// over char.
4640 using phone_number = basic_phone_number<char>;
4641 using wphone_number = basic_phone_number<wchar_t>;
4642#ifdef _UNICODE
4643 using tphone_number = wphone_number;
4644#else
4645 using tphone_number = phone_number;
4646#endif
4647 using sgml_phone_number = basic_phone_number<char>;
4648
// Parser for an International Bank Account Number: two-letter country code,
// two check digits, then a country-specific number of alphanumeric characters
// (the BBAN), with optional spaces between BBAN characters.
// A syntactically complete IBAN matches even if its checksum is wrong; the
// checksum result is reported separately in `is_valid`.
// NOTE(review): this listing skips source lines 4661, 4676, 4680, 4682 and
// 4809, so one member initializer, the last statement of invalidate(), the
// declarations of `check_digits` and `is_valid`, and the declaration of the
// local `nominator` are not visible here.
4654 template <class T>
4655 class basic_iban : public basic_parser<T>
4656 {
4657 public:
// Stores the optional space parser and resets all result fields. `locale` is
// not used on any visible line; presumably it is forwarded to the base class
// on the skipped line 4661 - TODO confirm.
4658 basic_iban(
4659 _In_ const std::shared_ptr<basic_parser<T>>& space,
4660 _In_ const std::locale& locale = std::locale()) :
4662 m_space(space)
4663 {
4664 this->country[0] = 0;
4665 this->check_digits[0] = 0;
4666 this->bban[0] = 0;
4667 this->is_valid = false;
4668 }
4669
// Resets the result fields to empty strings / false.
4670 virtual void invalidate()
4671 {
4672 this->country[0] = 0;
4673 this->check_digits[0] = 0;
4674 this->bban[0] = 0;
4675 this->is_valid = false;
4677 }
4678
// Zero-terminated results of the last match. `country` holds the two-letter
// code; `bban` holds the BBAN characters without spaces (upper-cased when
// matching case-insensitively). bban[31] must stay larger than the largest
// `length - 4` in the country table inside do_match().
4679 T country[3];
4681 T bban[31];
4683
4684 protected:
// Tries to match an IBAN in text[start, end).
// Steps: read and look up the country code, read the two check digits (and
// compare them with the table when the table fixes them), read exactly
// `length - 4` BBAN characters while skipping spaces, then compute the mod-97
// checksum into is_valid.
// Returns true with this->interval = [start, end of BBAN) when the structure is
// complete, regardless of is_valid; on any structural error calls invalidate()
// and returns false.
4685 virtual bool do_match(
4686 _In_reads_or_z_opt_(end) const T* text,
4687 _In_ size_t start = 0,
4688 _In_ size_t end = SIZE_MAX,
4689 _In_ int flags = match_default)
4690 {
4691 _Assume_(text || start >= end);
4692 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4693 const bool case_insensitive = flags & match_case_insensitive ? true : false;
// Per-country description: code, fixed check digits (all-zero when the check
// digits are not fixed) and total IBAN length in characters.
4694 struct country_t {
4695 T country[2];
4696 T check_digits[2];
4697 size_t length;
4698 };
// Must stay sorted by country code: the lookup below is a binary search.
4699 static const country_t s_countries[] = {
4700 { { 'A', 'D' }, {}, 24 }, // Andorra
4701 { { 'A', 'E' }, {}, 23 }, // United Arab Emirates
4702 { { 'A', 'L' }, {}, 28 }, // Albania
4703 { { 'A', 'O' }, {}, 25 }, // Angola
4704 { { 'A', 'T' }, {}, 20 }, // Austria
4705 { { 'A', 'Z' }, {}, 28 }, // Azerbaijan
4706 { { 'B', 'A' }, { '3', '9' }, 20}, // Bosnia and Herzegovina
4707 { { 'B', 'E' }, {}, 16 }, // Belgium
4708 { { 'B', 'F' }, {}, 28 }, // Burkina Faso
4709 { { 'B', 'G' }, {}, 22 }, // Bulgaria
4710 { { 'B', 'H' }, {}, 22 }, // Bahrain
4711 { { 'B', 'I' }, {}, 27 }, // Burundi
4712 { { 'B', 'J' }, {}, 28 }, // Benin
4713 { { 'B', 'R' }, {}, 29 }, // Brazil
4714 { { 'B', 'Y' }, {}, 28 }, // Belarus
4715 { { 'C', 'F' }, {}, 27 }, // Central African Republic
4716 { { 'C', 'G' }, {}, 27 }, // Congo, Republic of the
4717 { { 'C', 'H' }, {}, 21 }, // Switzerland
4718 { { 'C', 'I' }, {}, 28 }, // Côte d'Ivoire
4719 { { 'C', 'M' }, {}, 27 }, // Cameroon
4720 { { 'C', 'R' }, {}, 22 }, // Costa Rica
4721 { { 'C', 'V' }, {}, 25 }, // Cabo Verde
4722 { { 'C', 'Y' }, {}, 28 }, // Cyprus
4723 { { 'C', 'Z' }, {}, 24 }, // Czech Republic
4724 { { 'D', 'E' }, {}, 22 }, // Germany
4725 { { 'D', 'J' }, {}, 27 }, // Djibouti
4726 { { 'D', 'K' }, {}, 18 }, // Denmark
4727 { { 'D', 'O' }, {}, 28 }, // Dominican Republic
4728 { { 'D', 'Z' }, {}, 26 }, // Algeria
4729 { { 'E', 'E' }, {}, 20 }, // Estonia
4730 { { 'E', 'G' }, {}, 29 }, // Egypt
4731 { { 'E', 'S' }, {}, 24 }, // Spain
4732 { { 'F', 'I' }, {}, 18 }, // Finland
4733 { { 'F', 'O' }, {}, 18 }, // Faroe Islands
4734 { { 'F', 'R' }, {}, 27 }, // France
4735 { { 'G', 'A' }, {}, 27 }, // Gabon
4736 { { 'G', 'B' }, {}, 22 }, // United Kingdom
4737 { { 'G', 'E' }, {}, 22 }, // Georgia
4738 { { 'G', 'I' }, {}, 23 }, // Gibraltar
4739 { { 'G', 'L' }, {}, 18 }, // Greenland
4740 { { 'G', 'Q' }, {}, 27 }, // Equatorial Guinea
4741 { { 'G', 'R' }, {}, 27 }, // Greece
4742 { { 'G', 'T' }, {}, 28 }, // Guatemala
4743 { { 'G', 'W' }, {}, 25 }, // Guinea-Bissau
4744 { { 'H', 'N' }, {}, 28 }, // Honduras
4745 { { 'H', 'R' }, {}, 21 }, // Croatia
4746 { { 'H', 'U' }, {}, 28 }, // Hungary
4747 { { 'I', 'E' }, {}, 22 }, // Ireland
4748 { { 'I', 'L' }, {}, 23 }, // Israel
4749 { { 'I', 'Q' }, {}, 23 }, // Iraq
4750 { { 'I', 'R' }, {}, 26 }, // Iran
4751 { { 'I', 'S' }, {}, 26 }, // Iceland
4752 { { 'I', 'T' }, {}, 27 }, // Italy
4753 { { 'J', 'O' }, {}, 30 }, // Jordan
4754 { { 'K', 'M' }, {}, 27 }, // Comoros
4755 { { 'K', 'W' }, {}, 30 }, // Kuwait
4756 { { 'K', 'Z' }, {}, 20 }, // Kazakhstan
4757 { { 'L', 'B' }, {}, 28 }, // Lebanon
4758 { { 'L', 'C' }, {}, 32 }, // Saint Lucia
4759 { { 'L', 'I' }, {}, 21 }, // Liechtenstein
4760 { { 'L', 'T' }, {}, 20 }, // Lithuania
4761 { { 'L', 'U' }, {}, 20 }, // Luxembourg
4762 { { 'L', 'V' }, {}, 21 }, // Latvia
4763 { { 'L', 'Y' }, {}, 25 }, // Libya
4764 { { 'M', 'A' }, {}, 28 }, // Morocco
4765 { { 'M', 'C' }, {}, 27 }, // Monaco
4766 { { 'M', 'D' }, {}, 24 }, // Moldova
4767 { { 'M', 'E' }, { '2', '5' }, 22 }, // Montenegro
4768 { { 'M', 'G' }, {}, 27 }, // Madagascar
4769 { { 'M', 'K' }, { '0', '7' }, 19 }, // North Macedonia
4770 { { 'M', 'L' }, {}, 28 }, // Mali
4771 { { 'M', 'R' }, { '1', '3' }, 27}, // Mauritania
4772 { { 'M', 'T' }, {}, 31 }, // Malta
4773 { { 'M', 'U' }, {}, 30 }, // Mauritius
4774 { { 'M', 'Z' }, {}, 25 }, // Mozambique
4775 { { 'N', 'E' }, {}, 28 }, // Niger
4776 { { 'N', 'I' }, {}, 32 }, // Nicaragua
4777 { { 'N', 'L' }, {}, 18 }, // Netherlands
4778 { { 'N', 'O' }, {}, 15 }, // Norway
4779 { { 'P', 'K' }, {}, 24 }, // Pakistan
4780 { { 'P', 'L' }, {}, 28 }, // Poland
4781 { { 'P', 'S' }, {}, 29 }, // Palestinian territories
4782 { { 'P', 'T' }, { '5', '0' }, 25 }, // Portugal
4783 { { 'Q', 'A' }, {}, 29 }, // Qatar
4784 { { 'R', 'O' }, {}, 24 }, // Romania
4785 { { 'R', 'S' }, { '3', '5' }, 22 }, // Serbia
4786 { { 'R', 'U' }, {}, 33 }, // Russia
4787 { { 'S', 'A' }, {}, 24 }, // Saudi Arabia
4788 { { 'S', 'C' }, {}, 31 }, // Seychelles
4789 { { 'S', 'D' }, {}, 18 }, // Sudan
4790 { { 'S', 'E' }, {}, 24 }, // Sweden
4791 { { 'S', 'I' }, { '5', '6' }, 19 }, // Slovenia
4792 { { 'S', 'K' }, {}, 24 }, // Slovakia
4793 { { 'S', 'M' }, {}, 27 }, // San Marino
4794 { { 'S', 'N' }, {}, 28 }, // Senegal
4795 { { 'S', 'T' }, {}, 25 }, // São Tomé and Príncipe
4796 { { 'S', 'V' }, {}, 28 }, // El Salvador
4797 { { 'T', 'D' }, {}, 27 }, // Chad
4798 { { 'T', 'G' }, {}, 28 }, // Togo
4799 { { 'T', 'L' }, { '3', '8' }, 23}, // East Timor
4800 { { 'T', 'N' }, { '5', '9' }, 24 }, // Tunisia
4801 { { 'T', 'R' }, {}, 26 }, // Turkey
4802 { { 'U', 'A' }, {}, 29 }, // Ukraine
4803 { { 'V', 'A' }, {}, 22 }, // Vatican City
4804 { { 'V', 'G' }, {}, 24 }, // Virgin Islands, British
4805 { { 'X', 'K' }, {}, 20 }, // Kosovo
4806 };
4807 const country_t* country_desc = nullptr;
4808 size_t n, available, next, bban_length;
4810
// Country code: exactly two letters 'A'..'Z' (upper-cased first when matching
// case-insensitively).
4811 this->interval.end = start;
4812 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4813 if (this->interval.end >= end || !text[this->interval.end])
4814 goto error; // incomplete country code
4815 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4816 if (chr < 'A' || 'Z' < chr)
4817 goto error; // invalid country code
4818 this->country[i] = chr;
4819 }
// Binary search of s_countries over the half-open range [l, r).
4820 for (size_t l = 0, r = _countof(s_countries);;) {
4821 if (l >= r)
4822 goto error; // unknown country
4823 size_t m = (l + r) / 2;
4824 const country_t& c = s_countries[m];
4825 if (c.country[0] < this->country[0] || (c.country[0] == this->country[0] && c.country[1] < this->country[1]))
4826 l = m + 1;
4827 else if (this->country[0] < c.country[0] || (this->country[0] == c.country[0] && this->country[1] < c.country[1]))
4828 r = m;
4829 else {
4830 country_desc = &c;
4831 break;
4832 }
4833 }
4834 this->country[2] = 0;
4835
// Check digits: exactly two characters '0'..'9'. A terminating zero also fails
// the range test, so no separate end-of-string check is needed.
4836 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4837 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4838 goto error; // incomplete or invalid check digits
4839 this->check_digits[i] = text[this->interval.end];
4840 }
4841 this->check_digits[2] = 0;
4842
// Countries whose table entry fixes the check digits must carry exactly those.
4843 if ((country_desc->check_digits[0] && this->check_digits[0] != country_desc->check_digits[0]) ||
4844 (country_desc->check_digits[1] && this->check_digits[1] != country_desc->check_digits[1]))
4845 goto error; // unexpected check digits
4846
// The table length covers the whole IBAN; the 4 characters already consumed
// (country code + check digits) are subtracted to get the BBAN length.
4847 bban_length = country_desc->length - 4;
4848 for (n = 0; n < bban_length;) {
4849 if (this->interval.end >= end || !text[this->interval.end])
4850 goto error; // bban too short
// Spaces may appear before any BBAN character; they are skipped, not stored.
4851 if (m_space && m_space->match(text, this->interval.end, end, flags)) {
4852 this->interval.end = m_space->interval.end;
4853 continue;
4854 }
4855 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4856 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4857 this->bban[n++] = chr;
4858 this->interval.end++;
4859 }
4860 else
4861 goto error; // invalid bban
4862 }
4863 this->bban[n] = 0;
4864
4865 // Normalize IBAN.
// Builds the digit string BBAN + country code + check digits, with every
// letter replaced by two digits (A..J -> 10..19, K..T -> 20..29, U..Z -> 30..35).
// Digits are copied as they are.
4866 T normalized[69];
4867 available = 0;
4868 for (size_t i = 0; ; ++i) {
4869 if (!this->bban[i]) {
// End of BBAN: append the country code and check digits, then stop. The inner
// loop reuses the outer index i; that is harmless because of the break below.
4870 for (i = 0; i < 2; ++i) {
4871 if ('A' <= this->country[i] && this->country[i] <= 'J') {
4872 normalized[available++] = '1';
4873 normalized[available++] = '0' + this->country[i] - 'A';
4874 }
4875 else if ('K' <= this->country[i] && this->country[i] <= 'T') {
4876 normalized[available++] = '2';
4877 normalized[available++] = '0' + this->country[i] - 'K';
4878 }
4879 else if ('U' <= this->country[i] && this->country[i] <= 'Z') {
4880 normalized[available++] = '3';
4881 normalized[available++] = '0' + this->country[i] - 'U';
4882 }
4883 }
4884 normalized[available++] = this->check_digits[0];
4885 normalized[available++] = this->check_digits[1];
4886 normalized[available] = 0;
4887 break;
4888 }
4889 if ('0' <= this->bban[i] && this->bban[i] <= '9')
4890 normalized[available++] = this->bban[i];
4891 else if ('A' <= this->bban[i] && this->bban[i] <= 'J') {
4892 normalized[available++] = '1';
4893 normalized[available++] = '0' + this->bban[i] - 'A';
4894 }
4895 else if ('K' <= this->bban[i] && this->bban[i] <= 'T') {
4896 normalized[available++] = '2';
4897 normalized[available++] = '0' + this->bban[i] - 'K';
4898 }
4899 else if ('U' <= this->bban[i] && this->bban[i] <= 'Z') {
4900 normalized[available++] = '3';
4901 normalized[available++] = '0' + this->bban[i] - 'U';
4902 }
4903 }
4904
4905 // Calculate modulo 97.
// Done piecewise: reduce the current chunk, then put the remainder (at most two
// digits) in front of the following digits so each chunk has at most 9 digits.
// Assumes stdex::strtou32(str, count, &end, radix) parses at most `count`
// characters and stores the index of the first unparsed one in `next` -
// TODO confirm against its declaration.
4906 nominator = stdex::strtou32(normalized, 9, &next, 10);
4907 for (;;) {
4908 nominator %= 97;
4909 if (!normalized[next]) {
// The IBAN checksum is good when the whole number leaves remainder 1.
4910 this->is_valid = nominator == 1;
4911 break;
4912 }
4913 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
4914 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
4915 nominator = nominator * 10 + static_cast<uint32_t>(normalized[next] - '0');
4916 }
4917
4918 this->interval.start = start;
4919 return true;
4920
4921 error:
4922 invalidate();
4923 return false;
4924 }
4925
// Optional parser for spaces inside the BBAN; may be null.
4926 std::shared_ptr<basic_parser<T>> m_space;
4927 };
4928
// Character-type aliases for basic_iban: narrow, wide, a _UNICODE-selected
// t-variant, and an SGML variant that is also instantiated over char.
4929 using iban = basic_iban<char>;
4930 using wiban = basic_iban<wchar_t>;
4931#ifdef _UNICODE
4932 using tiban = wiban;
4933#else
4934 using tiban = iban;
4935#endif
4936 using sgml_iban = basic_iban<char>;
4937
// Parser for a creditor reference: the letters "RF", two check digits, then
// alphanumeric characters read in groups of up to four, each group optionally
// preceded by a space. A structurally complete reference matches even if its
// checksum is wrong; the checksum result is reported in `is_valid`.
// NOTE(review): this listing skips source lines 4944, 4947, 4950, 4963,
// 4966-4968 and 4981, so the class header, the constructor's name line, one
// member initializer, the last statement of invalidate(), the declarations of
// `check_digits`, `reference` and `is_valid`, and the declaration of the local
// `nominator` are not visible here. The class name basic_creditor_reference is
// taken from the aliases that follow the class.
4943 template <class T>
4945 {
4946 public:
// Constructor parameter list, member initializer and body: stores the optional
// space parser and resets the result fields. `locale` is not used on any
// visible line; presumably it is forwarded to the base class on the skipped
// line 4950 - TODO confirm.
4948 _In_ const std::shared_ptr<basic_parser<T>>& space,
4949 _In_ const std::locale& locale = std::locale()) :
4951 m_space(space)
4952 {
4953 this->check_digits[0] = 0;
4954 this->reference[0] = 0;
4955 this->is_valid = false;
4956 }
4957
// Resets the result fields to empty strings / false.
4958 virtual void invalidate()
4959 {
4960 this->check_digits[0] = 0;
4961 this->reference[0] = 0;
4962 this->is_valid = false;
4964 }
4965
4969
4970 protected:
// Tries to match a creditor reference in text[start, end).
// On success `reference` holds the reference characters right-aligned and
// left-padded with '0' to the full buffer width, `check_digits` the two check
// digits, and `is_valid` the mod-97 result; this->interval starts at `start`.
// On a structural error calls invalidate() and returns false.
4971 virtual bool do_match(
4972 _In_reads_or_z_opt_(end) const T* text,
4973 _In_ size_t start = 0,
4974 _In_ size_t end = SIZE_MAX,
4975 _In_ int flags = match_default)
4976 {
4977 _Assume_(text || start >= end);
4978 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4979 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4980 size_t n, available, next;
4982
// Prefix "RF". A terminating zero at either position fails the comparison, so
// no separate end-of-string check is needed.
4983 this->interval.end = start;
4984 if (this->interval.end + 1 >= end ||
4985 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'R' ||
4986 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'F')
4987 goto error; // incomplete or wrong reference ID
4988 this->interval.end += 2;
4989
// Check digits: exactly two characters '0'..'9'.
4990 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4991 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4992 goto error; // incomplete or invalid check digits
4993 this->check_digits[i] = text[this->interval.end];
4994 }
4995 this->check_digits[2] = 0;
4996
// Reference characters, in groups of up to four; one optional space is
// accepted before each group (including the first). Reading stops at the first
// character that is not 0-9 / A-Z or at the end of the text.
// NOTE(review): when a space is matched and the following group turns out to
// be empty, this->interval.end stays after that space, so the reported match
// appears to include one trailing space - confirm whether that is intended.
4997 for (n = 0;;) {
4998 if (m_space && m_space->match(text, this->interval.end, end, flags))
4999 this->interval.end = m_space->interval.end;
5000 for (size_t j = 0; j < 4; ++j) {
5001 if (this->interval.end >= end || !text[this->interval.end])
5002 goto out;
5003 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
5004 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
5005 if (n >= _countof(reference) - 1)
5006 goto error; // reference overflow
5007 this->reference[n++] = chr;
5008 this->interval.end++;
5009 }
5010 else
5011 goto out;
5012 }
5013 }
5014 out:
5015 if (!n)
5016 goto error; // reference too short
// Right-align the n collected characters inside `reference` (copying backwards
// so the overlapping move is safe) and fill the freed front part with '0'.
5017 this->reference[_countof(this->reference) - 1] = 0;
5018 for (size_t i = n, j = _countof(this->reference) - 1; i;)
5019 this->reference[--j] = this->reference[--i];
5020 for (size_t j = _countof(this->reference) - 1 - n; j;)
5021 this->reference[--j] = '0';
5022
5023 // Normalize creditor reference.
// Builds the digit string reference + "2715" ("RF") + check digits, with every
// letter replaced by two digits (A..J -> 10..19, K..T -> 20..29, U..Z -> 30..35).
// NOTE(review): the worst case needs 2 * (_countof(reference) - 1) + 6 digits
// plus the terminator (every reference character a letter). The size of
// `reference` is declared on a line this listing skips - verify that 47
// elements cover that worst case, otherwise this buffer can be overrun.
5024 T normalized[47];
5025 available = 0;
5026 for (size_t i = 0; ; ++i) {
5027 if (!this->reference[i]) {
5028 normalized[available++] = '2'; // R
5029 normalized[available++] = '7';
5030 normalized[available++] = '1'; // F
5031 normalized[available++] = '5';
5032 normalized[available++] = this->check_digits[0];
5033 normalized[available++] = this->check_digits[1];
5034 normalized[available] = 0;
5035 break;
5036 }
5037 if ('0' <= this->reference[i] && this->reference[i] <= '9')
5038 normalized[available++] = this->reference[i];
5039 else if ('A' <= this->reference[i] && this->reference[i] <= 'J') {
5040 normalized[available++] = '1';
5041 normalized[available++] = '0' + this->reference[i] - 'A';
5042 }
5043 else if ('K' <= this->reference[i] && this->reference[i] <= 'T') {
5044 normalized[available++] = '2';
5045 normalized[available++] = '0' + this->reference[i] - 'K';
5046 }
5047 else if ('U' <= this->reference[i] && this->reference[i] <= 'Z') {
5048 normalized[available++] = '3';
5049 normalized[available++] = '0' + this->reference[i] - 'U';
5050 }
5051 }
5052
5053 // Calculate modulo 97.
// Done piecewise: reduce the current chunk, then put the remainder (at most two
// digits) in front of the following digits so each chunk has at most 9 digits.
// Assumes stdex::strtou32(str, count, &end, radix) parses at most `count`
// characters and stores the index of the first unparsed one in `next` -
// TODO confirm against its declaration.
5054 nominator = stdex::strtou32(normalized, 9, &next, 10);
5055 for (;;) {
5056 nominator %= 97;
5057 if (!normalized[next]) {
// The checksum is good when the whole number leaves remainder 1.
5058 this->is_valid = nominator == 1;
5059 break;
5060 }
5061 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
5062 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
5063 nominator = nominator * 10 + static_cast<uint32_t>(normalized[next] - '0');
5064 }
5065
5066 this->interval.start = start;
5067 return true;
5068
5069 error:
5070 invalidate();
5071 return false;
5072 }
5073
// Optional parser for spaces between groups; may be null.
5074 std::shared_ptr<basic_parser<T>> m_space;
5075 };
5076
// Character-type aliases for basic_creditor_reference: narrow, wide, a
// _UNICODE-selected t-variant, and an SGML variant that is also instantiated
// over char.
5077 using creditor_reference = basic_creditor_reference<char>;
5078 using wcreditor_reference = basic_creditor_reference<wchar_t>;
5079#ifdef _UNICODE
5080 using tcreditor_reference = wcreditor_reference;
5081#else
5082 using tcreditor_reference = creditor_reference;
5083#endif
5084 using sgml_creditor_reference = basic_creditor_reference<char>;
5085
// Parser for one numeric part of a reference: a run of characters '0'..'9'.
// NOTE(review): this listing skips source lines 5092 and 5114, so the class
// header and the condition that guards the success branch of do_match() are
// not visible here.
5091 template <class T>
5093 {
5094 public:
// Forwards the locale to the base parser; no other state.
5095 basic_si_reference_part(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5096
5097 protected:
// Advances this->interval.end from `start` over consecutive '0'..'9'
// characters, stopping at `end`, at a terminating zero or at the first other
// character. `flags` is not used on any visible line.
5098 virtual bool do_match(
5099 _In_reads_or_z_opt_(end) const T* text,
5100 _In_ size_t start = 0,
5101 _In_ size_t end = SIZE_MAX,
5102 _In_ int flags = match_default)
5103 {
5104 _Assume_(text || start >= end);
5105 this->interval.end = start;
5106 for (;;) {
5107 if (this->interval.end >= end || !text[this->interval.end])
5108 break;
5109 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9')
5110 this->interval.end++;
5111 else
5112 break;
5113 }
// NOTE(review): the opening line of this conditional (source line 5114) is
// missing from the listing; the closing brace below shows the success return
// is guarded - presumably by "at least one digit was consumed". Confirm.
5115 this->interval.start = start;
5116 return true;
5117 }
5118 this->interval.invalidate();
5119 return false;
5120 }
5121 };
5122
// NOTE(review): both branches of this conditional are empty in this listing;
// source lines 5123-5124, 5126, 5128 and 5130 are skipped, so whatever this
// conditional selects is not visible here - check the real header.
5125#ifdef _UNICODE
5127#else
5129#endif
5131
// Parser for the delimiter between reference parts: a single '-' character.
// NOTE(review): this listing skips source line 5138, so the class header is
// not visible here.
5137 template <class T>
5139 {
5140 public:
// Forwards the locale to the base parser; no other state.
5141 basic_si_reference_delimiter(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5142
5143 protected:
// Matches exactly one '-' at text[start]. On success this->interval becomes
// [start, start + 1) and true is returned; otherwise this->interval is
// invalidated and false is returned. `flags` is not used.
5144 virtual bool do_match(
5145 _In_reads_or_z_opt_(end) const T* text,
5146 _In_ size_t start = 0,
5147 _In_ size_t end = SIZE_MAX,
5148 _In_ int flags = match_default)
5149 {
5150 _Assume_(text || start >= end);
// A terminating zero at text[start] simply fails the comparison.
5151 if (start < end && text[start] == '-') {
5152 this->interval.end = (this->interval.start = start) + 1;
5153 return true;
5154 }
5155 this->interval.invalidate();
5156 return false;
5157 }
5158 };
5159
// NOTE(review): both branches of this conditional are empty in this listing;
// source lines 5160-5161, 5163, 5165 and 5167 are skipped, so whatever this
// conditional selects is not visible here - check the real header.
5162#ifdef _UNICODE
5164#else
5166#endif
5168
5176 template <class T>
5178 {
5179 public:
5181 _In_ const std::shared_ptr<basic_parser<T>>& space,
5182 _In_ const std::locale& locale = std::locale()) :
5184 part1(locale),
5185 part2(locale),
5186 part3(locale),
5187 is_valid(false),
5188 m_space(space),
5189 m_delimiter(locale)
5190 {
5191 this->model[0] = 0;
5192 }
5193
5194 virtual void invalidate()
5195 {
5196 this->model[0] = 0;
5197 this->part1.invalidate();
5198 this->part2.invalidate();
5199 this->part3.invalidate();
5200 this->is_valid = false;
5202 }
5203
5204 T model[3];
5209
5210 protected:
5211 virtual bool do_match(
5212 _In_reads_or_z_opt_(end) const T* text,
5213 _In_ size_t start = 0,
5214 _In_ size_t end = SIZE_MAX,
5215 _In_ int flags = match_default)
5216 {
5217 _Assume_(text || start >= end);
5218 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
5219 const bool case_insensitive = flags & match_case_insensitive ? true : false;
5220
5221 this->interval.end = start;
5222 if (this->interval.end + 1 >= end ||
5223 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'S' ||
5224 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'I')
5225 goto error; // incomplete or wrong reference ID
5226 this->interval.end += 2;
5227
5228 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5229 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5230 goto error; // incomplete or invalid model
5231 this->model[i] = text[this->interval.end];
5232 }
5233 this->model[2] = 0;
5234
5235 this->part1.invalidate();
5236 this->part2.invalidate();
5237 this->part3.invalidate();
5238 if (this->model[0] == '9' && this->model[1] == '9') {
5239 is_valid = true;
5240 this->interval.start = start;
5241 return true;
5242 }
5243
5244 if (m_space && m_space->match(text, this->interval.end, end, flags))
5245 this->interval.end = m_space->interval.end;
5246
5247 this->part1.match(text, this->interval.end, end, flags) &&
5248 this->m_delimiter.match(text, this->part1.interval.end, end, flags) &&
5249 this->part2.match(text, this->m_delimiter.interval.end, end, flags) &&
5250 this->m_delimiter.match(text, this->part2.interval.end, end, flags) &&
5251 this->part3.match(text, this->m_delimiter.interval.end, end, flags);
5252
5253 this->interval.start = start;
5254 if (this->part3.interval)
5255 this->interval.end = this->part3.interval.end;
5256 else if (this->part2.interval)
5257 this->interval.end = this->part2.interval.end;
5258 else if (this->part1.interval)
5259 this->interval.end = this->part1.interval.end;
5260 else
5261 this->interval.end = start + 4;
5262
5263 if (this->model[0] == '0' && this->model[1] == '0')
5264 is_valid =
5265 this->part3.interval ?
5266 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5267 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 :
5268 this->part2.interval ?
5269 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5270 this->part1.interval.size() + this->part2.interval.size() <= 20 :
5271 this->part1.interval ?
5272 this->part1.interval.size() <= 12 :
5273 false;
5274 else if (this->model[0] == '0' && this->model[1] == '1')
5275 is_valid =
5276 this->part3.interval ?
5277 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5278 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5279 check11(
5280 text + this->part1.interval.start, this->part1.interval.size(),
5281 text + this->part2.interval.start, this->part2.interval.size(),
5282 text + this->part3.interval.start, this->part3.interval.size()) :
5283 this->part2.interval ?
5284 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5285 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5286 check11(
5287 text + this->part1.interval.start, this->part1.interval.size(),
5288 text + this->part2.interval.start, this->part2.interval.size()) :
5289 this->part1.interval ?
5290 this->part1.interval.size() <= 12 &&
5291 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5292 false;
5293 else if (this->model[0] == '0' && this->model[1] == '2')
5294 is_valid =
5295 this->part3.interval ?
5296 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5297 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5298 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5299 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5300 false;
5301 else if (this->model[0] == '0' && this->model[1] == '3')
5302 is_valid =
5303 this->part3.interval ?
5304 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5305 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5306 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5307 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5308 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5309 false;
5310 else if (this->model[0] == '0' && this->model[1] == '4')
5311 is_valid =
5312 this->part3.interval ?
5313 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5314 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5315 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5316 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5317 false;
5318 else if ((this->model[0] == '0' || this->model[0] == '5') && this->model[1] == '5')
5319 is_valid =
5320 this->part3.interval ?
5321 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5322 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5323 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5324 this->part2.interval ?
5325 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5326 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5327 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5328 this->part1.interval ?
5329 this->part1.interval.size() <= 12 &&
5330 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5331 false;
5332 else if (this->model[0] == '0' && this->model[1] == '6')
5333 is_valid =
5334 this->part3.interval ?
5335 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5336 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5337 check11(
5338 text + this->part2.interval.start, this->part2.interval.size(),
5339 text + this->part3.interval.start, this->part3.interval.size()) :
5340 this->part2.interval ?
5341 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5342 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5343 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5344 false;
5345 else if (this->model[0] == '0' && this->model[1] == '7')
5346 is_valid =
5347 this->part3.interval ?
5348 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5349 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5350 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5351 this->part2.interval ?
5352 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5353 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5354 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5355 false;
5356 else if (this->model[0] == '0' && this->model[1] == '8')
5357 is_valid =
5358 this->part3.interval ?
5359 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5360 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5361 check11(
5362 text + this->part1.interval.start, this->part1.interval.size(),
5363 text + this->part2.interval.start, this->part2.interval.size()) &&
5364 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5365 false;
5366 else if (this->model[0] == '0' && this->model[1] == '9')
5367 is_valid =
5368 this->part3.interval ?
5369 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5370 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5371 check11(
5372 text + this->part1.interval.start, this->part1.interval.size(),
5373 text + this->part2.interval.start, this->part2.interval.size()) :
5374 this->part2.interval ?
5375 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5376 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5377 check11(
5378 text + this->part1.interval.start, this->part1.interval.size(),
5379 text + this->part2.interval.start, this->part2.interval.size()) :
5380 this->part1.interval ?
5381 this->part1.interval.size() <= 12 &&
5382 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5383 false;
5384 else if (this->model[0] == '1' && this->model[1] == '0')
5385 is_valid =
5386 this->part3.interval ?
5387 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5388 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5389 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5390 check11(
5391 text + this->part2.interval.start, this->part2.interval.size(),
5392 text + this->part3.interval.start, this->part3.interval.size()) :
5393 this->part2.interval ?
5394 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5395 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5396 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5397 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5398 false;
5399 else if (
5400 (this->model[0] == '1' && (this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5401 ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '8') ||
5402 (this->model[0] == '4' && (this->model[1] == '0' || this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5403 (this->model[0] == '5' && (this->model[1] == '1' || this->model[1] == '8')))
5404 is_valid =
5405 this->part3.interval ?
5406 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5407 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5408 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5409 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5410 this->part2.interval ?
5411 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5412 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5413 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5414 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5415 false;
5416 else if (this->model[0] == '1' && this->model[1] == '2')
5417 is_valid =
5418 this->part3.interval ? false :
5419 this->part2.interval ? false :
5420 this->part1.interval ?
5421 this->part1.interval.size() <= 13 &&
5422 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5423 false;
5424 else if ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '1')
5425 is_valid =
5426 this->part3.interval ? false :
5427 this->part2.interval ?
5428 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5429 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5430 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5431 false;
5432 else
5433 is_valid = true; // Assume models we don't handle as valid
5434 return true;
5435
5436 error:
5437 invalidate();
5438 return false;
5439 }
5440
/// Verifies the modulo-11 check digit of a single digit string.
///
/// The last character of `part1` is the check digit. The digits before it are
/// weighted 2, 3, 4, ... from right to left and summed.
///
/// \param[in] part1      Digit characters, check digit last
/// \param[in] num_part1  Number of characters in `part1` (at least 1)
///
/// \returns true when the last digit equals the computed check digit
static bool check11(
    _In_count_(num_part1) const T* part1, _In_ size_t num_part1)
{
    _Assume_(part1 && num_part1 >= 1);
    const size_t check_pos = num_part1 - 1;
    uint32_t sum = 0;
    uint32_t weight = 2;
    for (size_t pos = check_pos; pos > 0; --pos, ++weight)
        sum += static_cast<uint32_t>(part1[pos - 1] - '0') * weight;
    const uint32_t remainder = sum % 11;
    // 11 - remainder would be 11 or 10 for remainders 0 and 1; both map to check digit 0.
    const uint8_t expected = remainder <= 1 ? static_cast<uint8_t>(0) : static_cast<uint8_t>(11 - remainder);
    return expected == part1[check_pos] - '0';
}
5453
/// Verifies the modulo-11 check digit over two concatenated digit strings.
///
/// The digits of `part1` followed by `part2` form one number; the last
/// character of `part2` is the check digit. The remaining digits are weighted
/// 2, 3, 4, ... from right to left and summed.
///
/// \param[in] part1      Leading digit characters (may be empty)
/// \param[in] num_part1  Number of characters in `part1`
/// \param[in] part2      Trailing digit characters, check digit last
/// \param[in] num_part2  Number of characters in `part2` (at least 1)
///
/// \returns true when the last digit of `part2` equals the computed check digit
static bool check11(
    _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
    _In_count_(num_part2) const T* part2, _In_ size_t num_part2)
{
    _Assume_(part1 || !num_part1);
    _Assume_(part2 && num_part2 >= 1);
    uint32_t nominator = 0, ponder = 2;
    for (size_t i = num_part2 - 1; i--; ++ponder)
        nominator += static_cast<uint32_t>(part2[i] - '0') * ponder;
    for (size_t i = num_part1; i--; ++ponder)
        nominator += static_cast<uint32_t>(part1[i] - '0') * ponder;
    uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
    // control is 1..11 here; both 10 and 11 map to check digit 0,
    // matching the single-part overload.
    if (control >= 10)
        control = 0;
    return control == part2[num_part2 - 1] - '0';
}
5470
/// Verifies the modulo-11 check digit over three concatenated digit strings.
///
/// The digits of `part1`, `part2` and `part3` form one number; the last
/// character of `part3` is the check digit. The remaining digits are weighted
/// 2, 3, 4, ... from right to left and summed.
///
/// \param[in] part1      Leading digit characters (may be empty)
/// \param[in] num_part1  Number of characters in `part1`
/// \param[in] part2      Middle digit characters (may be empty)
/// \param[in] num_part2  Number of characters in `part2`
/// \param[in] part3      Trailing digit characters, check digit last
/// \param[in] num_part3  Number of characters in `part3` (at least 1)
///
/// \returns true when the last digit of `part3` equals the computed check digit
static bool check11(
    _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
    _In_count_(num_part2) const T* part2, _In_ size_t num_part2,
    _In_count_(num_part3) const T* part3, _In_ size_t num_part3)
{
    _Assume_(part1 || !num_part1);
    _Assume_(part2 || !num_part2);
    _Assume_(part3 && num_part3 >= 1);
    uint32_t nominator = 0, ponder = 2;
    for (size_t i = num_part3 - 1; i--; ++ponder)
        nominator += static_cast<uint32_t>(part3[i] - '0') * ponder;
    for (size_t i = num_part2; i--; ++ponder)
        nominator += static_cast<uint32_t>(part2[i] - '0') * ponder;
    for (size_t i = num_part1; i--; ++ponder)
        nominator += static_cast<uint32_t>(part1[i] - '0') * ponder;
    uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
    // control is 1..11 here; both 10 and 11 map to check digit 0,
    // matching the single-part overload.
    if (control >= 10)
        control = 0;
    // The check digit is the last character of part3 (not part2).
    return control == part3[num_part3 - 1] - '0';
}
5491
5492 std::shared_ptr<basic_parser<T>> m_space;
5493 basic_si_reference_delimiter<T> m_delimiter;
5494 };
5495
// Character-type specific aliases of basic_si_reference.
5496 using si_reference = basic_si_reference<char>;
5497 using wsi_reference = basic_si_reference<wchar_t>;
// tsi_reference selects the wide alias when _UNICODE is defined and the narrow alias otherwise.
5498#ifdef _UNICODE
5499 using tsi_reference = wsi_reference;
5500#else
5501 using tsi_reference = si_reference;
5502#endif
// Same instantiation as si_reference (char).
5503 using sgml_si_reference = basic_si_reference<char>;
5504
// Parser template built from three sub-parsers (element, digit, sign).
// It matches one or more element matches, each optionally followed by digit
// matches, and then one optional sign match.
5508 template <class T>
5510 {
5511 public:
// Constructor arguments: the sub-parsers for an element, a digit and a sign, plus a locale.
// Both result flags start out false.
5513 _In_ const std::shared_ptr<basic_parser<T>>& element,
5514 _In_ const std::shared_ptr<basic_parser<T>>& digit,
5515 _In_ const std::shared_ptr<basic_parser<T>>& sign,
5516 _In_ const std::locale& locale = std::locale()) :
5518 m_element(element),
5519 m_digit(digit),
5520 m_sign(sign),
5521 has_digits(false),
5522 has_charge(false)
5523 {}
5524
// Clears both result flags.
5525 virtual void invalidate()
5526 {
5527 has_digits = false;
5528 has_charge = false;
5530 }
5531
// Set by do_match: true when at least one digit match followed an element.
5532 bool has_digits;
// Set by do_match: true when the trailing sign matched.
5533 bool has_charge;
5534
5535 protected:
// Matches elements (with optional digits) repeatedly starting at `start`.
// Returns true once at least one element was consumed and no further element matches;
// returns false with an invalidated interval when nothing was consumed.
5536 virtual bool do_match(
5537 _In_reads_or_z_opt_(end) const T* text,
5538 _In_ size_t start = 0,
5539 _In_ size_t end = SIZE_MAX,
5540 _In_ int flags = match_default)
5541 {
5542 _Assume_(text || start >= end);
5543
5544 has_digits = false;
5545 has_charge = false;
5546 this->interval.end = start;
5547
5548 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
5549 for (;;) {
5550 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
5551 this->interval.end = m_element->interval.end;
// Consume every digit match that directly follows the element.
5552 while (m_digit->match(text, this->interval.end, end, flags)) {
5553 this->interval.end = m_digit->interval.end;
5554 has_digits = true;
5555 }
5556 }
// No further element: succeed if something was consumed, taking an optional trailing sign.
5557 else if (start < this->interval.end) {
5558 if (m_sign->match(text, this->interval.end, end, flags)) {
5559 this->interval.end = m_sign->interval.end;
5560 has_charge = true;
5561 }
5562 this->interval.start = start;
5563 return true;
5564 }
5565 else {
5566 this->interval.invalidate();
5567 return false;
5568 }
5569 }
5570 }
5571
// Sub-parsers supplied to the constructor.
5572 std::shared_ptr<basic_parser<T>> m_element;
5573 std::shared_ptr<basic_parser<T>> m_digit;
5574 std::shared_ptr<basic_parser<T>> m_sign;
5575 };
5576
5579#ifdef _UNICODE
5581#else
5583#endif
5585
5590 {
5591 protected:
5592 virtual bool do_match(
5593 _In_reads_or_z_(end) const char* text,
5594 _In_ size_t start = 0,
5595 _In_ size_t end = SIZE_MAX,
5596 _In_ int flags = match_default)
5597 {
5598 _Assume_(text || start >= end);
5599 this->interval.end = start;
5600
5601 _Assume_(text || this->interval.end >= end);
5602 if (this->interval.end < end && text[this->interval.end]) {
5603 if (text[this->interval.end] == '\r') {
5604 this->interval.end++;
5605 if (this->interval.end < end && text[this->interval.end] == '\n') {
5606 this->interval.start = start;
5607 this->interval.end++;
5608 return true;
5609 }
5610 }
5611 else if (text[this->interval.end] == '\n') {
5612 this->interval.start = start;
5613 this->interval.end++;
5614 return true;
5615 }
5616 }
5617 this->interval.invalidate();
5618 return false;
5619 }
5620 };
5621
5625 class http_space : public parser
5626 {
5627 protected:
5628 virtual bool do_match(
5629 _In_reads_or_z_(end) const char* text,
5630 _In_ size_t start = 0,
5631 _In_ size_t end = SIZE_MAX,
5632 _In_ int flags = match_default)
5633 {
5634 _Assume_(text || start >= end);
5635 this->interval.end = start;
5636 if (m_line_break.match(text, this->interval.end, end, flags)) {
5637 this->interval.end = m_line_break.interval.end;
5638 if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5639 this->interval.start = start;
5640 this->interval.end++;
5641 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5642 return true;
5643 }
5644 }
5645 else if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5646 this->interval.start = start;
5647 this->interval.end++;
5648 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5649 return true;
5650 }
5651 this->interval.invalidate();
5652 return false;
5653 }
5654
5655 http_line_break m_line_break;
5656 };
5657
5661 class http_text_char : public parser
5662 {
5663 protected:
5664 virtual bool do_match(
5665 _In_reads_or_z_(end) const char* text,
5666 _In_ size_t start = 0,
5667 _In_ size_t end = SIZE_MAX,
5668 _In_ int flags = match_default)
5669 {
5670 _Assume_(text || start >= end);
5671 this->interval.end = start;
5672
5673 _Assume_(text || this->interval.end >= end);
5674 if (m_space.match(text, this->interval.end, end, flags)) {
5675 this->interval.start = start;
5676 this->interval.end = m_space.interval.end;
5677 return true;
5678 }
5679 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
5680 this->interval.start = start;
5681 this->interval.end++;
5682 return true;
5683 }
5684 this->interval.invalidate();
5685 return false;
5686 }
5687
5688 http_space m_space;
5689 };
5690
// Matches a run of characters up to (not including) the first control character,
// DEL (0x7f), one of the separator characters listed below, or whitespace.
5694 class http_token : public parser
5695 {
5696 protected:
5697 virtual bool do_match(
5698 _In_reads_or_z_(end) const char* text,
5699 _In_ size_t start = 0,
5700 _In_ size_t end = SIZE_MAX,
5701 _In_ int flags = match_default)
5702 {
5703 _Assume_(text || start >= end);
5704 this->interval.end = start;
// Advance until a terminating character, the zero terminator or `end` is reached.
5705 for (;;) {
5706 if (this->interval.end < end && text[this->interval.end]) {
5707 if ((unsigned int)text[this->interval.end] < 0x20 ||
5708 (unsigned int)text[this->interval.end] == 0x7f ||
5709 text[this->interval.end] == '(' ||
5710 text[this->interval.end] == ')' ||
5711 text[this->interval.end] == '<' ||
5712 text[this->interval.end] == '>' ||
5713 text[this->interval.end] == '@' ||
5714 text[this->interval.end] == ',' ||
5715 text[this->interval.end] == ';' ||
5716 text[this->interval.end] == ':' ||
5717 text[this->interval.end] == '\\' ||
5718 text[this->interval.end] == '\"' ||
5719 text[this->interval.end] == '/' ||
5720 text[this->interval.end] == '[' ||
5721 text[this->interval.end] == ']' ||
5722 text[this->interval.end] == '?' ||
5723 text[this->interval.end] == '=' ||
5724 text[this->interval.end] == '{' ||
5725 text[this->interval.end] == '}' ||
5726 stdex::isspace(text[this->interval.end]))
5727 break;
5728 else
5729 this->interval.end++;
5730 }
5731 else
5732 break;
5733 }
// NOTE(review): the `if` that opens this success branch (source line 5734) is absent from this listing.
5735 this->interval.start = start;
5736 return true;
5737 }
5738 else {
5739 this->interval.invalidate();
5740 return false;
5741 }
5742 }
5743 };
5744
// Body of a parser for a double-quoted string with backslash escapes.
// `content` receives the span between the quotes.
5749 {
5750 public:
// Marks `content` as empty/invalid (start > end) and resets the base parser state.
5751 virtual void invalidate()
5752 {
5753 content.start = 1;
5754 content.end = 0;
5755 parser::invalidate();
5756 }
5757
5759
5760 protected:
// Matches: opening '"', then any mix of backslash-escaped characters and m_chr matches, then closing '"'.
// Fails when the text ends before the closing quote.
5761 virtual bool do_match(
5762 _In_reads_or_z_(end) const char* text,
5763 _In_ size_t start = 0,
5764 _In_ size_t end = SIZE_MAX,
5765 _In_ int flags = match_default)
5766 {
5767 _Assume_(text || start >= end);
5768 this->interval.end = start;
// When start >= end this test is skipped; the loop below then fails on its bounds check.
5769 if (this->interval.end < end && text[this->interval.end] != '"')
5770 goto error;
5771 this->interval.end++;
5772 content.start = this->interval.end;
5773 for (;;) {
5774 _Assume_(text || this->interval.end >= end);
5775 if (this->interval.end < end && text[this->interval.end]) {
5776 if (text[this->interval.end] == '"') {
// Closing quote: content ends before it, the overall match includes it.
5777 content.end = this->interval.end;
5778 this->interval.end++;
5779 break;
5780 }
5781 else if (text[this->interval.end] == '\\') {
// Escape: the backslash and the character after it are both consumed.
5782 this->interval.end++;
5783 if (this->interval.end < end && text[this->interval.end]) {
5784 this->interval.end++;
5785 }
5786 else
5787 goto error;
5788 }
// Advances by one character even when m_chr matched a longer span.
5789 else if (m_chr.match(text, this->interval.end, end, flags))
5790 this->interval.end++;
5791 else
5792 goto error;
5793 }
5794 else
5795 goto error;
5796 }
5797 this->interval.start = start;
5798 return true;
5799
5800 error:
5801 invalidate();
5802 return false;
5803 }
5804
// Parser used to accept ordinary (non-quote, non-escape) characters.
5805 http_text_char m_chr;
5806 };
5807
// Matches a value using the `string` sub-parser first and the `token` sub-parser second.
// Whichever sub-parser did not match is left invalidated.
5811 class http_value : public parser
5812 {
5813 public:
// Resets both sub-parsers and the base parser state.
5814 virtual void invalidate()
5815 {
5816 string.invalidate();
5817 token.invalidate();
5818 parser::invalidate();
5819 }
5820
5823
5824 protected:
5825 virtual bool do_match(
5826 _In_reads_or_z_(end) const char* text,
5827 _In_ size_t start = 0,
5828 _In_ size_t end = SIZE_MAX,
5829 _In_ int flags = match_default)
5830 {
5831 _Assume_(text || start >= end);
5832 this->interval.end = start;
// `string` takes precedence over `token`.
5833 if (string.match(text, this->interval.end, end, flags)) {
5834 token.invalidate();
5835 this->interval.end = string.interval.end;
5836 this->interval.start = start;
5837 return true;
5838 }
5839 else if (token.match(text, this->interval.end, end, flags)) {
5840 string.invalidate();
5841 this->interval.end = token.interval.end;
5842 this->interval.start = start;
5843 return true;
5844 }
5845 else {
5846 this->interval.invalidate();
5847 return false;
5848 }
5849 }
5850 };
5851
5855 class http_parameter : public parser
5856 {
5857 public:
// Resets the name and value sub-parsers, then the base parser state.
5858 virtual void invalidate()
5859 {
5860 name.invalidate();
5861 value.invalidate();
5862 parser::invalidate();
5863 }
5864
5867
5868 protected:
5869 virtual bool do_match(
5870 _In_reads_or_z_(end) const char* text,
5871 _In_ size_t start = 0,
5872 _In_ size_t end = SIZE_MAX,
5873 _In_ int flags = match_default)
5874 {
5875 _Assume_(text || start >= end);
5876 this->interval.end = start;
5877 if (name.match(text, this->interval.end, end, flags))
5878 this->interval.end = name.interval.end;
5879 else
5880 goto error;
5881 while (m_space.match(text, this->interval.end, end, flags))
5882 this->interval.end = m_space.interval.end;
5883 _Assume_(text || this->interval.end >= end);
5884 if (this->interval.end < end && text[this->interval.end] == '=')
5885 this->interval.end++;
5886 else
5887 while (m_space.match(text, this->interval.end, end, flags))
5888 this->interval.end = m_space.interval.end;
5889 if (value.match(text, this->interval.end, end, flags))
5890 this->interval.end = value.interval.end;
5891 else
5892 goto error;
5893 this->interval.start = start;
5894 return true;
5895
5896 error:
5897 invalidate();
5898 return false;
5899 }
5900
5901 http_space m_space;
5902 };
5903
5907 class http_any_type : public parser
5908 {
5909 protected:
5910 virtual bool do_match(
5911 _In_reads_or_z_(end) const char* text,
5912 _In_ size_t start = 0,
5913 _In_ size_t end = SIZE_MAX,
5914 _In_ int flags = match_default)
5915 {
5916 _Assume_(text || start >= end);
5917 if (start + 2 < end &&
5918 text[start] == '*' &&
5919 text[start + 1] == '/' &&
5920 text[start + 2] == '*')
5921 {
5922 this->interval.end = (this->interval.start = start) + 3;
5923 return true;
5924 }
5925 else if (start < end && text[start] == '*') {
5926 this->interval.end = (this->interval.start = start) + 1;
5927 return true;
5928 }
5929 else {
5930 this->interval.invalidate();
5931 return false;
5932 }
5933 }
5934 };
5935
5940 {
5941 public:
// Resets the type and subtype sub-parsers, then the base parser state.
5942 virtual void invalidate()
5943 {
5944 type.invalidate();
5945 subtype.invalidate();
5946 parser::invalidate();
5947 }
5948
5949 http_token type;
5950 http_token subtype;
5951
5952 protected:
5953 virtual bool do_match(
5954 _In_reads_or_z_(end) const char* text,
5955 _In_ size_t start = 0,
5956 _In_ size_t end = SIZE_MAX,
5957 _In_ int flags = match_default)
5958 {
5959 _Assume_(text || start >= end);
5960 this->interval.end = start;
5961 if (type.match(text, this->interval.end, end, flags))
5962 this->interval.end = type.interval.end;
5963 else
5964 goto error;
5965 while (m_space.match(text, this->interval.end, end, flags))
5966 this->interval.end = m_space.interval.end;
5967 if (this->interval.end < end && text[this->interval.end] == '/')
5968 this->interval.end++;
5969 else
5970 goto error;
5971 while (m_space.match(text, this->interval.end, end, flags))
5972 this->interval.end = m_space.interval.end;
5973 if (subtype.match(text, this->interval.end, end, flags))
5974 this->interval.end = subtype.interval.end;
5975 else
5976 goto error;
5977 this->interval.start = start;
5978 return true;
5979
5980 error:
5981 invalidate();
5982 return false;
5983 }
5984
5985 http_space m_space;
5986 };
5987
// Body of a parser that extends http_media_range with a list of ";"-separated parameters.
5992 {
5993 public:
// Drops collected parameters and resets the http_media_range state.
5994 virtual void invalidate()
5995 {
5996 params.clear();
5997 http_media_range::invalidate();
5998 }
5999
// Parameters collected by the last match, in order of appearance.
6000 std::list<http_parameter> params;
6001
6002 protected:
// Matches the media range first, then zero or more `; parameter` items.
// The match ends after the last successfully parsed parameter, or after the subtype when there is none.
6003 virtual bool do_match(
6004 _In_reads_or_z_(end) const char* text,
6005 _In_ size_t start = 0,
6006 _In_ size_t end = SIZE_MAX,
6007 _In_ int flags = match_default)
6008 {
6009 _Assume_(text || start >= end);
6010 if (!http_media_range::do_match(text, start, end, flags))
6011 goto error;
6012 params.clear();
6013 for (;;) {
6014 if (this->interval.end < end && text[this->interval.end]) {
// Whitespace before a ";" is skipped.
6015 if (m_space.match(text, this->interval.end, end, flags))
6016 this->interval.end = m_space.interval.end;
6017 else if (text[this->interval.end] == ';') {
6018 this->interval.end++;
6019 while (m_space.match(text, this->interval.end, end, flags))
6020 this->interval.end = m_space.interval.end;
6021 http_parameter param;
6022 if (param.match(text, this->interval.end, end, flags)) {
6023 this->interval.end = param.interval.end;
6024 params.push_back(std::move(param));
6025 }
6026 else
6027 break;
6028 }
6029 else
6030 break;
6031 }
6032 else
6033 break;
6034 }
// Discard trailing whitespace or a dangling ";" consumed by the loop above.
6035 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
6036 return true;
6037
6038 error:
6039 invalidate();
6040 return false;
6041 }
6042 };
6043
// Body of a parser that matches a run of characters up to (not including) the first
// control character, DEL (0x7f), ':', '/' or whitespace.
6048 {
6049 protected:
6050 virtual bool do_match(
6051 _In_reads_or_z_(end) const char* text,
6052 _In_ size_t start = 0,
6053 _In_ size_t end = SIZE_MAX,
6054 _In_ int flags = match_default)
6055 {
6056 _Assume_(text || start >= end);
6057 this->interval.end = start;
// Advance until a terminating character, the zero terminator or `end` is reached.
6058 for (;;) {
6059 if (this->interval.end < end && text[this->interval.end]) {
6060 if ((unsigned int)text[this->interval.end] < 0x20 ||
6061 (unsigned int)text[this->interval.end] == 0x7f ||
6062 text[this->interval.end] == ':' ||
6063 text[this->interval.end] == '/' ||
6064 stdex::isspace(text[this->interval.end]))
6065 break;
6066 else
6067 this->interval.end++;
6068 }
6069 else
6070 break;
6071 }
// NOTE(review): the `if` that opens this success branch (source line 6072) is absent from this listing.
6073 this->interval.start = start;
6074 return true;
6075 }
6076 this->interval.invalidate();
6077 return false;
6078 }
6079 };
6080
// Matches a run of decimal digits and stores its numeric value in `value`.
// Fails when the number exceeds UINT16_MAX.
6084 class http_url_port : public parser
6085 {
6086 public:
6087 http_url_port(_In_ const std::locale& locale = std::locale()) :
6088 parser(locale),
6089 value(0)
6090 {}
6091
// Resets `value` to 0 along with the base parser state.
6092 virtual void invalidate()
6093 {
6094 value = 0;
6095 parser::invalidate();
6096 }
6097
// Port number parsed by the last match; 0 after invalidate().
6098 uint16_t value;
6099
6100 protected:
6101 virtual bool do_match(
6102 _In_reads_or_z_(end) const char* text,
6103 _In_ size_t start = 0,
6104 _In_ size_t end = SIZE_MAX,
6105 _In_ int flags = match_default)
6106 {
6107 _Assume_(text || start >= end);
6108 value = 0;
6109 this->interval.end = start;
6110 for (;;) {
6111 if (this->interval.end < end && text[this->interval.end]) {
6112 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
// Accumulate in size_t so the range check happens before narrowing to uint16_t.
6113 size_t _value = static_cast<size_t>(value) * 10 + static_cast<size_t>(text[this->interval.end] - '0');
6114 if (_value > UINT16_MAX) {
6115 value = 0;
6116 this->interval.invalidate();
6117 return false;
6118 }
6119 value = (uint16_t)_value;
6120 this->interval.end++;
6121 }
6122 else
6123 break;
6124 }
6125 else
6126 break;
6127 }
// NOTE(review): the `if` that opens this success branch (source line 6128) is absent from this listing.
6129 this->interval.start = start;
6130 return true;
6131 }
6132 this->interval.invalidate();
6133 return false;
6134 }
6135 };
6136
6141 {
6142 protected:
6143 virtual bool do_match(
6144 _In_reads_or_z_(end) const char* text,
6145 _In_ size_t start = 0,
6146 _In_ size_t end = SIZE_MAX,
6147 _In_ int flags = match_default)
6148 {
6149 _Assume_(text || start >= end);
6150 this->interval.end = start;
6151 for (;;) {
6152 if (this->interval.end < end && text[this->interval.end]) {
6153 if ((unsigned int)text[this->interval.end] < 0x20 ||
6154 (unsigned int)text[this->interval.end] == 0x7f ||
6155 text[this->interval.end] == '?' ||
6156 text[this->interval.end] == '/' ||
6157 stdex::isspace(text[this->interval.end]))
6158 break;
6159 else
6160 this->interval.end++;
6161 }
6162 else
6163 break;
6164 }
6165 this->interval.start = start;
6166 return true;
6167 }
6168 };
6169
6173 class http_url_path : public parser
6174 {
6175 public:
// Drops the collected path segments and resets the base parser state.
6176 virtual void invalidate()
6177 {
6178 segments.clear();
6179 parser::invalidate();
6180 }
6181
6182 std::vector<http_url_path_segment> segments;
6183
6184 protected:
6185 virtual bool do_match(
6186 _In_reads_or_z_(end) const char* text,
6187 _In_ size_t start = 0,
6188 _In_ size_t end = SIZE_MAX,
6189 _In_ int flags = match_default)
6190 {
6191 _Assume_(text || start >= end);
6193 this->interval.end = start;
6194 segments.clear();
6195 _Assume_(text || this->interval.end >= end);
6196 if (this->interval.end < end && text[this->interval.end] != '/')
6197 goto error;
6198 this->interval.end++;
6199 s.match(text, this->interval.end, end, flags);
6200 segments.push_back(s);
6201 this->interval.end = s.interval.end;
6202 for (;;) {
6203 if (this->interval.end < end && text[this->interval.end]) {
6204 if (text[this->interval.end] == '/') {
6205 this->interval.end++;
6206 s.match(text, this->interval.end, end, flags);
6207 segments.push_back(s);
6208 this->interval.end = s.interval.end;
6209 }
6210 else
6211 break;
6212 }
6213 else
6214 break;
6215 }
6216 this->interval.start = start;
6217 return true;
6218
6219 error:
6220 invalidate();
6221 return false;
6222 }
6223 };
6224
// Body of a parser for one `name[=value]` item; `name` and `value` receive the respective spans.
6229 {
6230 public:
// Marks `name` and `value` as empty/invalid (start > end) and resets the base parser state.
6231 virtual void invalidate()
6232 {
6233 name.start = 1;
6234 name.end = 0;
6235 value.start = 1;
6236 value.end = 0;
6237 parser::invalidate();
6238 }
6239
6242
6243 protected:
6244 virtual bool do_match(
6245 _In_reads_or_z_(end) const char* text,
6246 _In_ size_t start = 0,
6247 _In_ size_t end = SIZE_MAX,
6248 _In_ int flags = match_default)
6249 {
6250 _Assume_(text || start >= end);
6251 this->interval.end = start;
6252 name.start = this->interval.end;
// Name: runs up to a control character, DEL (0x7f), '&', '=' or whitespace.
6253 for (;;) {
6254 if (this->interval.end < end && text[this->interval.end]) {
6255 if ((unsigned int)text[this->interval.end] < 0x20 ||
6256 (unsigned int)text[this->interval.end] == 0x7f ||
6257 text[this->interval.end] == '&' ||
6258 text[this->interval.end] == '=' ||
6259 stdex::isspace(text[this->interval.end]))
6260 break;
6261 else
6262 this->interval.end++;
6263 }
6264 else
6265 break;
6266 }
// NOTE(review): the `if` condition guarding the next statement (source line 6267) is absent from this listing.
6268 name.end = this->interval.end;
6269 else
6270 goto error;
// NOTE(review): text[this->interval.end] is read here without first checking
// this->interval.end < end, unlike the other reads in this method — confirm this is safe
// when `text` is not zero-terminated.
6271 if (text[this->interval.end] == '=') {
6272 this->interval.end++;
6273 value.start = this->interval.end;
// Value: runs up to a control character, DEL (0x7f), '&' or whitespace; may be empty.
6274 for (;;) {
6275 if (this->interval.end < end && text[this->interval.end]) {
6276 if ((unsigned int)text[this->interval.end] < 0x20 ||
6277 (unsigned int)text[this->interval.end] == 0x7f ||
6278 text[this->interval.end] == '&' ||
6279 stdex::isspace(text[this->interval.end]))
6280 break;
6281 else
6282 this->interval.end++;
6283 }
6284 else
6285 break;
6286 }
6287 value.end = this->interval.end;
6288 }
6289 else {
// No "=": the parameter has a name only; mark the value span invalid.
6290 value.start = 1;
6291 value.end = 0;
6292 }
6293 this->interval.start = start;
6294 return true;
6295
6296 error:
6297 invalidate();
6298 return false;
6299 }
6300 };
6301
// Matches a URL of the form [http://server[:port]]path[?param[&param...]].
// The sub-parsers `server`, `port`, `path` and the `params` list hold the parsed pieces.
6305 class http_url : public parser
6306 {
6307 public:
6308 http_url(_In_ const std::locale& locale = std::locale()) :
6309 parser(locale),
6310 port(locale)
6311 {}
6312
// Resets every sub-parser, drops collected parameters and resets the base parser state.
6313 virtual void invalidate()
6314 {
6315 server.invalidate();
6316 port.invalidate();
6317 path.invalidate();
6318 params.clear();
6319 parser::invalidate();
6320 }
6321
6322 http_url_server server;
6323 http_url_port port;
6324 http_url_path path;
6325 std::list<http_url_parameter> params;
6326
6327 protected:
6328 virtual bool do_match(
6329 _In_reads_or_z_(end) const char* text,
6330 _In_ size_t start = 0,
6331 _In_ size_t end = SIZE_MAX,
6332 _In_ int flags = match_default)
6333 {
6334 _Assume_(text || start >= end);
6335 this->interval.end = start;
6336
// Optional "http://" prefix (compared case-insensitively), followed by a mandatory server name.
6337 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", SIZE_MAX, m_locale) == 0) {
6338 this->interval.end += 7;
6339 if (server.match(text, this->interval.end, end, flags))
6340 this->interval.end = server.interval.end;
6341 else
6342 goto error;
6343 if (this->interval.end < end && text[this->interval.end] == ':') {
6344 this->interval.end++;
// A failed port match is not treated as an error here; parsing continues with the path.
6345 if (port.match(text, this->interval.end, end, flags))
6346 this->interval.end = port.interval.end;
6347 }
6348 else {
// No explicit port: the port interval is invalid but its value is set to 80.
6349 port.invalidate();
6350 port.value = 80;
6351 }
6352 }
6353 else {
// No scheme prefix: no server, and the port value is set to 80.
6354 server.invalidate();
6355 port.invalidate();
6356 port.value = 80;
6357 }
6358
// The path is mandatory.
6359 if (path.match(text, this->interval.end, end, flags))
6360 this->interval.end = path.interval.end;
6361 else
6362 goto error;
6363
6364 params.clear();
6365
// Optional query: parameters separated by '&', up to a control character, DEL or whitespace.
6366 if (this->interval.end < end && text[this->interval.end] == '?') {
6367 this->interval.end++;
6368 for (;;) {
6369 if (this->interval.end < end && text[this->interval.end]) {
6370 if ((unsigned int)text[this->interval.end] < 0x20 ||
6371 (unsigned int)text[this->interval.end] == 0x7f ||
6372 stdex::isspace(text[this->interval.end]))
6373 break;
6374 else if (text[this->interval.end] == '&')
6375 this->interval.end++;
6376 else {
6377 http_url_parameter param;
6378 if (param.match(text, this->interval.end, end, flags)) {
6379 this->interval.end = param.interval.end;
6380 params.push_back(std::move(param));
6381 }
6382 else
6383 break;
6384 }
6385 }
6386 else
6387 break;
6388 }
6389 }
6390
6391 this->interval.start = start;
6392 return true;
6393
6394 error:
6395 invalidate();
6396 return false;
6397 }
6398 };
6399
// Matches one or more alphabetic components separated by '-'.
// The span of each component is stored in `components`.
6403 class http_language : public parser
6404 {
6405 public:
// Drops the collected components and resets the base parser state.
6406 virtual void invalidate()
6407 {
6408 components.clear();
6409 parser::invalidate();
6410 }
6411
// Spans of the individual components found by the last match.
6412 std::vector<stdex::interval<size_t>> components;
6413
6414 protected:
6415 virtual bool do_match(
6416 _In_reads_or_z_(end) const char* text,
6417 _In_ size_t start = 0,
6418 _In_ size_t end = SIZE_MAX,
6419 _In_ int flags = match_default)
6420 {
6421 _Assume_(text || start >= end);
6422 this->interval.end = start;
6423 components.clear();
6424 for (;;) {
6425 if (this->interval.end < end && text[this->interval.end]) {
// NOTE(review): the declaration of `k` (source line 6426) is absent from this listing.
6427 k.end = this->interval.end;
// Scan one run of alphabetic characters.
6428 for (;;) {
6429 if (k.end < end && text[k.end]) {
6430 if (stdex::isalpha(text[k.end]))
6431 k.end++;
6432 else
6433 break;
6434 }
6435 else
6436 break;
6437 }
// A non-empty run becomes a component; an empty run ends the scan.
6438 if (this->interval.end < k.end) {
6439 k.start = this->interval.end;
6440 this->interval.end = k.end;
6441 components.push_back(k);
6442 }
6443 else
6444 break;
// Continue only when a '-' separator follows.
6445 if (this->interval.end < end && text[this->interval.end] == '-')
6446 this->interval.end++;
6447 else
6448 break;
6449 }
6450 else
6451 break;
6452 }
6453 if (!components.empty()) {
6454 this->interval.start = start;
// End at the last component so a trailing '-' is not part of the match.
6455 this->interval.end = components.back().end;
6456 return true;
6457 }
6458 this->interval.invalidate();
6459 return false;
6460 }
6461 };
6462
// Scans a decimal number made of digits with an optional '.' and fraction digits.
// `value` defaults to 1.0f and is reset to 1.0f on failure and on invalidate().
6466 class http_weight : public parser
6467 {
6468 public:
6469 http_weight(_In_ const std::locale& locale = std::locale()) :
6470 parser(locale),
6471 value(1.0f)
6472 {}
6473
6474 virtual void invalidate()
6475 {
6476 value = 1.0f;
6477 parser::invalidate();
6478 }
6479
6480 float value;
6481
6482 protected:
6483 virtual bool do_match(
6484 _In_reads_or_z_(end) const char* text,
6485 _In_ size_t start = 0,
6486 _In_ size_t end = SIZE_MAX,
6487 _In_ int flags = match_default)
6488 {
6489 _Assume_(text || start >= end);
// celi_del accumulates the digits before '.', decimalni_del the digits after it,
// and decimalni_del_n is multiplied by 10 for every fraction digit.
6490 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
6491 this->interval.end = start;
6492 for (;;) {
6493 if (this->interval.end < end && text[this->interval.end]) {
6494 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6495 celi_del = celi_del * 10 + static_cast<size_t>(text[this->interval.end] - '0');
6496 this->interval.end++;
6497 }
6498 else if (text[this->interval.end] == '.') {
6499 this->interval.end++;
// Fraction digits; scanning stops after them (only one '.' is consumed).
6500 for (;;) {
6501 if (this->interval.end < end && text[this->interval.end]) {
6502 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6503 decimalni_del = decimalni_del * 10 + static_cast<size_t>(text[this->interval.end] - '0');
6504 decimalni_del_n *= 10;
6505 this->interval.end++;
6506 }
6507 else
6508 break;
6509 }
6510 else
6511 break;
6512 }
6513 break;
6514 }
6515 else
6516 break;
6517 }
6518 else
6519 break;
6520 }
// NOTE(review): source lines 6521-6522 (the start of this success branch) are absent from this listing.
6523 this->interval.start = start;
6524 return true;
6525 }
6526 value = 1.0f;
6527 this->interval.invalidate();
6528 return false;
6529 }
6530 };
6531
6535 class http_asterisk : public parser
6536 {
6537 protected:
6538 virtual bool do_match(
6539 _In_reads_or_z_(end) const char* text,
6540 _In_ size_t start = 0,
6541 _In_ size_t end = SIZE_MAX,
6542 _In_ int flags = match_default)
6543 {
6544 _Assume_(text || end <= start);
6545 if (start < end && text[start] == '*') {
6546 this->interval.end = (this->interval.start = start) + 1;
6547 return true;
6548 }
6549 this->interval.invalidate();
6550 return false;
6551 }
6552 };
6553
6557 template <class T, class T_asterisk = http_asterisk>
6559 {
6560 public:
// Constructs the parser; the locale is forwarded to the base parser and to `factor`.
6561 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
6562 parser(locale),
6563 factor(locale)
6564 {}
6565
// Resets the asterisk, value and factor sub-parsers, then the base parser state.
6566 virtual void invalidate()
6567 {
6568 asterisk.invalidate();
6569 value.invalidate();
6570 factor.invalidate();
6571 parser::invalidate();
6572 }
6573
6574 T_asterisk asterisk;
6575 T value;
6576 http_weight factor;
6577
6578 protected:
6579 virtual bool do_match(
6580 _In_reads_or_z_(end) const char* text,
6581 _In_ size_t start = 0,
6582 _In_ size_t end = SIZE_MAX,
6583 _In_ int flags = match_default)
6584 {
6585 _Assume_(text || start >= end);
6586 size_t konec_vrednosti;
6587 this->interval.end = start;
6588 if (asterisk.match(text, this->interval.end, end, flags)) {
6589 this->interval.end = konec_vrednosti = asterisk.interval.end;
6590 value.invalidate();
6591 }
6592 else if (value.match(text, this->interval.end, end, flags)) {
6593 this->interval.end = konec_vrednosti = value.interval.end;
6594 asterisk.invalidate();
6595 }
6596 else {
6597 asterisk.invalidate();
6598 value.invalidate();
6599 this->interval.invalidate();
6600 return false;
6601 }
6602
6603 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6604 if (this->interval.end < end && text[this->interval.end] == ';') {
6605 this->interval.end++;
6606 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6607 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
6608 this->interval.end++;
6609 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6610 if (this->interval.end < end && text[this->interval.end] == '=') {
6611 this->interval.end++;
6612 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6613 if (factor.match(text, this->interval.end, end, flags))
6614 this->interval.end = factor.interval.end;
6615 }
6616 }
6617 }
6618 if (!factor.interval) {
6619 factor.invalidate();
6620 this->interval.end = konec_vrednosti;
6621 }
6622 this->interval.start = start;
6623 return true;
6624 }
6625 };
6626
6631 {
6632 public:
// Resets the `name` and `value` sub-parsers, then the base parser state.
6633 virtual void invalidate()
6634 {
6635 name.invalidate();
6636 value.invalidate();
6637 parser::invalidate();
6638 }
6639
6640 http_token name;
6641 http_value value;
6642
6643 protected:
6644 virtual bool do_match(
6645 _In_reads_or_z_(end) const char* text,
6646 _In_ size_t start = 0,
6647 _In_ size_t end = SIZE_MAX,
6648 _In_ int flags = match_default)
6649 {
6650 _Assume_(text || start >= end);
6651 this->interval.end = start;
6652 if (this->interval.end < end && text[this->interval.end] == '$')
6653 this->interval.end++;
6654 else
6655 goto error;
6656 if (name.match(text, this->interval.end, end, flags))
6657 this->interval.end = name.interval.end;
6658 else
6659 goto error;
6660 while (m_space.match(text, this->interval.end, end, flags))
6661 this->interval.end = m_space.interval.end;
6662 if (this->interval.end < end && text[this->interval.end] == '=')
6663 this->interval.end++;
6664 else
6665 goto error;
6666 while (m_space.match(text, this->interval.end, end, flags))
6667 this->interval.end = m_space.interval.end;
6668 if (value.match(text, this->interval.end, end, flags))
6669 this->interval.end = value.interval.end;
6670 else
6671 goto error;
6672 this->interval.start = start;
6673 return true;
6674
6675 error:
6676 invalidate();
6677 return false;
6678 }
6679
6680 http_space m_space;
6681 };
6682
6686 class http_cookie : public parser
6687 {
6688 public:
// Resets the `name` and `value` sub-parsers, drops all collected parameters,
// then resets the base parser state.
6689 virtual void invalidate()
6690 {
6691 name.invalidate();
6692 value.invalidate();
6693 params.clear();
6694 parser::invalidate();
6695 }
6696
6699 std::list<http_cookie_parameter> params;
6700
6701 protected:
// Matches `name = value` followed by zero or more `;`-introduced parameters.
// On success the interval ends at the last accepted parameter (or at the value
// when there are none); on failure the whole parser is invalidated.
6702 virtual bool do_match(
6703 _In_reads_or_z_(end) const char* text,
6704 _In_ size_t start = 0,
6705 _In_ size_t end = SIZE_MAX,
6706 _In_ int flags = match_default)
6707 {
6708 _Assume_(text || start >= end);
6709 this->interval.end = start;
// Cookie name.
6710 if (name.match(text, this->interval.end, end, flags))
6711 this->interval.end = name.interval.end;
6712 else
6713 goto error;
// `=` separator with optional space on both sides.
6714 while (m_space.match(text, this->interval.end, end, flags))
6715 this->interval.end = m_space.interval.end;
6716 if (this->interval.end < end && text[this->interval.end] == '=')
6717 this->interval.end++;
6718 else
6719 goto error;
6720 while (m_space.match(text, this->interval.end, end, flags))
6721 this->interval.end = m_space.interval.end;
// Cookie value.
6722 if (value.match(text, this->interval.end, end, flags))
6723 this->interval.end = value.interval.end;
6724 else
6725 goto error;
// Optional parameters: skip space, and after each `;` try to parse one parameter.
// The loop stops at end of input, at any other character, or at a `;` that is
// not followed by a parameter.
6726 params.clear();
6727 for (;;) {
6728 if (this->interval.end < end && text[this->interval.end]) {
6729 if (m_space.match(text, this->interval.end, end, flags))
6730 this->interval.end = m_space.interval.end;
6731 else if (text[this->interval.end] == ';') {
6732 this->interval.end++;
6733 while (m_space.match(text, this->interval.end, end, flags))
6734 this->interval.end = m_space.interval.end;
// NOTE(review): the listing drops its line 6735 here. `param` is presumably declared
// there as a fresh http_cookie_parameter for this iteration - confirm against the source.
6736 if (param.match(text, this->interval.end, end, flags)) {
6737 this->interval.end = param.interval.end;
6738 params.push_back(std::move(param));
6739 }
6740 else
6741 break;
6742 }
6743 else
6744 break;
6745 }
6746 else
6747 break;
6748 }
6749 this->interval.start = start;
// Discard trailing space and any dangling `;` consumed after the last accepted element.
6750 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
6751 return true;
6752
6753 error:
6754 invalidate();
6755 return false;
6756 }
6757
6758 http_space m_space;
6759 };
6760
6764 class http_agent : public parser
6765 {
6766 public:
// Resets `type` and `version` to start=1/end=0 (start past end; presumably the
// "no match" encoding for an interval - confirm), then resets the base parser state.
6767 virtual void invalidate()
6768 {
6769 type.start = 1;
6770 type.end = 0;
6771 version.start = 1;
6772 version.end = 0;
6773 parser::invalidate();
6774 }
6775
6778
6779 protected:
// Scans a `type[/version]` pair. `type` runs from `start` up to the first `/`,
// whitespace or end of input; when a `/` is found, `version` runs from just after
// it up to the next whitespace or end of input.
6780 virtual bool do_match(
6781 _In_reads_or_z_(end) const char* text,
6782 _In_ size_t start = 0,
6783 _In_ size_t end = SIZE_MAX,
6784 _In_ int flags = match_default)
6785 {
6786 _Assume_(text || start >= end);
6787 this->interval.end = start;
6788 type.start = this->interval.end;
6789 for (;;) {
6790 if (this->interval.end < end && text[this->interval.end]) {
6791 if (text[this->interval.end] == '/') {
// Separator found: close `type` and collect `version` up to whitespace or end of input.
6792 type.end = this->interval.end;
6793 this->interval.end++;
6794 version.start = this->interval.end;
6795 for (;;) {
6796 if (this->interval.end < end && text[this->interval.end]) {
6797 if (stdex::isspace(text[this->interval.end])) {
6798 version.end = this->interval.end;
6799 break;
6800 }
6801 else
6802 this->interval.end++;
6803 }
6804 else {
6805 version.end = this->interval.end;
6806 break;
6807 }
6808 }
6809 break;
6810 }
6811 else if (stdex::isspace(text[this->interval.end])) {
// Whitespace before any `/`: `type` ends here; `version` is not assigned on this path.
6812 type.end = this->interval.end;
6813 break;
6814 }
6815 else
6816 this->interval.end++;
6817 }
6818 else {
// End of input before any `/`: `type` ends here; `version` is not assigned on this path.
6819 type.end = this->interval.end;
6820 break;
6821 }
6822 }
// NOTE(review): the listing drops its line 6823 here. It evidently opens the conditional
// block closed at listing line 6826 (likely a check that something was matched) - confirm.
6824 this->interval.start = start;
6825 return true;
6826 }
// Failure path: reset both sub-intervals and the overall interval.
6827 type.start = 1;
6828 type.end = 0;
6829 version.start = 1;
6830 version.end = 0;
6831 this->interval.invalidate();
6832 return false;
6833 }
6834 };
6835
6839 class http_protocol : public parser
6840 {
6841 public:
// Constructs the parser with the given locale. `version` starts at 0x009, i.e. 0.9
// in the major * 0x100 + minor encoding that do_match() stores.
6842 http_protocol(_In_ const std::locale& locale = std::locale()) :
6843 parser(locale),
6844 version(0x009)
6845 {}
6846
// Resets `type`, `version_maj` and `version_min` to start=1/end=0 (start past end),
// restores `version` to its constructor default 0x009, then resets the base parser state.
6847 virtual void invalidate()
6848 {
6849 type.start = 1;
6850 type.end = 0;
6851 version_maj.start = 1;
6852 version_maj.end = 0;
6853 version_min.start = 1;
6854 version_min.end = 0;
6855 version = 0x009;
6856 parser::invalidate();
6857 }
6858
6860 stdex::interval<size_t> version_maj;
6861 stdex::interval<size_t> version_min;
6863
6864 protected:
6865 virtual bool do_match(
6866 _In_reads_or_z_(end) const char* text,
6867 _In_ size_t start = 0,
6868 _In_ size_t end = SIZE_MAX,
6869 _In_ int flags = match_default)
6870 {
6871 _Assume_(text || start >= end);
6872 this->interval.end = start;
6873 type.start = this->interval.end;
6874 for (;;) {
6875 if (this->interval.end < end && text[this->interval.end]) {
6876 if (text[this->interval.end] == '/') {
6877 type.end = this->interval.end;
6878 this->interval.end++;
6879 break;
6880 }
6881 else if (stdex::isspace(text[this->interval.end]))
6882 goto error;
6883 else
6884 this->interval.end++;
6885 }
6886 else {
6887 type.end = this->interval.end;
6888 goto error;
6889 }
6890 }
6891 version_maj.start = this->interval.end;
6892 for (;;) {
6893 if (this->interval.end < end && text[this->interval.end]) {
6894 if (text[this->interval.end] == '.') {
6895 version_maj.end = this->interval.end;
6896 this->interval.end++;
6897 version_min.start = this->interval.end;
6898 for (;;) {
6899 if (this->interval.end < end && text[this->interval.end]) {
6900 if (stdex::isspace(text[this->interval.end])) {
6901 version_min.end = this->interval.end;
6902 version =
6903 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6904 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6905 break;
6906 }
6907 else
6908 this->interval.end++;
6909 }
6910 else
6911 goto error;
6912 }
6913 break;
6914 }
6915 else if (stdex::isspace(text[this->interval.end])) {
6916 version_maj.end = this->interval.end;
6917 version_min.start = 1;
6918 version_min.end = 0;
6919 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6920 break;
6921 }
6922 else
6923 this->interval.end++;
6924 }
6925 else
6926 goto error;
6927 }
6928 this->interval.start = start;
6929 return true;
6930
6931 error:
6932 invalidate();
6933 return false;
6934 }
6935 };
6936
6940 class http_request : public parser
6941 {
6942 public:
// Constructs the parser and passes the same locale on to the `url` and `protocol` sub-parsers.
6943 http_request(_In_ const std::locale& locale = std::locale()) :
6944 parser(locale),
6945 url(locale),
6946 protocol(locale)
6947 {}
6948
// Resets `verb` to start=1/end=0 (start past end), resets the `url` and `protocol`
// sub-parsers, then resets the base parser state.
6949 virtual void invalidate()
6950 {
6951 verb.start = 1;
6952 verb.end = 0;
6953 url.invalidate();
6954 protocol.invalidate();
6955 parser::invalidate();
6956 }
6957
6959 http_url url;
6960 http_protocol protocol;
6961
6962 protected:
6963 virtual bool do_match(
6964 _In_reads_or_z_(end) const char* text,
6965 _In_ size_t start = 0,
6966 _In_ size_t end = SIZE_MAX,
6967 _In_ int flags = match_default)
6968 {
6969 _Assume_(text || start >= end);
6970 this->interval.end = start;
6971
6972 for (;;) {
6973 if (m_line_break.match(text, this->interval.end, end, flags))
6974 goto error;
6975 else if (this->interval.end < end && text[this->interval.end]) {
6976 if (stdex::isspace(text[this->interval.end]))
6977 this->interval.end++;
6978 else
6979 break;
6980 }
6981 else
6982 goto error;
6983 }
6984 verb.start = this->interval.end;
6985 for (;;) {
6986 if (m_line_break.match(text, this->interval.end, end, flags))
6987 goto error;
6988 else if (this->interval.end < end && text[this->interval.end]) {
6989 if (stdex::isspace(text[this->interval.end])) {
6990 verb.end = this->interval.end;
6991 this->interval.end++;
6992 break;
6993 }
6994 else
6995 this->interval.end++;
6996 }
6997 else
6998 goto error;
6999 }
7000
7001 for (;;) {
7002 if (m_line_break.match(text, this->interval.end, end, flags))
7003 goto error;
7004 else if (this->interval.end < end && text[this->interval.end]) {
7005 if (stdex::isspace(text[this->interval.end]))
7006 this->interval.end++;
7007 else
7008 break;
7009 }
7010 else
7011 goto error;
7012 }
7013 if (url.match(text, this->interval.end, end, flags))
7014 this->interval.end = url.interval.end;
7015 else
7016 goto error;
7017
7018 protocol.invalidate();
7019 for (;;) {
7020 if (m_line_break.match(text, this->interval.end, end, flags)) {
7021 this->interval.end = m_line_break.interval.end;
7022 goto end;
7023 }
7024 else if (this->interval.end < end && text[this->interval.end]) {
7025 if (stdex::isspace(text[this->interval.end]))
7026 this->interval.end++;
7027 else
7028 break;
7029 }
7030 else
7031 goto end;
7032 }
7033 for (;;) {
7034 if (m_line_break.match(text, this->interval.end, end, flags)) {
7035 this->interval.end = m_line_break.interval.end;
7036 goto end;
7037 }
7038 else if (protocol.match(text, this->interval.end, end, flags)) {
7039 this->interval.end = protocol.interval.end;
7040 break;
7041 }
7042 else
7043 goto end;
7044 }
7045
7046 for (;;) {
7047 if (m_line_break.match(text, this->interval.end, end, flags)) {
7048 this->interval.end = m_line_break.interval.end;
7049 break;
7050 }
7051 else if (this->interval.end < end && text[this->interval.end])
7052 this->interval.end++;
7053 else
7054 goto end;
7055 }
7056
7057 end:
7058 this->interval.start = start;
7059 return true;
7060
7061 error:
7062 invalidate();
7063 return false;
7064 }
7065
7066 http_line_break m_line_break;
7067 };
7068
7072 class http_header : public parser
7073 {
7074 public:
// Resets `name` and `value` to start=1/end=0 (start past end), then resets the base parser state.
7075 virtual void invalidate()
7076 {
7077 name.start = 1;
7078 name.end = 0;
7079 value.start = 1;
7080 value.end = 0;
7081 parser::invalidate();
7082 }
7083
7086
7087 protected:
7088 virtual bool do_match(
7089 _In_reads_or_z_(end) const char* text,
7090 _In_ size_t start = 0,
7091 _In_ size_t end = SIZE_MAX,
7092 _In_ int flags = match_default)
7093 {
7094 _Assume_(text || start >= end);
7095 this->interval.end = start;
7096
7097 if (m_line_break.match(text, this->interval.end, end, flags) ||
7098 (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])))
7099 goto error;
7100 name.start = this->interval.end;
7101 for (;;) {
7102 if (m_line_break.match(text, this->interval.end, end, flags))
7103 goto error;
7104 else if (this->interval.end < end && text[this->interval.end]) {
7105 if (stdex::isspace(text[this->interval.end])) {
7106 name.end = this->interval.end;
7107 this->interval.end++;
7108 for (;;) {
7109 if (m_line_break.match(text, this->interval.end, end, flags))
7110 goto error;
7111 else if (this->interval.end < end && text[this->interval.end]) {
7112 if (stdex::isspace(text[this->interval.end]))
7113 this->interval.end++;
7114 else
7115 break;
7116 }
7117 else
7118 goto error;
7119 }
7120 if (this->interval.end < end && text[this->interval.end] == ':') {
7121 this->interval.end++;
7122 break;
7123 }
7124 else
7125 goto error;
7126 break;
7127 }
7128 else if (text[this->interval.end] == ':') {
7129 name.end = this->interval.end;
7130 this->interval.end++;
7131 break;
7132 }
7133 else
7134 this->interval.end++;
7135 }
7136 else
7137 goto error;
7138 }
7139 value.start = SIZE_MAX;
7140 value.end = 0;
7141 for (;;) {
7142 if (m_line_break.match(text, this->interval.end, end, flags)) {
7143 this->interval.end = m_line_break.interval.end;
7144 if (!m_line_break.match(text, this->interval.end, end, flags) &&
7145 this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end]))
7146 this->interval.end++;
7147 else
7148 break;
7149 }
7150 else if (this->interval.end < end && text[this->interval.end]) {
7151 if (stdex::isspace(text[this->interval.end]))
7152 this->interval.end++;
7153 else {
7154 if (value.start == SIZE_MAX) value.start = this->interval.end;
7155 value.end = ++this->interval.end;
7156 }
7157 }
7158 else
7159 break;
7160 }
7161 this->interval.start = start;
7162 return true;
7163
7164 error:
7165 invalidate();
7166 return false;
7167 }
7168
7169 http_line_break m_line_break;
7170 };
7171
7175 template <class KEY, class T>
7176 class http_value_collection : public T
7177 {
7178 public:
7179 void insert(
7180 _In_reads_or_z_(end) const char* text,
7181 _In_ size_t start = 0,
7182 _In_ size_t end = SIZE_MAX,
7183 _In_ int flags = match_default)
7184 {
7185 while (start < end) {
7186 while (start < end && text[start] && stdex::isspace(text[start])) start++;
7187 if (start < end && text[start] == ',') {
7188 start++;
7189 while (start < end&& text[start] && stdex::isspace(text[start])) start++;
7190 }
7191 KEY el;
7192 if (el.match(text, start, end, flags)) {
7193 start = el.interval.end;
7194 T::insert(std::move(el));
7195 }
7196 else
7197 break;
7198 }
7199 }
7200 };
7201
7202 template <class T>
// Strict ordering by weight: returns true when a.factor.value is greater than
// b.factor.value, so a sorted container using it lists higher factors first.
7204 constexpr bool operator()(const T& a, const T& b) const noexcept
7205 {
7206 return a.factor.value > b.factor.value;
7207 }
7208 };
7209
7213 template <class T, class AX = std::allocator<T>>
7215
7219 template <class T>
7221 {
7222 public:
7224 _In_ const std::shared_ptr<basic_parser<T>>& quote,
7225 _In_ const std::shared_ptr<basic_parser<T>>& chr,
7226 _In_ const std::shared_ptr<basic_parser<T>>& escape,
7227 _In_ const std::shared_ptr<basic_parser<T>>& sol,
7228 _In_ const std::shared_ptr<basic_parser<T>>& bs,
7229 _In_ const std::shared_ptr<basic_parser<T>>& ff,
7230 _In_ const std::shared_ptr<basic_parser<T>>& lf,
7231 _In_ const std::shared_ptr<basic_parser<T>>& cr,
7232 _In_ const std::shared_ptr<basic_parser<T>>& htab,
7233 _In_ const std::shared_ptr<basic_parser<T>>& uni,
7234 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
7235 _In_ const std::locale& locale = std::locale()) :
7237 m_quote(quote),
7238 m_chr(chr),
7239 m_escape(escape),
7240 m_sol(sol),
7241 m_bs(bs),
7242 m_ff(ff),
7243 m_lf(lf),
7244 m_cr(cr),
7245 m_htab(htab),
7246 m_uni(uni),
7247 m_hex(hex)
7248 {}
7249
// Clears the decoded string value.
7250 virtual void invalidate()
7251 {
7252 value.clear();
// NOTE(review): the listing drops its line 7253 here; presumably it forwards to the
// base-class invalidate() - confirm against the source.
7254 }
7255
7256 std::basic_string<T> value;
7257
7258 protected:
7259 virtual bool do_match(
7260 _In_reads_or_z_opt_(end) const T* text,
7261 _In_ size_t start = 0,
7262 _In_ size_t end = SIZE_MAX,
7263 _In_ int flags = match_default)
7264 {
7265 _Assume_(text || start >= end);
7266 this->interval.end = start;
7267 if (m_quote->match(text, this->interval.end, end, flags)) {
7268 this->interval.end = m_quote->interval.end;
7269 value.clear();
7270 for (;;) {
7271 if (m_quote->match(text, this->interval.end, end, flags)) {
7272 this->interval.start = start;
7273 this->interval.end = m_quote->interval.end;
7274 return true;
7275 }
7276 if (m_escape->match(text, this->interval.end, end, flags)) {
7277 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
7278 value += '"'; this->interval.end = m_quote->interval.end;
7279 continue;
7280 }
7281 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
7282 value += '/'; this->interval.end = m_sol->interval.end;
7283 continue;
7284 }
7285 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
7286 value += '\b'; this->interval.end = m_bs->interval.end;
7287 continue;
7288 }
7289 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
7290 value += '\f'; this->interval.end = m_ff->interval.end;
7291 continue;
7292 }
7293 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
7294 value += '\n'; this->interval.end = m_lf->interval.end;
7295 continue;
7296 }
7297 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
7298 value += '\r'; this->interval.end = m_cr->interval.end;
7299 continue;
7300 }
7301 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
7302 value += '\t'; this->interval.end = m_htab->interval.end;
7303 continue;
7304 }
7305 if (
7306 m_uni->match(text, m_escape->interval.end, end, flags) &&
7307 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
7308 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
7309 {
7310 _Assume_(m_hex->value <= 0xffff);
7311 if (sizeof(T) == 1) {
7312 if (m_hex->value > 0x7ff) {
7313 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
7314 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
7315 value += (T)(0x80 | (m_hex->value & 0x3f));
7316 }
7317 else if (m_hex->value > 0x7f) {
7318 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
7319 value += (T)(0x80 | (m_hex->value & 0x3f));
7320 }
7321 else
7322 value += (T)(m_hex->value & 0x7f);
7323 }
7324 else
7325 value += (T)m_hex->value;
7326 this->interval.end = m_hex->interval.end;
7327 continue;
7328 }
7329 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
7330 value += '\\'; this->interval.end = m_escape->interval.end;
7331 continue;
7332 }
7333 }
7334 if (m_chr->match(text, this->interval.end, end, flags)) {
7335 value.append(text + m_chr->interval.start, m_chr->interval.size());
7336 this->interval.end = m_chr->interval.end;
7337 continue;
7338 }
7339 break;
7340 }
7341 }
7342 value.clear();
7343 this->interval.invalidate();
7344 return false;
7345 }
7346
7347 std::shared_ptr<basic_parser<T>> m_quote;
7348 std::shared_ptr<basic_parser<T>> m_chr;
7349 std::shared_ptr<basic_parser<T>> m_escape;
7350 std::shared_ptr<basic_parser<T>> m_sol;
7351 std::shared_ptr<basic_parser<T>> m_bs;
7352 std::shared_ptr<basic_parser<T>> m_ff;
7353 std::shared_ptr<basic_parser<T>> m_lf;
7354 std::shared_ptr<basic_parser<T>> m_cr;
7355 std::shared_ptr<basic_parser<T>> m_htab;
7356 std::shared_ptr<basic_parser<T>> m_uni;
7357 std::shared_ptr<basic_integer16<T>> m_hex;
7358 };
7359
7362#ifdef _UNICODE
7363 using tjson_string = wjson_string;
7364#else
7365 using tjson_string = json_string;
7366#endif
7367
7371 template <class T>
7373 {
7374 public:
// Resets the `content` interval.
7375 virtual void invalidate()
7376 {
7377 this->content.invalidate();
// NOTE(review): the listing drops its line 7378 here; presumably it forwards to the
// base-class invalidate() - confirm against the source.
7379 }
7380
7382
7383 protected:
7384 virtual bool do_match(
7385 _In_reads_or_z_opt_(end) const T* text,
7386 _In_ size_t start = 0,
7387 _In_ size_t end = SIZE_MAX,
7388 _In_ int flags = match_multiline)
7389 {
7390 _Unreferenced_(flags);
7391 _Assume_(text || start + 1 >= end);
7392 if (start + 1 < end &&
7393 text[start] == '/' &&
7394 text[start + 1] == '*')
7395 {
7396 // /*
7397 this->content.start = this->interval.end = start + 2;
7398 for (;;) {
7399 if (this->interval.end >= end || !text[this->interval.end])
7400 break;
7401 if (this->interval.end + 1 < end &&
7402 text[this->interval.end] == '*' &&
7403 text[this->interval.end + 1] == '/')
7404 {
7405 // /*...*/
7406 this->content.end = this->interval.end;
7407 this->interval.start = start;
7408 this->interval.end = this->interval.end + 2;
7409 return true;
7410 }
7411 this->interval.end++;
7412 }
7413 }
7414 this->content.invalidate();
7415 this->interval.invalidate();
7416 return false;
7417 }
7418 };
7419
7420 using css_comment = basic_css_comment<char>;
7421 using wcss_comment = basic_css_comment<wchar_t>;
7422#ifdef _UNICODE
7423 using tcss_comment = wcss_comment;
7424#else
7425 using tcss_comment = css_comment;
7426#endif
7427
7431 template <class T>
7432 class basic_css_cdo : public basic_parser<T>
7433 {
7434 protected:
7435 virtual bool do_match(
7436 _In_reads_or_z_opt_(end) const T* text,
7437 _In_ size_t start = 0,
7438 _In_ size_t end = SIZE_MAX,
7439 _In_ int flags = match_multiline)
7440 {
7441 _Unreferenced_(flags);
7442 _Assume_(text || start + 3 >= end);
7443 if (start + 3 < end &&
7444 text[start] == '<' &&
7445 text[start + 1] == '!' &&
7446 text[start + 2] == '-' &&
7447 text[start + 3] == '-')
7448 {
7449 this->interval.start = start;
7450 this->interval.end = start + 4;
7451 return true;
7452 }
7453 this->interval.invalidate();
7454 return false;
7455 }
7456 };
7457
7460#ifdef _UNICODE
7461 using tcss_cdo = wcss_cdo;
7462#else
7463 using tcss_cdo = css_cdo;
7464#endif
7465
7469 template <class T>
7470 class basic_css_cdc : public basic_parser<T>
7471 {
7472 protected:
7473 virtual bool do_match(
7474 _In_reads_or_z_opt_(end) const T* text,
7475 _In_ size_t start = 0,
7476 _In_ size_t end = SIZE_MAX,
7477 _In_ int flags = match_multiline)
7478 {
7479 _Unreferenced_(flags);
7480 _Assume_(text || start + 2 >= end);
7481 if (start + 2 < end &&
7482 text[start] == '-' &&
7483 text[start + 1] == '-' &&
7484 text[start + 2] == '>')
7485 {
7486 this->interval.start = start;
7487 this->interval.end = start + 3;
7488 return true;
7489 }
7490 this->interval.invalidate();
7491 return false;
7492 }
7493 };
7494
7497#ifdef _UNICODE
7498 using tcss_cdc = wcss_cdc;
7499#else
7500 using tcss_cdc = css_cdc;
7501#endif
7502
7506 template <class T>
7508 {
7509 public:
// Resets the `content` interval.
7510 virtual void invalidate()
7511 {
7512 this->content.invalidate();
// NOTE(review): the listing drops its line 7513 here; presumably it forwards to the
// base-class invalidate() - confirm against the source.
7514 }
7515
7517
7518 protected:
7519 virtual bool do_match(
7520 _In_reads_or_z_opt_(end) const T* text,
7521 _In_ size_t start = 0,
7522 _In_ size_t end = SIZE_MAX,
7523 _In_ int flags = match_multiline)
7524 {
7525 _Unreferenced_(flags);
7526 this->interval.end = start;
7527 _Assume_(text || this->interval.end >= end);
7528 if (this->interval.end < end &&
7529 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7530 {
7531 // "Quoted...
7532 T quote = text[this->interval.end];
7533 this->content.start = ++this->interval.end;
7534 for (;;) {
7535 if (this->interval.end >= end || !text[this->interval.end])
7536 break;
7537 if (text[this->interval.end] == quote) {
7538 // End quote"
7539 this->content.end = this->interval.end;
7540 this->interval.start = start;
7541 this->interval.end++;
7542 return true;
7543 }
7544 if (this->interval.end + 1 < end &&
7545 text[this->interval.end] == '\\' &&
7546 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7547 {
7548 // Escaped quote
7549 this->interval.end = this->interval.end + 2;
7550 }
7551 else
7552 this->interval.end++;
7553 }
7554 }
7555
7556 this->content.invalidate();
7557 this->interval.invalidate();
7558 return false;
7559 }
7560 };
7561
7562 using css_string = basic_css_string<char>;
7563 using wcss_string = basic_css_string<wchar_t>;
7564#ifdef _UNICODE
7565 using tcss_string = wcss_string;
7566#else
7567 using tcss_string = css_string;
7568#endif
7569
7573 template <class T>
7574 class basic_css_uri : public basic_parser<T>
7575 {
7576 public:
// Resets the `content` interval.
7577 virtual void invalidate()
7578 {
7579 this->content.invalidate();
// NOTE(review): the listing drops its line 7580 here; presumably it forwards to the
// base-class invalidate() - confirm against the source.
7581 }
7582
7584
7585 protected:
7586 virtual bool do_match(
7587 _In_reads_or_z_opt_(end) const T* text,
7588 _In_ size_t start = 0,
7589 _In_ size_t end = SIZE_MAX,
7590 _In_ int flags = match_multiline)
7591 {
7592 _Unreferenced_(flags);
7593 this->interval.end = start;
7594 _Assume_(text || this->interval.end + 3 >= end);
7595 if (this->interval.end + 3 < end &&
7596 (text[this->interval.end] == 'u' || text[this->interval.end] == 'U') &&
7597 (text[this->interval.end + 1] == 'r' || text[this->interval.end + 1] == 'R') &&
7598 (text[this->interval.end + 2] == 'l' || text[this->interval.end + 2] == 'L') &&
7599 text[this->interval.end + 3] == '(')
7600 {
7601 // url(
7602 this->interval.end = this->interval.end + 4;
7603
7604 // Skip whitespace.
7605 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7606 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7607
7608 if (this->interval.end < end &&
7609 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7610 {
7611 // url("Quoted...
7612 T quote = text[this->interval.end];
7613 this->content.start = ++this->interval.end;
7614 for (;;) {
7615 if (this->interval.end >= end || !text[this->interval.end])
7616 goto error;
7617 if (text[this->interval.end] == quote) {
7618 // End quote"
7619 this->content.end = this->interval.end;
7620 this->interval.end++;
7621 break;
7622 }
7623 if (this->interval.end + 1 < end &&
7624 text[this->interval.end] == '\\' &&
7625 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7626 {
7627 // Escaped quote
7628 this->interval.end = this->interval.end + 2;
7629 }
7630 else
7631 this->interval.end++;
7632 }
7633
7634 // Skip whitespace.
7635 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7636
7637 if (this->interval.end < end &&
7638 text[this->interval.end] == ')')
7639 {
7640 // url("...")
7641 this->interval.start = start;
7642 this->interval.end++;
7643 return true;
7644 }
7645 }
7646 else {
7647 // url(...
7648 this->content.start = content.end = this->interval.end;
7649 for (;;) {
7650 if (this->interval.end >= end || !text[this->interval.end])
7651 goto error;
7652 if (text[this->interval.end] == ')') {
7653 // url(...)
7654 this->interval.start = start;
7655 this->interval.end++;
7656 return true;
7657 }
7658 if (ctype.is(ctype.space, text[this->interval.end]))
7659 this->interval.end++;
7660 else
7661 this->content.end = ++this->interval.end;
7662 }
7663 }
7664 }
7665
7666 error:
7667 invalidate();
7668 return false;
7669 }
7670 };
7671
7672 using css_uri = basic_css_uri<char>;
7673 using wcss_uri = basic_css_uri<wchar_t>;
7674#ifdef _UNICODE
7675 using tcss_uri = wcss_uri;
7676#else
7677 using tcss_uri = css_uri;
7678#endif
7679
7683 template <class T>
7685 {
7686 public:
// Resets the `content` interval.
7687 virtual void invalidate()
7688 {
7689 this->content.invalidate();
// NOTE(review): the listing drops its line 7690 here; presumably it forwards to the
// base-class invalidate() - confirm against the source.
7691 }
7692
7694
7695 protected:
7696 virtual bool do_match(
7697 _In_reads_or_z_opt_(end) const T* text,
7698 _In_ size_t start = 0,
7699 _In_ size_t end = SIZE_MAX,
7700 _In_ int flags = match_multiline)
7701 {
7702 _Unreferenced_(flags);
7703 this->interval.end = start;
7704 _Assume_(text || this->interval.end + 6 >= end);
7705 if (this->interval.end + 6 < end &&
7706 text[this->interval.end] == '@' &&
7707 (text[this->interval.end + 1] == 'i' || text[this->interval.end + 1] == 'I') &&
7708 (text[this->interval.end + 2] == 'm' || text[this->interval.end + 2] == 'M') &&
7709 (text[this->interval.end + 3] == 'p' || text[this->interval.end + 3] == 'P') &&
7710 (text[this->interval.end + 4] == 'o' || text[this->interval.end + 4] == 'O') &&
7711 (text[this->interval.end + 5] == 'r' || text[this->interval.end + 5] == 'R') &&
7712 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T'))
7713 {
7714 // @import...
7715 this->interval.end = this->interval.end + 7;
7716
7717 // Skip whitespace.
7718 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7719 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7720
7721 if (this->interval.end < end &&
7722 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7723 {
7724 // @import "Quoted
7725 T quote = text[this->interval.end];
7726 this->content.start = ++this->interval.end;
7727 for (;;) {
7728 if (this->interval.end >= end || !text[this->interval.end])
7729 goto error;
7730 if (text[this->interval.end] == quote) {
7731 // End quote"
7732 this->content.end = this->interval.end;
7733 this->interval.start = start;
7734 this->interval.end++;
7735 return true;
7736 }
7737 if (this->interval.end + 1 < end &&
7738 text[this->interval.end] == '\\' &&
7739 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7740 {
7741 // Escaped quote
7742 this->interval.end = this->interval.end + 2;
7743 }
7744 else
7745 this->interval.end++;
7746 }
7747 }
7748 }
7749
7750 error:
7751 invalidate();
7752 return false;
7753 }
7754 };
7755
7756 using css_import = basic_css_import<char>;
7757 using wcss_import = basic_css_import<wchar_t>;
7758#ifdef _UNICODE
7759 using tcss_import = wcss_import;
7760#else
7761 using tcss_import = css_import;
7762#endif
7763
7767 template <class T>
7769 {
7770 public:
// Resets the `base_type`, `sub_type` and `charset` intervals.
7771 virtual void invalidate()
7772 {
7773 this->base_type.invalidate();
7774 this->sub_type.invalidate();
7775 this->charset.invalidate();
// NOTE(review): the listing drops its line 7776 here; presumably it forwards to the
// base-class invalidate() - confirm against the source.
7777 }
7778
7782
7783 protected:
7784 virtual bool do_match(
7785 _In_reads_or_z_opt_(end) const T* text,
7786 _In_ size_t start = 0,
7787 _In_ size_t end = SIZE_MAX,
7788 _In_ int flags = match_multiline)
7789 {
7790 _Unreferenced_(flags);
7791 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7792
7793 this->interval.end = start;
7794 this->base_type.start = this->interval.end;
7795 for (;;) {
7796 _Assume_(text || this->interval.end >= end);
7797 if (this->interval.end >= end || !text[this->interval.end])
7798 break;
7799 if (text[this->interval.end] == '/' ||
7800 text[this->interval.end] == ';' ||
7801 ctype.is(ctype.space, text[this->interval.end]))
7802 break;
7803 this->interval.end++;
7804 }
7805 if (this->interval.end <= this->base_type.start)
7806 goto error;
7807 this->base_type.end = this->interval.end;
7808
7809 if (end <= this->interval.end || text[this->interval.end] != '/')
7810 goto error;
7811
7812 this->interval.end++;
7813 this->sub_type.start = this->interval.end;
7814 for (;;) {
7815 if (this->interval.end >= end || !text[this->interval.end])
7816 break;
7817 if (text[this->interval.end] == '/' ||
7818 text[this->interval.end] == ';' ||
7819 ctype.is(ctype.space, text[this->interval.end]))
7820 break;
7821 this->interval.end++;
7822 }
7823 if (this->interval.end <= this->sub_type.start)
7824 goto error;
7825
7826 this->sub_type.end = this->interval.end;
7827 this->charset.invalidate();
7828 if (this->interval.end < end && text[this->interval.end] == ';') {
7829 this->interval.end++;
7830
7831 // Skip whitespace.
7832 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7833
7834 if (this->interval.end + 7 < end &&
7835 (text[this->interval.end] == 'c' || text[this->interval.end] == 'C') &&
7836 (text[this->interval.end + 1] == 'h' || text[this->interval.end + 1] == 'H') &&
7837 (text[this->interval.end + 2] == 'a' || text[this->interval.end + 2] == 'A') &&
7838 (text[this->interval.end + 3] == 'r' || text[this->interval.end + 3] == 'R') &&
7839 (text[this->interval.end + 4] == 's' || text[this->interval.end + 4] == 'S') &&
7840 (text[this->interval.end + 5] == 'e' || text[this->interval.end + 5] == 'E') &&
7841 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T') &&
7842 text[this->interval.end + 7] == '=')
7843 {
7844 this->interval.end = this->interval.end + 8;
7845 if (this->interval.end < end &&
7846 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7847 {
7848 // "Quoted...
7849 T quote = text[this->interval.end];
7850 this->charset.start = ++this->interval.end;
7851 for (;;) {
7852 if (this->interval.end >= end || !text[this->interval.end]) {
7853 // No end quote!
7854 this->charset.invalidate();
7855 break;
7856 }
7857 if (text[this->interval.end] == quote) {
7858 // End quote"
7859 this->charset.end = this->interval.end;
7860 this->interval.end++;
7861 break;
7862 }
7863 this->interval.end++;
7864 }
7865 }
7866 else {
7867 // Nonquoted
7868 this->charset.start = this->interval.end;
7869 for (;;) {
7870 if (this->interval.end >= end || !text[this->interval.end] ||
7871 ctype.is(ctype.space, text[this->interval.end])) {
7872 this->charset.end = this->interval.end;
7873 break;
7874 }
7875 this->interval.end++;
7876 }
7877 }
7878 }
7879 }
7880 this->interval.start = start;
7881 return true;
7882
7883 error:
7884 invalidate();
7885 return false;
7886 }
7887 };
7888
// Aliases of basic_mime_type for narrow (char) and wide (wchar_t) text.
7889 using mime_type = basic_mime_type<char>;
7890 using wmime_type = basic_mime_type<wchar_t>;
// tmime_type selects the wide variant when _UNICODE is defined, the narrow one otherwise.
7891#ifdef _UNICODE
7892 using tmime_type = wmime_type;
7893#else
7894 using tmime_type = mime_type;
7895#endif
7896
7900 template <class T>
7902 {
7903 protected:
// Scans forward from `start` and stops at the first '>', '=', "/>" or
// whitespace character, or at the end of text (index `end` or a zero code unit).
// `flags` is not used.
//
// When a delimiter is reached the function returns true with `interval`
// spanning [start, position of the delimiter); note that this span is empty
// when the delimiter is the very first character.
7904 virtual bool do_match(
7905 _In_reads_or_z_opt_(end) const T* text,
7906 _In_ size_t start = 0,
7907 _In_ size_t end = SIZE_MAX,
7908 _In_ int flags = match_default)
7909 {
7910 _Unreferenced_(flags);
7911 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7912 this->interval.end = start;
7913 for (;;) {
7914 _Assume_(text || this->interval.end >= end);
7915 if (this->interval.end >= end || !text[this->interval.end]) {
// NOTE(review): this listing jumps from source line 7915 to 7917 and the braces
// below do not balance; an `if (...) {` guarding the success return appears to
// be missing here — confirm against the original file.
7917 this->interval.start = start;
7918 return true;
7919 }
7920 this->interval.invalidate();
7921 return false;
7922 }
// Delimiters that end an identifier. '/' counts only when immediately followed by '>'.
7923 if (text[this->interval.end] == '>' ||
7924 text[this->interval.end] == '=' ||
7925 (text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>') ||
7926 ctype.is(ctype.space, text[this->interval.end]))
7927 {
7928 this->interval.start = start;
7929 return true;
7930 }
7931 this->interval.end++;
7932 }
7933 }
7934 };
7935
// thtml_ident selects whtml_ident when _UNICODE is defined, html_ident otherwise.
// NOTE(review): the definitions of html_ident/whtml_ident (source lines 7936-7937)
// are absent from this listing — confirm against the original file.
7938#ifdef _UNICODE
7939 using thtml_ident = whtml_ident;
7940#else
7941 using thtml_ident = html_ident;
7942#endif
7943
7947 template <class T>
7949 {
7950 public:
// Resets the parser state: invalidates the `content` interval.
7951 virtual void invalidate()
7952 {
7953 this->content.invalidate();
// NOTE(review): this listing jumps from source line 7953 to 7955; a statement
// (presumably a call to the base class invalidate()) may be missing — confirm.
7955 }
7956
7958
7959 protected:
7960 virtual bool do_match(
7961 _In_reads_or_z_opt_(end) const T* text,
7962 _In_ size_t start = 0,
7963 _In_ size_t end = SIZE_MAX,
7964 _In_ int flags = match_default)
7965 {
7966 _Unreferenced_(flags);
7967 this->interval.end = start;
7968 _Assume_(text || this->interval.end >= end);
7969 if (this->interval.end < end &&
7970 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7971 {
7972 // "Quoted...
7973 T quote = text[this->interval.end];
7974 this->content.start = ++this->interval.end;
7975 for (;;) {
7976 if (this->interval.end >= end || !text[this->interval.end]) {
7977 // No end quote!
7978 this->content.invalidate();
7979 this->interval.invalidate();
7980 return false;
7981 }
7982 if (text[this->interval.end] == quote) {
7983 // End quote"
7984 this->content.end = this->interval.end;
7985 this->interval.start = start;
7986 this->interval.end++;
7987 return true;
7988 }
7989 this->interval.end++;
7990 }
7991 }
7992
7993 // Nonquoted
7994 this->content.start = this->interval.end;
7995 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7996 for (;;) {
7997 _Assume_(text || this->interval.end >= end);
7998 if (this->interval.end >= end || !text[this->interval.end]) {
7999 this->content.end = this->interval.end;
8000 this->interval.start = start;
8001 return true;
8002 }
8003 if (text[this->interval.end] == '>' ||
8004 (text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>') ||
8005 ctype.is(ctype.space, text[this->interval.end]))
8006 {
8007 this->content.end = this->interval.end;
8008 this->interval.start = start;
8009 return true;
8010 }
8011 this->interval.end++;
8012 }
8013 }
8014 };
8015
// Aliases of basic_html_value for narrow (char) and wide (wchar_t) text.
8016 using html_value = basic_html_value<char>;
8017 using whtml_value = basic_html_value<wchar_t>;
// thtml_value selects the wide variant when _UNICODE is defined, the narrow one otherwise.
8018#ifdef _UNICODE
8019 using thtml_value = whtml_value;
8020#else
8021 using thtml_value = html_value;
8022#endif
8023
/// Kind of sequence found in an HTML document.
enum class html_sequence_t {
    unknown = -1,       ///< Not determined; the value basic_html_tag starts with and resets to

    text = 0,           ///< Text
    element = 1,        ///< Self-closed element: <tag .../>
    element_start = 2,  ///< Element start: <tag ...>
    element_end = 3,    ///< Element end: </tag ...>
    declaration = 4,    ///< Declaration: <! ... >
    comment = 5,        ///< Comment: <!-- ... -->
    instruction = 6,    ///< Instruction: <? ... > or <? ... ?>
    PCDATA = 7,         ///< PCDATA
    CDATA = 8,          ///< CDATA
};
8040
8048
8052 template <class T>
8054 {
8055 public:
// Constructs the tag parser with `type` set to html_sequence_t::unknown.
// `locale` defaults to a copy of the current global locale (std::locale()).
8056 basic_html_tag(_In_ const std::locale& locale = std::locale()) :
// NOTE(review): this listing jumps from source line 8056 to 8058; the first
// member initializer (presumably the base class taking `locale`) is missing
// here — confirm against the original file.
8058 type(html_sequence_t::unknown)
8059 {}
8060
// Resets the parser state: `type` becomes unknown, `name` is invalidated and
// the attribute list is emptied.
8061 virtual void invalidate()
8062 {
8063 this->type = html_sequence_t::unknown;
8064 this->name.invalidate();
8065 this->attributes.clear();
// NOTE(review): this listing jumps from source line 8065 to 8067; a statement
// (presumably a call to the base class invalidate()) may be missing — confirm.
8067 }
8068
// Tag type, set by do_match().
8069 html_sequence_t type;
// NOTE(review): source line 8070 (the `name` member that do_match() and
// invalidate() use) is absent from this listing.
// Tag attributes collected by do_match(), in order of appearance.
8071 std::vector<html_attribute> attributes;
8072
8073 protected:
8074 virtual bool do_match(
8075 _In_reads_or_z_opt_(end) const T* text,
8076 _In_ size_t start = 0,
8077 _In_ size_t end = SIZE_MAX,
8078 _In_ int flags = match_multiline)
8079 {
8080 _Assume_(text || start >= end);
8081 if (start >= end || text[start] != '<')
8082 goto error;
8083 this->interval.end = start + 1;
8084 if (this->interval.end >= end || !text[this->interval.end])
8085 goto error;
8086 if (text[this->interval.end] == '/' &&
8087 this->m_ident.match(text, this->interval.end + 1, end, flags))
8088 {
8089 // </...
8090 this->type = html_sequence_t::element_end;
8091 this->name = this->m_ident.interval;
8092 this->interval.end = this->m_ident.interval.end;
8093 }
8094 else if (text[this->interval.end] == '!') {
8095 // <!...
8096 this->interval.end++;
8097 if (this->interval.end + 1 < end &&
8098 text[this->interval.end] == '-' &&
8099 text[this->interval.end + 1] == '-')
8100 {
8101 // <!--...
8102 this->name.start = this->interval.end = this->interval.end + 2;
8103 for (;;) {
8104 if (this->interval.end >= end || !text[this->interval.end])
8105 goto error;
8106 if (this->interval.end + 2 < end &&
8107 text[this->interval.end] == '-' &&
8108 text[this->interval.end + 1] == '-' &&
8109 text[this->interval.end + 2] == '>')
8110 {
8111 // <!--...-->
8112 this->type = html_sequence_t::comment;
8113 this->name.end = this->interval.end;
8114 this->attributes.clear();
8115 this->interval.start = start;
8116 this->interval.end = this->interval.end + 3;
8117 return true;
8118 }
8119 this->interval.end++;
8120 }
8121 }
8122 this->type = html_sequence_t::declaration;
8123 this->name.start = this->name.end = this->interval.end;
8124 }
8125 else if (text[this->interval.end] == '?') {
8126 // <?...
8127 this->name.start = ++this->interval.end;
8128 for (;;) {
8129 if (this->interval.end >= end || !text[this->interval.end])
8130 goto error;
8131 if (text[this->interval.end] == '>') {
8132 // <?...>
8133 this->type = html_sequence_t::instruction;
8134 this->name.end = this->interval.end;
8135 this->attributes.clear();
8136 this->interval.start = start;
8137 this->interval.end++;
8138 return true;
8139 }
8140 if (this->interval.end + 1 < end &&
8141 text[this->interval.end] == '?' &&
8142 text[this->interval.end + 1] == '>')
8143 {
8144 // <?...?>
8145 this->type = html_sequence_t::instruction;
8146 this->name.end = this->interval.end;
8147 this->attributes.clear();
8148 this->interval.start = start;
8149 this->interval.end = this->interval.end + 2;
8150 return true;
8151 }
8152 this->interval.end++;
8153 }
8154 }
8155 else if (this->m_ident.match(text, this->interval.end, end, flags)) {
8156 // <tag...
8157 this->type = html_sequence_t::element_start;
8158 this->name = this->m_ident.interval;
8159 this->interval.end = this->m_ident.interval.end;
8160 }
8161 else
8162 goto error;
8163
8164 {
8165 // Skip whitespace.
8166 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
8167 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8168
8169 this->attributes.clear();
8170 for (;;) {
8171 if (this->type == html_sequence_t::element_start &&
8172 this->interval.end + 1 < end &&
8173 text[this->interval.end] == '/' &&
8174 text[this->interval.end + 1] == '>')
8175 {
8176 // <tag .../>
8177 this->type = html_sequence_t::element;
8178 this->interval.end = this->interval.end + 2;
8179 break;
8180 }
8181 if (this->interval.end < end &&
8182 text[this->interval.end] == '>')
8183 {
8184 // <tag ...>
8185 this->interval.end++;
8186 break;
8187 }
8188 if (this->type == html_sequence_t::declaration &&
8189 this->interval.end + 1 < end &&
8190 text[this->interval.end] == '!' &&
8191 text[this->interval.end + 1] == '>')
8192 {
8193 // "<!...!>".
8194 this->interval.end = this->interval.end + 2;
8195 break;
8196 }
8197 if (this->type == html_sequence_t::declaration &&
8198 this->interval.end + 1 < end &&
8199 text[this->interval.end] == '-' &&
8200 text[this->interval.end + 1] == '-')
8201 {
8202 // "<! ... --...".
8203 this->interval.end = this->interval.end + 2;
8204 for (;;) {
8205 if (this->interval.end >= end || !text[this->interval.end])
8206 goto error;
8207 if (this->interval.end + 1 < end &&
8208 text[this->interval.end] == '-' &&
8209 text[this->interval.end + 1] == '-')
8210 {
8211 // "<! ... --...--".
8212 this->interval.end = this->interval.end + 2;
8213 break;
8214 }
8215 this->interval.end++;
8216 }
8217
8218 // Skip whitespace.
8219 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8220 continue;
8221 }
8222
8223 if (this->interval.end >= end || !text[this->interval.end])
8224 goto error;
8225
8226 // Attributes follow...
8227 html_attribute* a = nullptr;
8228 if (this->m_ident.match(text, this->interval.end, end, flags)) {
8229 this->attributes.push_back(std::move(html_attribute{ this->m_ident.interval }));
8230 a = &this->attributes.back();
8231 _Assume_(a);
8232 this->interval.end = this->m_ident.interval.end;
8233 }
8234 else {
8235 // What was that?! Skip.
8236 this->interval.end++;
8237 continue;
8238 }
8239
8240 // Skip whitespace.
8241 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8242
8243 if (this->interval.end < end && text[this->interval.end] == '=') {
8244 this->interval.end++;
8245
8246 // Skip whitespace.
8247 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8248
8249 if (this->m_value.match(text, this->interval.end, end, flags)) {
8250 // This attribute has value.
8251 a->value = this->m_value.content;
8252 this->interval.end = this->m_value.interval.end;
8253
8254 // Skip whitespace.
8255 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8256 }
8257 }
8258 else {
8259 // This attribute has no value.
8260 a->value.invalidate();
8261 }
8262 }
8263 }
8264
8265 this->interval.start = start;
8266 return true;
8267
8268 error:
8269 invalidate();
8270 return false;
8271 }
8272
// Sub-parser do_match() uses for the tag name and for attribute names.
8273 basic_html_ident<T> m_ident;
// Sub-parser do_match() uses for attribute values.
8274 basic_html_value<T> m_value;
8275 };
8276
// Aliases of basic_html_tag for narrow (char) and wide (wchar_t) text.
8277 using html_tag = basic_html_tag<char>;
8278 using whtml_tag = basic_html_tag<wchar_t>;
// thtml_tag selects the wide variant when _UNICODE is defined, the narrow one otherwise.
8279#ifdef _UNICODE
8280 using thtml_tag = whtml_tag;
8281#else
8282 using thtml_tag = html_tag;
8283#endif
8284
8288 template <class T>
8290 {
8291 public:
// Resets the parser state: invalidates the `condition` interval.
8292 virtual void invalidate()
8293 {
8294 this->condition.invalidate();
// NOTE(review): this listing jumps from source line 8294 to 8296; a statement
// (presumably a call to the base class invalidate()) may be missing — confirm.
8296 }
8297
// Position of the condition text in the source: the part of "<![condition["
// between the brackets, without surrounding whitespace. Set by do_match().
8298 stdex::interval<size_t> condition;
8299
8300 protected:
8301 virtual bool do_match(
8302 _In_reads_or_z_opt_(end) const T* text,
8303 _In_ size_t start = 0,
8304 _In_ size_t end = SIZE_MAX,
8305 _In_ int flags = match_multiline)
8306 {
8307 _Unreferenced_(flags);
8308 _Assume_(text || start + 2 >= end);
8309 if (start + 2 < end &&
8310 text[start] == '<' &&
8311 text[start + 1] == '!' &&
8312 text[start + 2] == '[')
8313 {
8314 this->interval.end = start + 3;
8315
8316 // Skip whitespace.
8317 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
8318 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8319
8320 this->condition.start = this->condition.end = this->interval.end;
8321
8322 for (;;) {
8323 if (this->interval.end >= end || !text[this->interval.end])
8324 break;
8325 if (text[this->interval.end] == '[') {
8326 this->interval.start = start;
8327 this->interval.end++;
8328 return true;
8329 }
8330 if (ctype.is(ctype.space, text[this->interval.end]))
8331 this->interval.end++;
8332 else
8333 this->condition.end = ++this->interval.end;
8334 }
8335 }
8336
8337 this->condition.invalidate();
8338 this->interval.invalidate();
8339 return false;
8340 }
8341 };
8342
// Aliases of basic_html_declaration_condition_start for narrow (char) and wide (wchar_t) text.
8343 using html_declaration_condition_start = basic_html_declaration_condition_start<char>;
8344 using whtml_declaration_condition_start = basic_html_declaration_condition_start<wchar_t>;
// thtml_declaration_condition_start selects the wide variant when _UNICODE is defined, the narrow one otherwise.
8345#ifdef _UNICODE
8346 using thtml_declaration_condition_start = whtml_declaration_condition_start;
8347#else
8348 using thtml_declaration_condition_start = html_declaration_condition_start;
8349#endif
8350
8354 template <class T>
8356 {
8357 protected:
8358 virtual bool do_match(
8359 _In_reads_or_z_opt_(end) const T* text,
8360 _In_ size_t start = 0,
8361 _In_ size_t end = SIZE_MAX,
8362 _In_ int flags = match_multiline)
8363 {
8364 _Unreferenced_(flags);
8365 _Assume_(text || start + 2 >= end);
8366 if (start + 2 < end &&
8367 text[start] == ']' &&
8368 text[start + 1] == ']' &&
8369 text[start + 2] == '>')
8370 {
8371 this->interval.start = start;
8372 this->interval.end = start + 3;
8373 return true;
8374 }
8375 this->interval.invalidate();
8376 return false;
8377 }
8378 };
8379
8382#ifdef _UNICODE
8384#else
8386#endif
8387 }
8388}
8389
8390#undef ENUM_FLAG_OPERATOR
8391#undef ENUM_FLAGS
8392
8393#if defined(_MSC_VER)
8394#pragma warning(pop)
8395#elif defined(__GNUC__)
8396#pragma GCC diagnostic pop
8397#endif
locale_t helper class to free_locale when going out of scope.
Definition locale.hpp:74
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4381
Test for any code unit.
Definition parser.hpp:236
Test for beginning of line.
Definition parser.hpp:635
Test for any.
Definition parser.hpp:1078
Test for chemical formula.
Definition parser.hpp:5510
Test for Creditor Reference.
Definition parser.hpp:4945
T reference[22]
Normalized national reference number.
Definition parser.hpp:4967
T check_digits[3]
Two check digits.
Definition parser.hpp:4966
bool is_valid
Is reference valid per ISO 7064.
Definition parser.hpp:4968
Legacy CSS comment end -->
Definition parser.hpp:7471
Legacy CSS comment start <!--
Definition parser.hpp:7433
CSS comment.
Definition parser.hpp:7373
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7381
CSS import directive.
Definition parser.hpp:7685
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7693
CSS string.
Definition parser.hpp:7508
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7516
URI in CSS.
Definition parser.hpp:7575
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7583
Test for any code unit from a given string of code units.
Definition parser.hpp:740
Test for specific code unit.
Definition parser.hpp:308
Test for date.
Definition parser.hpp:4014
Test for valid DNS domain character.
Definition parser.hpp:2796
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2806
Test for DNS domain/hostname.
Definition parser.hpp:2896
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2960
Test for e-mail address.
Definition parser.hpp:3788
Test for emoticon.
Definition parser.hpp:3891
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3919
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3920
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3922
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3921
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3918
Test for end of line.
Definition parser.hpp:674
Test for fraction.
Definition parser.hpp:1706
End of condition ...]]>
Definition parser.hpp:8356
Start of condition <![condition[...
Definition parser.hpp:8290
virtual bool do_match(_In_reads_or_z_opt_(end) const T *text, size_t start=0, size_t end=SIZE_MAX, int flags=match_multiline)
condition position in source
Definition parser.hpp:8301
Contiguous sequence of characters representing name of element, attribute etc.
Definition parser.hpp:7902
Tag.
Definition parser.hpp:8054
std::vector< html_attribute > attributes
tag attributes
Definition parser.hpp:8071
html_sequence_t type
tag type
Definition parser.hpp:8069
stdex::interval< size_t > name
tag name position in source
Definition parser.hpp:8070
Optionally-quoted string representing value of an attribute.
Definition parser.hpp:7949
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7957
Test for International Bank Account Number.
Definition parser.hpp:4656
T bban[31]
Normalized Basic Bank Account Number.
Definition parser.hpp:4681
T country[3]
ISO 3166-1 alpha-2 country code.
Definition parser.hpp:4679
T check_digits[3]
Two check digits.
Definition parser.hpp:4680
bool is_valid
Is IBAN valid per ISO 7064.
Definition parser.hpp:4682
Test for decimal integer.
Definition parser.hpp:1316
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1401
bool has_separators
Did integer have any separators?
Definition parser.hpp:1422
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1421
Test for hexadecimal integer.
Definition parser.hpp:1481
Base class for integer testing.
Definition parser.hpp:1294
size_t value
Calculated value of the numeral.
Definition parser.hpp:1308
Test for IPv4 address.
Definition parser.hpp:2364
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2409
struct in_addr value
IPv4 address value.
Definition parser.hpp:2410
Test for IPv6 address.
Definition parser.hpp:2576
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2648
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2646
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2647
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2502
Test for repeating.
Definition parser.hpp:930
bool m_greedy
try to match as long sequence as possible
Definition parser.hpp:969
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:966
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:967
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:968
Test for JSON string.
Definition parser.hpp:7221
MIME content type.
Definition parser.hpp:7769
stdex::interval< size_t > base_type
basic type position in source
Definition parser.hpp:7779
stdex::interval< size_t > sub_type
sub-type position in source
Definition parser.hpp:7780
stdex::interval< size_t > charset
charset position in source
Definition parser.hpp:7781
Test for mixed numeral.
Definition parser.hpp:1941
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:1974
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1972
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1971
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1970
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:1973
Test for monetary numeral.
Definition parser.hpp:2235
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2268
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2273
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2271
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2274
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2272
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2269
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2270
"No-op" match
Definition parser.hpp:204
Base template for all parsers.
Definition parser.hpp:80
stdex::interval< size_t > interval
Region of the last match.
Definition parser.hpp:120
Test for permutation.
Definition parser.hpp:1218
Test for phone number.
Definition parser.hpp:4504
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4529
Test for any punctuation code unit.
Definition parser.hpp:481
Test for Roman numeral.
Definition parser.hpp:1590
Test for scientific numeral.
Definition parser.hpp:2066
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2112
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2116
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2110
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2111
double value
Calculated value of the numeral.
Definition parser.hpp:2120
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2118
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2115
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2117
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2119
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2114
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2113
Test for match score.
Definition parser.hpp:1769
Test for sequence.
Definition parser.hpp:1026
Definition parser.hpp:709
Test for SI Reference delimiter.
Definition parser.hpp:5139
Test for SI Reference part.
Definition parser.hpp:5093
Test for SI Reference.
Definition parser.hpp:5178
basic_si_reference_part< T > part3
Reference data part 3 (P3)
Definition parser.hpp:5207
basic_si_reference_part< T > part1
Reference data part 1 (P1)
Definition parser.hpp:5205
bool is_valid
Is reference valid.
Definition parser.hpp:5208
T model[3]
Reference model.
Definition parser.hpp:5204
basic_si_reference_part< T > part2
Reference data part 2 (P2)
Definition parser.hpp:5206
Test for signed numeral.
Definition parser.hpp:1855
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1881
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1880
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1879
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1882
Test for any space code unit.
Definition parser.hpp:401
Test for any space or punctuation code unit.
Definition parser.hpp:556
Test for any string.
Definition parser.hpp:1146
Test for given string.
Definition parser.hpp:835
Test for time.
Definition parser.hpp:4279
Test for valid URL password character.
Definition parser.hpp:3080
Test for valid URL path character.
Definition parser.hpp:3182
Test for URL path.
Definition parser.hpp:3292
Test for valid URL username character.
Definition parser.hpp:2979
Test for URL.
Definition parser.hpp:3432
Test for HTTP agent.
Definition parser.hpp:6765
Test for HTTP any type.
Definition parser.hpp:5908
Test for HTTP asterisk.
Definition parser.hpp:6536
Test for HTTP header.
Definition parser.hpp:7073
Test for HTTP language (RFC1766)
Definition parser.hpp:6404
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:5590
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5940
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5992
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5856
http_token name
Parameter name.
Definition parser.hpp:5865
http_value value
Parameter value.
Definition parser.hpp:5866
Test for HTTP protocol.
Definition parser.hpp:6840
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6862
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:5749
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:5758
Test for HTTP request.
Definition parser.hpp:6941
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:5626
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:5662
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:5695
Test for HTTP URL parameter.
Definition parser.hpp:6229
Test for HTTP URL path segment.
Definition parser.hpp:6141
Test for HTTP URL path segment.
Definition parser.hpp:6174
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:6182
Test for HTTP URL port.
Definition parser.hpp:6085
Test for HTTP URL server.
Definition parser.hpp:6048
Test for HTTP URL.
Definition parser.hpp:6306
Collection of HTTP values.
Definition parser.hpp:7177
Test for HTTP value (RFC2616: value)
Definition parser.hpp:5812
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5821
http_token token
Value when matched as token.
Definition parser.hpp:5822
Test for HTTP weight factor.
Definition parser.hpp:6467
float value
Calculated value of the weight factor.
Definition parser.hpp:6480
Test for HTTP weighted value.
Definition parser.hpp:6559
Base template for collection-holding parsers.
Definition parser.hpp:986
Test for any SGML code point.
Definition parser.hpp:269
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:792
Test for specific SGML code point.
Definition parser.hpp:357
Test for valid DNS domain SGML character.
Definition parser.hpp:2851
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2541
Test for any SGML punctuation code point.
Definition parser.hpp:522
Test for any SGML space code point.
Definition parser.hpp:444
Test for any SGML space or punctuation code point.
Definition parser.hpp:599
Test for SGML given string.
Definition parser.hpp:882
Test for valid URL password SGML character.
Definition parser.hpp:3133
Test for valid URL path SGML character.
Definition parser.hpp:3239
Test for valid URL username SGML character.
Definition parser.hpp:3031
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
void invalidate()
Invalidates interval.
Definition interval.hpp:59
T start
interval start
Definition interval.hpp:19
Tag attribute.
Definition parser.hpp:8044
stdex::interval< size_t > name
attribute name position in source
Definition parser.hpp:8045
stdex::interval< size_t > value
attribute value position in source
Definition parser.hpp:8046
Definition parser.hpp:7203