stdex
Additional custom or not Standard C++ covered algorithms
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023-2024 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "endian.hpp"
10#include "interval.hpp"
11#include "memory.hpp"
12#include "sgml.hpp"
13#include "string.hpp"
14#include <stdarg.h>
15#include <stdint.h>
16#include <math.h>
17#if defined(_WIN32)
18#include <winsock2.h>
19#if _MSC_VER >= 1300
20#include <ws2ipdef.h>
21#endif
22#include <ws2tcpip.h>
23#else
24#include <netinet/in.h>
25#endif
26#include <limits>
27#include <list>
28#include <locale>
29#include <memory>
30#include <set>
31#include <string_view>
32#include <string>
33
34#ifdef _MSC_VER
35#pragma warning(push)
36#pragma warning(disable: 4100)
37#endif
38
// Defines binary operator X and compound assignment X= for the enum type T.
// Overloads: (T, T), (T, underlying integer), (underlying integer, T), plus X= taking T or the
// underlying integer on the right-hand side. Operands are converted with static_cast to
// std::underlying_type_t<T>, combined with X, and the result is cast back to T.
39#define ENUM_FLAG_OPERATOR(T,X) \
40inline T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
41inline T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
42inline T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
43inline T& operator X= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
44inline T& operator X= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }
// Declares a flag-style scoped enum T with underlying type `type`:
// forward-declares the enum, defines operator ~ and the |, ^, & operator families
// (through ENUM_FLAG_OPERATOR), and ends with an unterminated `enum class T : type`
// so that the text following the macro invocation continues that declaration.
45#define ENUM_FLAGS(T, type) \
46enum class T : type; \
47inline T operator ~ (T t) { return (T) (~static_cast<std::underlying_type_t <T>>(t)); } \
48ENUM_FLAG_OPERATOR(T,|) \
49ENUM_FLAG_OPERATOR(T,^) \
50ENUM_FLAG_OPERATOR(T,&) \
51enum class T : type
52
// Maps the name s6_words onto a platform-specific member name:
// nothing is defined here for _WIN32, __APPLE__ uses __u6_addr.__u6_addr16,
// and every other platform uses s6_addr16.
// NOTE(review): presumably these name the 16-bit word array of in6_addr - confirm against the platform headers.
53#if defined(_WIN32)
54#elif defined(__APPLE__)
55#define s6_words __u6_addr.__u6_addr16
56#else
57#define s6_words s6_addr16
58#endif
59
60namespace stdex
61{
62 namespace parser
63 {
/// No matching options set; default value of the `flags` parameter of search()/match()/do_match().
67 constexpr int match_default = 0;
/// Flag bit: parsers that honour it compare text case-insensitively.
68 constexpr int match_case_insensitive = 0x1;
/// Flag bit: when set, the whitespace parsers also accept code units for which stdex::islbreak() is true.
69 constexpr int match_multiline = 0x2;
70
74 template <class T>
76 {
77 public:
/// Stores a copy of `locale` in m_locale (a default-constructed std::locale when omitted).
78 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
/// Virtual destructor, so derived parsers can be destroyed through a base pointer.
79 virtual ~basic_parser() {}
80
/// Looks for the first position at which this parser matches.
///
/// Calls match() at every index from `start` on, stopping at `end` or at the first zero code unit.
///
/// \param[in] text   Text to scan
/// \param[in] start  Index to begin scanning at
/// \param[in] end    Index at which scanning stops
/// \param[in] flags  Bitwise combination of match_* flags, passed through to match()
///
/// \returns true as soon as match() succeeds at some index; false if no index matched
81 bool search(
82 _In_reads_or_z_opt_(end) const T* text,
83 _In_ size_t start = 0,
84 _In_ size_t end = SIZE_MAX,
85 _In_ int flags = match_default)
86 {
87 for (size_t i = start; i < end && text[i]; i++)
88 if (match(text, i, end, flags))
89 return true;
90 return false;
91 }
92
/// Tests whether the text matches at `start`.
///
/// Public, non-virtual entry point; forwards all arguments unchanged to the virtual do_match().
///
/// \returns Whatever do_match() returns
93 bool match(
94 _In_reads_or_z_opt_(end) const T* text,
95 _In_ size_t start = 0,
96 _In_ size_t end = SIZE_MAX,
97 _In_ int flags = match_default)
98 {
99 return do_match(text, start, end, flags);
100 }
101
/// String-view overload of match().
///
/// Passes text.data() to the pointer overload and limits `end` to text.size(), so the
/// match never looks beyond the view even when the default end (SIZE_MAX) is used.
102 bool match(
103 _In_ const std::basic_string_view<T, std::char_traits<T>> text,
104 _In_ size_t start = 0,
105 _In_ size_t end = SIZE_MAX,
106 _In_ int flags = match_default)
107 {
108 return match(text.data(), start, std::min<size_t>(end, text.size()), flags);
109 }
110
/// Discards the result of the previous match by invalidating the `interval` member.
/// Derived classes override this to reset their own result fields as well.
111 virtual void invalidate()
112 {
113 this->interval.invalidate();
114 }
115
117
118 protected:
/// Matching implementation supplied by every concrete parser; called by match().
///
/// \param[in] text   Text to test
/// \param[in] start  Index at which the match must begin
/// \param[in] end    Index limiting the match
/// \param[in] flags  Bitwise combination of match_* flags
///
/// \returns true on a match. The implementations visible in this file set `interval` on success
///          and invalidate it on failure.
119 virtual bool do_match(
120 _In_reads_or_z_opt_(end) const T* text,
121 _In_ size_t start = 0,
122 _In_ size_t end = SIZE_MAX,
123 _In_ int flags = match_default) = 0;
124
/// Decodes the SGML code point that begins at text[start].
///
/// When text[start] is '&', the text is scanned for the terminating ';'. The scan gives up at `end`,
/// at a zero code unit, at another '&' or at a whitespace character. A terminated entity of the form
/// "&#...;" or "&#x...;" is converted numerically with strtou32 (base 10 or 16); any other entity
/// name is looked up with sgml2uni. If no entity is recognised, the single code unit text[start]
/// is returned unchanged.
///
/// \param[in]  text     Text to read from
/// \param[in]  start    Index of the code unit to decode
/// \param[in]  end      Index limiting the entity scan
/// \param[out] chr_end  Index just past the consumed code unit or entity
/// \param[out] buf      Scratch buffer that holds the result unless sgml2uni provides it
///
/// \returns Zero-terminated wide string: `buf`, or the pointer returned by sgml2uni
126 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
127 {
128 if (text[start] == '&') {
129 // Potential entity start
130 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
131 for (chr_end = start + 1;; chr_end++) {
132 if (chr_end >= end || text[chr_end] == 0) {
133 // Unterminated entity
134 break;
135 }
136 if (text[chr_end] == ';') {
137 // Entity end
// n is the number of code units between '&' and ';'.
138 size_t n = chr_end - start - 1;
139 if (n >= 2 && text[start + 1] == '#') {
140 // Numerical entity
141 char32_t unicode;
142 if (text[start + 2] == 'x' || text[start + 2] == 'X')
143 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
144 else
145 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
146#ifdef _WIN32
// Values of 0x10000 and above are written to buf as a surrogate pair.
147 if (unicode < 0x10000) {
148 buf[0] = (wchar_t)unicode;
149 buf[1] = 0;
150 }
151 else {
152 ucs4_to_surrogate_pair(buf, unicode);
153 buf[2] = 0;
154 }
155#else
156 buf[0] = (wchar_t)unicode;
157 buf[1] = 0;
158#endif
// Step over the ';' so chr_end points just past the entity.
159 chr_end++;
160 return buf;
161 }
// Named entity: the name (without '&' and ';') is handed to sgml2uni.
162 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
163 if (entity_w) {
164 chr_end++;
165 return entity_w;
166 }
167 // Unknown entity.
168 break;
169 }
170 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
171 // This char cannot possibly be a part of entity.
172 break;
173 }
174 }
175 }
// Fallback: return the single code unit at `start` as a one-character string.
176 buf[0] = text[start];
177 buf[1] = 0;
178 chr_end = start + 1;
179 return buf;
180 }
182
/// Locale given to the constructor; the parsers obtain their std::ctype facets from it.
183 std::locale m_locale;
184 };
185
// Convenience aliases of basic_parser: char, wchar_t, and a "t" variant that follows _UNICODE.
186 using parser = basic_parser<char>;
187 using wparser = basic_parser<wchar_t>;
188#ifdef _UNICODE
189 using tparser = wparser;
190#else
191 using tparser = parser;
192#endif
// Base type of the sgml_* parsers, which read char text and decode SGML entities.
193 using sgml_parser = basic_parser<char>;
194
198 template <class T>
199 class basic_noop : public basic_parser<T>
200 {
201 protected:
202 virtual bool do_match(
203 _In_reads_or_z_opt_(end) const T* text,
204 _In_ size_t start = 0,
205 _In_ size_t end = SIZE_MAX,
206 _In_ int flags = match_default)
207 {
208 _Assume_(text || start >= end);
209 if (start < end && text[start]) {
210 this->interval.start = this->interval.end = start;
211 return true;
212 }
213 this->interval.invalidate();
214 return false;
215 }
216 };
217
// char alias of basic_noop; tnoop selects wnoop when _UNICODE is defined and noop otherwise.
// (The declaration of wnoop is not part of this listing.)
218 using noop = basic_noop<char>;
220#ifdef _UNICODE
221 using tnoop = wnoop;
222#else
223 using tnoop = noop;
224#endif
226
230 template <class T>
231 class basic_any_cu : public basic_parser<T>
232 {
233 public:
234 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
235
236 protected:
237 virtual bool do_match(
238 _In_reads_or_z_opt_(end) const T* text,
239 _In_ size_t start = 0,
240 _In_ size_t end = SIZE_MAX,
241 _In_ int flags = match_default)
242 {
243 _Assume_(text || start >= end);
244 if (start < end && text[start]) {
245 this->interval.end = (this->interval.start = start) + 1;
246 return true;
247 }
248 this->interval.invalidate();
249 return false;
250 }
251 };
252
// tany_cu selects wany_cu when _UNICODE is defined and any_cu otherwise.
// (The declarations of any_cu and wany_cu are not part of this listing.)
255#ifdef _UNICODE
256 using tany_cu = wany_cu;
257#else
258 using tany_cu = any_cu;
259#endif
260
264 class sgml_any_cp : public basic_any_cu<char>
265 {
266 public:
267 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
268
269 protected:
270 virtual bool do_match(
271 _In_reads_or_z_(end) const char* text,
272 _In_ size_t start = 0,
273 _In_ size_t end = SIZE_MAX,
274 _In_ int flags = match_default)
275 {
276 _Assume_(text || start >= end);
277 if (start < end && text[start]) {
278 if (text[start] == '&') {
279 // SGML entity
280 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
281 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
282 if (text[this->interval.end] == ';') {
283 this->interval.end++;
284 this->interval.start = start;
285 return true;
286 }
287 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
288 break;
289 // Unterminated entity
290 }
291 this->interval.end = (this->interval.start = start) + 1;
292 return true;
293 }
294 this->interval.invalidate();
295 return false;
296 }
297 };
298
/// Parser that tests one code unit against a fixed character.
/// With `invert` set it succeeds on any non-zero code unit that does not equal the character.
302 template <class T>
303 class basic_cu : public basic_parser<T>
304 {
305 public:
/// \param[in] chr     Character to compare against
/// \param[in] invert  When true, the comparison result is negated
/// \param[in] locale  Locale used for case-insensitive comparison
306 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
308 m_chr(chr),
309 m_invert(invert)
310 {}
311
312 protected:
/// On success the interval is [start, start + 1); otherwise it is invalidated.
313 virtual bool do_match(
314 _In_reads_or_z_opt_(end) const T* text,
315 _In_ size_t start = 0,
316 _In_ size_t end = SIZE_MAX,
317 _In_ int flags = match_default)
318 {
319 _Assume_(text || start >= end);
320 if (start < end && text[start]) {
321 bool r;
322 if (flags & match_case_insensitive) {
// Case-insensitive: compare the lowercase forms produced by the locale's ctype facet.
323 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
324 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
325 }
326 else
327 r = text[start] == m_chr;
// Succeed when the comparison result differs from m_invert.
328 if ((r && !m_invert) || (!r && m_invert)) {
329 this->interval.end = (this->interval.start = start) + 1;
330 return true;
331 }
332 }
333 this->interval.invalidate();
334 return false;
335 }
336
/// Character to compare against
337 T m_chr;
/// Negate the comparison result?
338 bool m_invert;
339 };
340
// Aliases of basic_cu for char and wchar_t; tcu follows _UNICODE.
341 using cu = basic_cu<char>;
342 using wcu = basic_cu<wchar_t>;
343#ifdef _UNICODE
344 using tcu = wcu;
345#else
346 using tcu = cu;
347#endif
348
/// Parser that tests one SGML code point (a plain code unit or an entity, as decoded by
/// next_sgml_cp) against the code point supplied to the constructor.
352 class sgml_cp : public sgml_parser
353 {
354 public:
/// \param[in] chr     SGML text whose first code point becomes the reference value
/// \param[in] count   Limit passed to next_sgml_cp as `end`; 0 yields an empty reference string
/// \param[in] invert  When true, the comparison result is negated
/// \param[in] locale  Locale used for case-insensitive comparison
355 sgml_cp(const char* chr, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
357 m_invert(invert)
358 {
359 _Assume_(chr || !count);
360 wchar_t buf[3];
361 size_t chr_end;
// Decode the first code point of `chr` once and keep it as a wide string.
362 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
363 }
364
365 protected:
/// On success the interval runs from `start` to the end of the decoded code point;
/// otherwise it is invalidated.
366 virtual bool do_match(
367 _In_reads_or_z_(end) const char* text,
368 _In_ size_t start = 0,
369 _In_ size_t end = SIZE_MAX,
370 _In_ int flags = match_default)
371 {
372 _Assume_(text || start >= end);
373 if (start < end && text[start]) {
374 wchar_t buf[3];
// next_sgml_cp writes the index past the decoded code point straight into interval.end.
375 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
376 bool r = ((flags & match_case_insensitive) ?
377 stdex::strnicmp(chr, SIZE_MAX, m_chr.data(), m_chr.size(), m_locale) :
378 stdex::strncmp(chr, SIZE_MAX, m_chr.data(), m_chr.size())) == 0;
379 if ((r && !m_invert) || (!r && m_invert)) {
380 this->interval.start = start;
381 return true;
382 }
383 }
384 this->interval.invalidate();
385 return false;
386 }
387
/// Reference code point, decoded to a wide string
388 std::wstring m_chr;
/// Negate the comparison result?
389 bool m_invert;
390 };
391
/// Parser that tests one code unit for being whitespace according to the locale's ctype facet.
/// Unless match_multiline is set, code units for which stdex::islbreak() is true do not count as whitespace.
395 template <class T>
396 class basic_space_cu : public basic_parser<T>
397 {
398 public:
/// \param[in] invert  When true, the test result is negated
/// \param[in] locale  Locale providing the ctype facet
399 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
401 m_invert(invert)
402 {}
403
404 protected:
/// On success the interval is [start, start + 1); otherwise it is invalidated.
405 virtual bool do_match(
406 _In_reads_or_z_opt_(end) const T* text,
407 _In_ size_t start = 0,
408 _In_ size_t end = SIZE_MAX,
409 _In_ int flags = match_default)
410 {
411 _Assume_(text || start >= end);
412 if (start < end && text[start]) {
413 bool r =
414 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
415 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
// Succeed when the test result differs from m_invert.
416 if ((r && !m_invert) || (!r && m_invert)) {
417 this->interval.end = (this->interval.start = start) + 1;
418 return true;
419 }
420 }
421 this->interval.invalidate();
422 return false;
423 }
424
/// Negate the test result?
425 bool m_invert;
426 };
427
// tspace_cu selects wspace_cu when _UNICODE is defined and space_cu otherwise.
// (The declarations of space_cu and wspace_cu are not part of this listing.)
430#ifdef _UNICODE
431 using tspace_cu = wspace_cu;
432#else
433 using tspace_cu = space_cu;
434#endif
435
/// SGML-aware whitespace test: decodes one code point with next_sgml_cp and checks that
/// every wide character of the decoded string is whitespace.
439 class sgml_space_cp : public basic_space_cu<char>
440 {
441 public:
/// \param[in] invert  When true, the test result is negated
/// \param[in] locale  Locale providing the ctype<wchar_t> facet
442 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
444 {}
445
446 protected:
/// On success the interval runs from `start` to the end of the decoded code point;
/// otherwise it is invalidated.
447 virtual bool do_match(
448 _In_reads_or_z_(end) const char* text,
449 _In_ size_t start = 0,
450 _In_ size_t end = SIZE_MAX,
451 _In_ int flags = match_default)
452 {
453 _Assume_(text || start >= end);
454 if (start < end && text[start]) {
455 wchar_t buf[3];
456 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
457 const wchar_t* chr_end = chr + stdex::strlen(chr);
// scan_not returns the first character lacking the mask; reaching chr_end means all are whitespace.
458 bool r =
459 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
460 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
461 if ((r && !m_invert) || (!r && m_invert)) {
462 this->interval.start = start;
463 return true;
464 }
465 }
466
467 this->interval.invalidate();
468 return false;
469 }
470 };
471
/// Parser that tests one code unit for being punctuation according to the locale's ctype facet.
475 template <class T>
476 class basic_punct_cu : public basic_parser<T>
477 {
478 public:
/// \param[in] invert  When true, the test result is negated
/// \param[in] locale  Locale providing the ctype facet
479 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
481 m_invert(invert)
482 {}
483
484 protected:
/// On success the interval is [start, start + 1); otherwise it is invalidated.
485 virtual bool do_match(
486 _In_reads_or_z_opt_(end) const T* text,
487 _In_ size_t start = 0,
488 _In_ size_t end = SIZE_MAX,
489 _In_ int flags = match_default)
490 {
491 _Assume_(text || start >= end);
492 if (start < end && text[start]) {
493 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
// Succeed when the test result differs from m_invert.
494 if ((r && !m_invert) || (!r && m_invert)) {
495 this->interval.end = (this->interval.start = start) + 1;
496 return true;
497 }
498 }
499 this->interval.invalidate();
500 return false;
501 }
502
/// Negate the test result?
503 bool m_invert;
504 };
505
// tpunct_cu selects wpunct_cu when _UNICODE is defined and punct_cu otherwise.
// (The declarations of punct_cu and wpunct_cu are not part of this listing.)
508#ifdef _UNICODE
509 using tpunct_cu = wpunct_cu;
510#else
511 using tpunct_cu = punct_cu;
512#endif
513
/// SGML-aware punctuation test: decodes one code point with next_sgml_cp and checks that
/// every wide character of the decoded string is punctuation.
517 class sgml_punct_cp : public basic_punct_cu<char>
518 {
519 public:
/// \param[in] invert  When true, the test result is negated
/// \param[in] locale  Locale providing the ctype<wchar_t> facet
520 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
522 {}
523
524 protected:
/// On success the interval runs from `start` to the end of the decoded code point;
/// otherwise it is invalidated.
525 virtual bool do_match(
526 _In_reads_or_z_(end) const char* text,
527 _In_ size_t start = 0,
528 _In_ size_t end = SIZE_MAX,
529 _In_ int flags = match_default)
530 {
531 _Assume_(text || start >= end);
532 if (start < end && text[start]) {
533 wchar_t buf[3];
534 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
535 const wchar_t* chr_end = chr + stdex::strlen(chr);
// scan_not returns the first character lacking the mask; reaching chr_end means all are punctuation.
536 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
537 if ((r && !m_invert) || (!r && m_invert)) {
538 this->interval.start = start;
539 return true;
540 }
541 }
542 this->interval.invalidate();
543 return false;
544 }
545 };
546
/// Parser that tests one code unit for being whitespace or punctuation according to the locale's
/// ctype facet. Unless match_multiline is set, code units for which stdex::islbreak() is true are rejected.
550 template <class T>
552 {
553 public:
/// \param[in] invert  When true, the test result is negated
/// \param[in] locale  Locale providing the ctype facet
554 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
556 m_invert(invert)
557 {}
558
559 protected:
/// On success the interval is [start, start + 1); otherwise it is invalidated.
560 virtual bool do_match(
561 _In_reads_or_z_opt_(end) const T* text,
562 _In_ size_t start = 0,
563 _In_ size_t end = SIZE_MAX,
564 _In_ int flags = match_default)
565 {
566 _Assume_(text || start >= end);
567 if (start < end && text[start]) {
568 bool r =
569 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
570 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
// Succeed when the test result differs from m_invert.
571 if ((r && !m_invert) || (!r && m_invert)) {
572 this->interval.end = (this->interval.start = start) + 1;
573 return true;
574 }
575 }
576 this->interval.invalidate();
577 return false;
578 }
579
/// Negate the test result?
580 bool m_invert;
581 };
582
585#ifdef _UNICODE
587#else
589#endif
590
// SGML-aware whitespace-or-punctuation test: decodes one code point with next_sgml_cp and checks
// that every wide character of the decoded string is whitespace or punctuation.
595 {
596 public:
/// \param[in] invert  When true, the test result is negated
/// \param[in] locale  Locale providing the ctype<wchar_t> facet
597 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
599 {}
600
601 protected:
/// On success the interval runs from `start` to the end of the decoded code point;
/// otherwise it is invalidated.
602 virtual bool do_match(
603 _In_reads_or_z_(end) const char* text,
604 _In_ size_t start = 0,
605 _In_ size_t end = SIZE_MAX,
606 _In_ int flags = match_default)
607 {
608 _Assume_(text || start >= end);
609 if (start < end && text[start]) {
610 wchar_t buf[3];
611 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
612 const wchar_t* chr_end = chr + stdex::strlen(chr);
// scan_not returns the first character lacking the mask; reaching chr_end means all characters qualify.
613 bool r =
614 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
615 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
616 if ((r && !m_invert) || (!r && m_invert)) {
617 this->interval.start = start;
618 return true;
619 }
620 }
621 this->interval.invalidate();
622 return false;
623 }
624 };
625
629 template <class T>
630 class basic_bol : public basic_parser<T>
631 {
632 public:
633 basic_bol(bool invert = false) : m_invert(invert) {}
634
635 protected:
636 virtual bool do_match(
637 _In_reads_or_z_opt_(end) const T* text,
638 _In_ size_t start = 0,
639 _In_ size_t end = SIZE_MAX,
640 _In_ int flags = match_default)
641 {
642 _Assume_(text || !end);
643 _Assume_(text || start >= end);
644 bool r = start == 0 || (start <= end && stdex::islbreak(text[start - 1]));
645 if ((r && !m_invert) || (!r && m_invert)) {
646 this->interval.end = this->interval.start = start;
647 return true;
648 }
649 this->interval.invalidate();
650 return false;
651 }
652
653 bool m_invert;
654 };
655
// Aliases of basic_bol for char and wchar_t; tbol follows _UNICODE.
656 using bol = basic_bol<char>;
657 using wbol = basic_bol<wchar_t>;
658#ifdef _UNICODE
659 using tbol = wbol;
660#else
661 using tbol = bol;
662#endif
664
668 template <class T>
669 class basic_eol : public basic_parser<T>
670 {
671 public:
672 basic_eol(bool invert = false) : m_invert(invert) {}
673
674 protected:
675 virtual bool do_match(
676 _In_reads_or_z_opt_(end) const T* text,
677 _In_ size_t start = 0,
678 _In_ size_t end = SIZE_MAX,
679 _In_ int flags = match_default)
680 {
681 _Assume_(text || start >= end);
682 bool r = start >= end || !text[start] || stdex::islbreak(text[start]);
683 if ((r && !m_invert) || (!r && m_invert)) {
684 this->interval.end = this->interval.start = start;
685 return true;
686 }
687 this->interval.invalidate();
688 return false;
689 }
690
691 bool m_invert;
692 };
693
// Aliases of basic_eol for char and wchar_t; teol follows _UNICODE.
694 using eol = basic_eol<char>;
695 using weol = basic_eol<wchar_t>;
696#ifdef _UNICODE
697 using teol = weol;
698#else
699 using teol = eol;
700#endif
702
/// Abstract base of the "one of a set" parsers. Adds the `hit_offset` result field and the
/// `m_invert` option; do_match() stays pure virtual.
703 template <class T>
704 class basic_set : public basic_parser<T>
705 {
706 public:
/// \param[in] invert  When true, derived parsers succeed on elements NOT in the set
/// \param[in] locale  Locale for the parser
707 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
709 hit_offset(SIZE_MAX),
710 m_invert(invert)
711 {}
712
/// Resets hit_offset to SIZE_MAX ("no hit").
713 virtual void invalidate()
714 {
715 hit_offset = SIZE_MAX;
717 }
718
/// Offset reported by the derived parser's set lookup on the last match; SIZE_MAX when there was no match.
719 size_t hit_offset;
720
721 protected:
/// Matching implementation, supplied by the derived set parser.
722 virtual bool do_match(
723 _In_reads_or_z_opt_(end) const T* text,
724 _In_ size_t start = 0,
725 _In_ size_t end = SIZE_MAX,
726 _In_ int flags = match_default) = 0;
727
/// Negate the membership test?
728 bool m_invert;
729 };
730
/// Parser that tests one code unit for membership in a set of code units.
734 template <class T>
735 class basic_cu_set : public basic_set<T>
736 {
737 public:
// Constructor parameters (the line carrying the constructor name is absent from this listing):
// `set`/`count` give the code units of the set; `invert` and `locale` as in basic_set.
739 _In_reads_or_z_(count) const T* set,
740 _In_ size_t count = SIZE_MAX,
741 _In_ bool invert = false,
742 _In_ const std::locale& locale = std::locale()) :
744 {
// Copy at most `count` code units, stopping at a zero terminator; a null `set` leaves the set empty.
745 if (set)
746 m_set.assign(set, set + stdex::strnlen(set, count));
747 }
748
749 protected:
/// On success the interval is [start, start + 1) and hit_offset holds the lookup result
/// (stdex::npos for an inverted match); on failure both are reset.
750 virtual bool do_match(
751 _In_reads_or_z_opt_(end) const T* text,
752 _In_ size_t start = 0,
753 _In_ size_t end = SIZE_MAX,
754 _In_ int flags = match_default)
755 {
756 _Assume_(text || start >= end);
757 if (start < end && text[start]) {
758 const T* set = m_set.data();
// Look the code unit up in the set; stdex::npos signals "not found".
759 size_t r = (flags & match_case_insensitive) ?
760 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
761 stdex::strnchr(set, m_set.size(), text[start]);
762 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
763 this->hit_offset = r;
764 this->interval.end = (this->interval.start = start) + 1;
765 return true;
766 }
767 }
768 this->hit_offset = SIZE_MAX;
769 this->interval.invalidate();
770 return false;
771 }
772
/// Code units forming the set
773 std::basic_string<T> m_set;
774 };
775
// tcu_set selects wcu_set when _UNICODE is defined and cu_set otherwise.
// (The declarations of cu_set and wcu_set are not part of this listing.)
778#ifdef _UNICODE
779 using tcu_set = wcu_set;
780#else
781 using tcu_set = cu_set;
782#endif
783
/// Parser that tests one SGML code point (decoded by next_sgml_cp) for presence in a set
/// that is itself given as SGML text and converted with sgml2str.
787 class sgml_cp_set : public basic_set<char>
788 {
789 public:
/// \param[in] set     SGML text listing the code points of the set; null leaves the set empty
/// \param[in] count   Length limit passed to sgml2str
/// \param[in] invert  When true, succeed on code points NOT found in the set
/// \param[in] locale  Locale used for case-insensitive lookup
790 sgml_cp_set(const char* set, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
792 {
793 if (set)
794 m_set = sgml2str(set, count);
795 }
796
797 protected:
/// On success the interval runs from `start` to the end of the decoded code point and
/// hit_offset holds the lookup result (stdex::npos for an inverted match); on failure both are reset.
798 virtual bool do_match(
799 _In_reads_or_z_(end) const char* text,
800 _In_ size_t start = 0,
801 _In_ size_t end = SIZE_MAX,
802 _In_ int flags = match_default)
803 {
804 _Assume_(text || start >= end);
805 if (start < end && text[start]) {
806 wchar_t buf[3];
807 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
808 const wchar_t* set = m_set.data();
// The decoded code point may span several wchar_t, hence a substring search rather than a character search.
809 size_t r = (flags & match_case_insensitive) ?
810 stdex::strnistr(set, m_set.size(), chr, m_locale) :
811 stdex::strnstr(set, m_set.size(), chr);
812 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
813 hit_offset = r;
814 this->interval.start = start;
815 return true;
816 }
817 }
818 hit_offset = SIZE_MAX;
819 this->interval.invalidate();
820 return false;
821 }
822
/// Set of code points, converted to a wide string
823 std::wstring m_set;
824 };
825
829 template <class T>
830 class basic_string : public basic_parser<T>
831 {
832 public:
// Constructor parameters and initializer (the line carrying the constructor name is absent from this listing).
// m_str receives at most `count` code units of `str`, stopping at a zero terminator.
834 _In_reads_or_z_(count) const T* str,
835 _In_ size_t count = SIZE_MAX,
836 _In_ const std::locale& locale = std::locale()) :
838 m_str(str, str + stdex::strnlen(str, count))
839 {}
840
841 protected:
842 virtual bool do_match(
843 _In_reads_or_z_opt_(end) const T* text,
844 _In_ size_t start = 0,
845 _In_ size_t end = SIZE_MAX,
846 _In_ int flags = match_default)
847 {
848 _Assume_(text || start >= end);
849 size_t
850 m = m_str.size(),
851 n = std::min<size_t>(end - start, m);
852 bool r = ((flags & match_case_insensitive) ?
853 stdex::strnicmp(text + start, n, m_str.data(), m, this->m_locale) :
854 stdex::strncmp(text + start, n, m_str.data(), m)) == 0;
855 if (r) {
856 this->interval.end = (this->interval.start = start) + n;
857 return true;
858 }
859 this->interval.invalidate();
860 return false;
861 }
862
863 std::basic_string<T> m_str;
864 };
865
// tstring selects wstring when _UNICODE is defined and string otherwise.
// (The declarations of these parser aliases are not part of this listing.)
868#ifdef _UNICODE
869 using tstring = wstring;
870#else
871 using tstring = string;
872#endif
873
// SGML-aware string parser: the reference string is converted with sgml2str once, and the
// text is decoded code point by code point with next_sgml_cp while comparing.
878 {
879 public:
/// \param[in] str     SGML text of the string to match
/// \param[in] count   Length limit passed to sgml2str
/// \param[in] locale  Locale used for case-insensitive comparison
880 sgml_string(const char* str, size_t count = SIZE_MAX, _In_ const std::locale& locale = std::locale()) :
882 m_str(sgml2str(str, count))
883 {}
884
885 protected:
/// On success the interval covers the SGML text consumed while matching the whole reference
/// string; on failure it is invalidated.
886 virtual bool do_match(
887 _In_reads_or_z_(end) const char* text,
888 _In_ size_t start = 0,
889 _In_ size_t end = SIZE_MAX,
890 _In_ int flags = match_default)
891 {
892 _Assume_(text || start >= end);
893 const wchar_t* str = m_str.data();
894 const bool case_insensitive = flags & match_case_insensitive ? true : false;
895 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
// interval.end serves as the read cursor into `text`; `str` walks the reference string.
896 for (this->interval.end = start;;) {
897 if (!*str) {
// Whole reference string consumed: match.
898 this->interval.start = start;
899 return true;
900 }
901 if (this->interval.end >= end || !text[this->interval.end]) {
// Text ran out before the reference string did.
902 this->interval.invalidate();
903 return false;
904 }
905 wchar_t buf[3];
906 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
// Compare every wide character of the decoded code point against the reference string.
907 for (; *chr; ++str, ++chr) {
908 if (!*str ||
909 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
910 {
911 this->interval.invalidate();
912 return false;
913 }
914 }
915 }
916 }
917
/// Reference string, converted to wide characters
918 std::wstring m_str;
919 };
920
/// Parser that applies another parser repeatedly, between a minimum and a maximum number of times.
// NOTE(review): the class header, three constructor initializers and two data members
// (listing lines 925, 930-932, 963-964) are absent from this listing.
924 template <class T>
926 {
927 public:
/// \param[in] el              Parser to repeat
/// \param[in] min_iterations  Minimum number of successful repetitions required
/// \param[in] max_iterations  Maximum number of repetitions attempted
/// \param[in] greedy          When false, stop as soon as min_iterations is reached
928 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = SIZE_MAX, bool greedy = true) :
929 m_el(el),
933 {}
934
935 protected:
/// On success the interval runs from `start` to the end of the last successful repetition;
/// it is invalidated when fewer than m_min_iterations repetitions matched.
936 virtual bool do_match(
937 _In_reads_or_z_opt_(end) const T* text,
938 _In_ size_t start = 0,
939 _In_ size_t end = SIZE_MAX,
940 _In_ int flags = match_default)
941 {
942 _Assume_(text || start >= end);
943 this->interval.start = this->interval.end = start;
944 for (size_t i = 0; ; i++) {
// Stop early: non-greedy with the minimum reached, or the maximum reached.
945 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
946 return true;
947 if (!m_el->match(text, this->interval.end, end, flags)) {
// The element failed: fine if the minimum was already met, otherwise the whole match fails.
948 if (i >= m_min_iterations)
949 return true;
950 break;
951 }
952 if (m_el->interval.end == this->interval.end) {
953 // Element did match, but the matching interval was empty. Quit instead of spinning.
954 return true;
955 }
956 this->interval.end = m_el->interval.end;
957 }
958 this->interval.invalidate();
959 return false;
960 }
961
/// Parser being repeated
962 std::shared_ptr<basic_parser<T>> m_el;
/// Keep matching beyond the minimum while the element still matches?
965 bool m_greedy;
966 };
967
// titerations selects witerations when _UNICODE is defined and iterations otherwise.
// (The declarations of iterations and witerations are not part of this listing.)
970#ifdef _UNICODE
971 using titerations = witerations;
972#else
973 using titerations = iterations;
974#endif
976
/// Base class of the parsers that own an ordered list of sub-parsers (m_collection).
980 template <class T>
982 {
983 protected:
/// Creates an empty collection; for use by derived classes that fill m_collection themselves.
984 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
985
986 public:
// Constructor taking an array of `count` sub-parsers (the line carrying the constructor name is
// absent from this listing). The shared pointers are copied into m_collection in order.
988 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
989 _In_ size_t count,
990 _In_ const std::locale& locale = std::locale()) :
992 {
993 _Assume_(el || !count);
994 m_collection.reserve(count);
995 for (size_t i = 0; i < count; i++)
996 m_collection.push_back(el[i]);
997 }
998
// Constructor that takes over an existing vector of sub-parsers by move
// (the line carrying the constructor name is absent from this listing).
1000 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1001 _In_ const std::locale& locale = std::locale()) :
1003 m_collection(std::move(collection))
1004 {}
1005
/// Invalidates every sub-parser in the collection.
1006 virtual void invalidate()
1007 {
1008 for (auto& el : m_collection)
1009 el->invalidate();
1011 }
1012
1013 protected:
/// Sub-parsers, in the order given at construction
1014 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
1015 };
1016
/// Parser that matches all sub-parsers of the collection one after another, each starting where
/// the previous one ended.
1020 template <class T>
1022 {
1023 public:
// Constructor taking an array of sub-parsers (the constructor-name and initializer lines are
// absent from this listing).
1025 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1026 _In_ size_t count = 0,
1027 _In_ const std::locale& locale = std::locale()) :
1029 {}
1030
// Constructor taking a vector of sub-parsers by rvalue reference (the constructor-name and
// initializer lines are absent from this listing).
1032 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1033 _In_ const std::locale& locale = std::locale()) :
1035 {}
1036
1037 protected:
/// On success the interval runs from `start` to the end of the last sub-parser's match.
/// On failure the sub-parsers after the failing one are invalidated, as is the interval.
1038 virtual bool do_match(
1039 _In_reads_or_z_opt_(end) const T* text,
1040 _In_ size_t start = 0,
1041 _In_ size_t end = SIZE_MAX,
1042 _In_ int flags = match_default)
1043 {
1044 _Assume_(text || start >= end);
// interval.end is the running cursor handed to each sub-parser in turn.
1045 this->interval.end = start;
1046 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1047 if (!(*i)->match(text, this->interval.end, end, flags)) {
// Sub-parsers that were never reached must not keep results from an earlier run.
1048 for (++i; i != this->m_collection.end(); ++i)
1049 (*i)->invalidate();
1050 this->interval.invalidate();
1051 return false;
1052 }
1053 this->interval.end = (*i)->interval.end;
1054 }
1055 this->interval.start = start;
1056 return true;
1057 }
1058 };
1059
// tsequence selects wsequence when _UNICODE is defined and sequence otherwise.
// (The declarations of sequence and wsequence are not part of this listing.)
1062#ifdef _UNICODE
1063 using tsequence = wsequence;
1064#else
1065 using tsequence = sequence;
1066#endif
1068
/// Parser that tries the sub-parsers of the collection in order at the same start position and
/// succeeds with the first one that matches.
1072 template <class T>
1074 {
1075 protected:
/// Creates a branch with no alternatives yet; hit_offset starts at SIZE_MAX.
1076 basic_branch(_In_ const std::locale& locale) :
1078 hit_offset(SIZE_MAX)
1079 {}
1080
1081 public:
// Constructor taking an array of alternatives (the constructor-name and base-initializer lines
// are absent from this listing).
1083 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1084 _In_ size_t count = 0,
1085 _In_ const std::locale& locale = std::locale()) :
1087 hit_offset(SIZE_MAX)
1088 {}
1089
// Constructor taking a vector of alternatives by rvalue reference (the constructor-name and
// base-initializer lines are absent from this listing).
1091 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1092 _In_ const std::locale& locale = std::locale()) :
1094 hit_offset(SIZE_MAX)
1095 {}
1096
/// Resets hit_offset to SIZE_MAX ("no hit").
1097 virtual void invalidate()
1098 {
1099 hit_offset = SIZE_MAX;
1101 }
1102
/// Index in the collection of the alternative that matched; SIZE_MAX when none matched.
1103 size_t hit_offset;
1104
1105 protected:
/// On success the interval is copied from the matching alternative and the alternatives after
/// it are invalidated; on failure hit_offset and the interval are reset.
1106 virtual bool do_match(
1107 _In_reads_or_z_opt_(end) const T* text,
1108 _In_ size_t start = 0,
1109 _In_ size_t end = SIZE_MAX,
1110 _In_ int flags = match_default)
1111 {
1112 _Assume_(text || start >= end);
// hit_offset counts along with the iterator, so it holds the index of the current alternative.
1113 hit_offset = 0;
1114 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1115 if ((*i)->match(text, start, end, flags)) {
1116 this->interval = (*i)->interval;
1117 for (++i; i != this->m_collection.end(); ++i)
1118 (*i)->invalidate();
1119 return true;
1120 }
1121 }
1122 hit_offset = SIZE_MAX;
1123 this->interval.invalidate();
1124 return false;
1125 }
1126 };
1127
// char alias of basic_branch; tbranch selects wbranch when _UNICODE is defined and branch otherwise.
// (The declaration of wbranch is not part of this listing.)
1128 using branch = basic_branch<char>;
1130#ifdef _UNICODE
1131 using tbranch = wbranch;
1132#else
1133 using tbranch = branch;
1134#endif
1136
/// Branch parser whose alternatives are string parsers (T_parser, basic_string<T> by default),
/// built from a list of strings.
1140 template <class T, class T_parser = basic_string<T>>
1142 {
1143 public:
// Constructor taking the alternatives as one buffer of back-to-back zero-terminated strings
// (the constructor-name and base-initializer lines are absent from this listing).
1145 _In_reads_(count) const T* str_z = nullptr,
1146 _In_ size_t count = 0,
1147 _In_ const std::locale& locale = std::locale()) :
1149 {
1150 build(str_z, count);
1151 }
1152
/// Constructor taking the alternatives as a variable argument list of `const T*` strings.
/// The list must be terminated with a null pointer; the default locale is used.
1153 basic_string_branch(_In_z_ const T* str, ...) :
1154 basic_branch<T>(std::locale())
1155 {
1156 va_list params;
1157 va_start(params, str);
1158 build(str, params);
1159 va_end(params);
1160 }
1161
/// Same as the variadic constructor above, but with an explicit locale as first argument.
1162 basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1164 {
1165 va_list params;
1166 va_start(params, str);
1167 build(str, params);
1168 va_end(params);
1169 }
1170
1171 protected:
/// Fills m_collection from a buffer of consecutive zero-terminated strings.
/// Parsing stops at `count` code units or at the first empty string.
1172 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1173 {
1174 _Assume_(str_z || !count);
1175 if (count) {
1176 size_t offset, n;
// First pass: count the strings so the vector can be sized once.
1177 for (
1178 offset = n = 0;
1179 offset < count && str_z[offset];
1180 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1181 this->m_collection.reserve(n);
// Second pass: create one T_parser per string.
1182 for (
1183 offset = 0;
1184 offset < count && str_z[offset];
1185 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1186 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1187 }
1188 }
1189
/// Fills m_collection from `str` followed by the strings in `params`, up to the terminating null pointer.
1190 void build(_In_z_ const T* str, _In_ va_list params)
1191 {
1192 const T* p;
1193 for (
1194 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, SIZE_MAX, this->m_locale)));
1195 (p = va_arg(params, const T*)) != nullptr;
1196 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, SIZE_MAX, this->m_locale))));
1197 }
1198 };
1199
1202#ifdef _UNICODE
1204#else
1206#endif
1208
/// Parser that matches all sub-parsers of the collection, each exactly once, in any order.
1212 template <class T>
1214 {
1215 public:
// Constructor taking an array of sub-parsers (the constructor-name and initializer lines are
// absent from this listing).
1217 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1218 _In_ size_t count = 0,
1219 _In_ const std::locale& locale = std::locale()) :
1221 {}
1222
// Constructor taking a vector of sub-parsers by rvalue reference (the constructor-name and
// initializer lines are absent from this listing).
1224 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1225 _In_ const std::locale& locale = std::locale()) :
1227 {}
1228
1229 protected:
/// Invalidates all sub-parsers, then searches for an ordering with match_recursively().
/// On success the interval runs from `start` to the end found by the recursion.
1230 virtual bool do_match(
1231 _In_reads_or_z_opt_(end) const T* text,
1232 _In_ size_t start = 0,
1233 _In_ size_t end = SIZE_MAX,
1234 _In_ int flags = match_default)
1235 {
1236 _Assume_(text || start >= end);
// An invalid interval marks a sub-parser as "not yet used" for the recursion below.
1237 for (auto& el : this->m_collection)
1238 el->invalidate();
1239 if (match_recursively(text, start, end, flags)) {
1240 this->interval.start = start;
1241 return true;
1242 }
1243 this->interval.invalidate();
1244 return false;
1245 }
1246
/// Backtracking step: tries every still-unused sub-parser at `start`; for each one that matches,
/// recurses from the end of its match. When every sub-parser is already used, records `start`
/// as the end of the overall match.
///
/// \returns true when all sub-parsers could be matched in some order starting at `start`
1247 bool match_recursively(
1248 _In_reads_or_z_opt_(end) const T* text,
1249 _In_ size_t start = 0,
1250 _In_ size_t end = SIZE_MAX,
1251 _In_ int flags = match_default)
1252 {
1253 bool all_matched = true;
1254 for (auto& el : this->m_collection) {
1255 if (!el->interval) {
1256 // Element was not matched in permutation yet.
1257 all_matched = false;
1258 if (el->match(text, start, end, flags)) {
1259 // Element matched for the first time.
1260 if (match_recursively(text, el->interval.end, end, flags)) {
1261 // Rest of the elements matched too.
1262 return true;
1263 }
// Dead end: release this element so another ordering can use it.
1264 el->invalidate();
1265 }
1266 }
1267 }
1268 if (all_matched) {
1269 this->interval.end = start;
1270 return true;
1271 }
1272 return false;
1273 }
1274 };
1275
// tpermutation selects wpermutation when _UNICODE is defined and permutation otherwise.
// (The declarations of permutation and wpermutation are not part of this listing.)
1278#ifdef _UNICODE
1279 using tpermutation = wpermutation;
1280#else
1281 using tpermutation = permutation;
1282#endif
1284
/// Base class of the integer parsers; adds the parsed `value` to the parser result.
1288 template <class T>
1289 class basic_integer : public basic_parser<T>
1290 {
1291 public:
/// \param[in] locale  Locale for the parser
1292 basic_integer(_In_ const std::locale& locale = std::locale()) :
1294 value(0)
1295 {}
1296
/// Resets `value` to 0.
1297 virtual void invalidate()
1298 {
1299 value = 0;
1301 }
1302
1303 public:
/// Number accumulated by the last match; 0 after construction or invalidate()
1304 size_t value;
1305 };
1306
/// Decimal integer parser. Each of the ten digits is recognised by its own sub-parser,
/// supplied by the caller.
1310 template <class T>
1312 {
1313 public:
// Constructor (the line carrying the constructor name and the base initializer are absent from
// this listing). digit_0 ... digit_9 are the parsers recognising the digits 0 ... 9.
1315 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1316 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1317 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1318 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1319 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1320 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1321 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1322 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1323 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1324 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1325 _In_ const std::locale& locale = std::locale()) :
1327 m_digit_0(digit_0),
1328 m_digit_1(digit_1),
1329 m_digit_2(digit_2),
1330 m_digit_3(digit_3),
1331 m_digit_4(digit_4),
1332 m_digit_5(digit_5),
1333 m_digit_6(digit_6),
1334 m_digit_7(digit_7),
1335 m_digit_8(digit_8),
1336 m_digit_9(digit_9)
1337 {}
1338
1339 protected:
/// Consumes consecutive digits starting at `start` and accumulates them into `value`
/// (value = value * 10 + digit). Scanning stops at the first position where no digit parser matches.
1340 virtual bool do_match(
1341 _In_reads_or_z_opt_(end) const T* text,
1342 _In_ size_t start = 0,
1343 _In_ size_t end = SIZE_MAX,
1344 _In_ int flags = match_default)
1345 {
1346 _Assume_(text || start >= end);
1347 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1348 size_t dig;
// Try the digit parsers in order 0..9; the first one that matches decides the digit value
// and advances the cursor to the end of its match.
1349 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1350 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1351 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1352 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1353 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1354 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1355 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1356 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1357 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1358 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1359 else break;
1360 this->value = this->value * 10 + dig;
1361 }
// NOTE(review): listing line 1362 is absent here; the closing brace after `return true` suggests
// a condition guards this success path (presumably "at least one digit consumed") - confirm
// against the original header.
1363 this->interval.start = start;
1364 return true;
1365 }
1366 this->interval.invalidate();
1367 return false;
1368 }
1369
/// Parsers recognising the digits 0 through 9
1370 std::shared_ptr<basic_parser<T>>
1371 m_digit_0,
1372 m_digit_1,
1373 m_digit_2,
1374 m_digit_3,
1375 m_digit_4,
1376 m_digit_5,
1377 m_digit_6,
1378 m_digit_7,
1379 m_digit_8,
1380 m_digit_9;
1381 };
1382
// tinteger10 picks winteger10 when _UNICODE is defined and integer10 otherwise.
// NOTE(review): the declarations of integer10 / winteger10 (listing lines 1383-1384) are not visible in this listing.
1385#ifdef _UNICODE
1386 using tinteger10 = winteger10;
1387#else
1388 using tinteger10 = integer10;
1389#endif
1391
// Decimal integer parser that also accepts thousand separators between three-digit groups.
// NOTE(review): the class-header line (listing 1396) and the constructor-name line (1399) are not visible in this listing;
// the class is presumably basic_integer10ts<T>, matching the aliases that follow it - confirm.
1395 template <class T>
1397 {
1398 public:
// Constructor: stores the digit-run parser and the separator set; starts with no digits and no separators seen.
// NOTE(review): listing line 1403 (first initializer) is not visible - presumably forwards `locale` to the base class; confirm.
1400 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1401 _In_ const std::shared_ptr<basic_set<T>>& separator,
1402 _In_ const std::locale& locale = std::locale()) :
1404 digit_count(0),
1405 has_separators(false),
1406 m_digits(digits),
1407 m_separator(separator)
1408 {}
1409
// Clears the digit counter and the separator flag.
// NOTE(review): listing line 1414 is not visible here - presumably the base-class invalidate() call; confirm.
1410 virtual void invalidate()
1411 {
1412 digit_count = 0;
1413 has_separators = false;
1415 }
1416
// NOTE(review): the declarations of digit_count and has_separators (listing lines 1417-1418) are not visible in this listing.
1419
1420 protected:
// Matches a leading digit run and then, if that run is at most three characters long, any number of
// "<separator><exactly three characters of digits>" groups. Every group must use the same separator
// (compared through m_separator->hit_offset). Each accepted group multiplies the value by 1000 and adds the group.
1421 virtual bool do_match(
1422 _In_reads_or_z_opt_(end) const T* text,
1423 _In_ size_t start = 0,
1424 _In_ size_t end = SIZE_MAX,
1425 _In_ int flags = match_default)
1426 {
1427 _Assume_(text || start >= end);
1428 if (m_digits->match(text, start, end, flags)) {
1429 // Leading part match.
1430 this->value = m_digits->value;
// digit_count is taken from the matched interval's size, i.e. it counts consumed characters.
1431 digit_count = m_digits->interval.size();
1432 has_separators = false;
1433 this->interval.start = start;
1434 this->interval.end = m_digits->interval.end;
1435 if (m_digits->interval.size() <= 3) {
1436 // Maybe separated with thousand separators?
// SIZE_MAX means "no separator accepted yet"; afterwards it holds the hit_offset of the separator in use.
1437 size_t hit_offset = SIZE_MAX;
1438 while (m_separator->match(text, this->interval.end, end, flags) &&
1439 (hit_offset == SIZE_MAX || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1440 m_digits->match(text, m_separator->interval.end, end, flags) &&
1441 m_digits->interval.size() == 3)
1442 {
1443 // Thousand separator and three-digit integer followed.
1444 this->value = this->value * 1000 + m_digits->value;
1445 digit_count += 3;
1446 has_separators = true;
1447 this->interval.end = m_digits->interval.end;
1448 hit_offset = m_separator->hit_offset;
1449 }
1450 }
1451
1452 return true;
1453 }
// No leading digits: reset the value and mark this parser's interval as invalid.
1454 this->value = 0;
1455 this->interval.invalidate();
1456 return false;
1457 }
1458
// Parser for one run of decimal digits, reused for the leading part and for every three-digit group.
1459 std::shared_ptr<basic_integer10<T>> m_digits;
// Set of accepted thousand-separator characters; its hit_offset identifies which one matched.
1460 std::shared_ptr<basic_set<T>> m_separator;
1461 };
1462
// Character-type instantiations of basic_integer10ts.
// tinteger10ts picks the wchar_t variant when _UNICODE is defined and the char variant otherwise.
// sgml_integer10ts is the same instantiation as integer10ts (basic_integer10ts<char>).
1463 using integer10ts = basic_integer10ts<char>;
1464 using winteger10ts = basic_integer10ts<wchar_t>;
1465#ifdef _UNICODE
1466 using tinteger10ts = winteger10ts;
1467#else
1468 using tinteger10ts = integer10ts;
1469#endif
1470 using sgml_integer10ts = basic_integer10ts<char>;
1471
// Hexadecimal integer parser assembled from sixteen sub-parsers, one per digit value 0-15.
// NOTE(review): the class-header line (listing 1476) and the constructor-name line (1479) are not visible in this listing;
// the class is presumably basic_integer16<T> - confirm.
1475 template <class T>
1477 {
1478 public:
// Constructor: stores the sixteen digit parsers. `locale` defaults to a default-constructed std::locale.
// NOTE(review): listing line 1497 (first initializer) is not visible - presumably forwards `locale` to the base class; confirm.
1480 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1481 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1482 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1483 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1484 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1485 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1486 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1487 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1488 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1489 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1490 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1491 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1492 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1493 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1494 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1495 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1496 _In_ const std::locale& locale = std::locale()) :
1498 m_digit_0(digit_0),
1499 m_digit_1(digit_1),
1500 m_digit_2(digit_2),
1501 m_digit_3(digit_3),
1502 m_digit_4(digit_4),
1503 m_digit_5(digit_5),
1504 m_digit_6(digit_6),
1505 m_digit_7(digit_7),
1506 m_digit_8(digit_8),
1507 m_digit_9(digit_9),
1508 m_digit_10(digit_10),
1509 m_digit_11(digit_11),
1510 m_digit_12(digit_12),
1511 m_digit_13(digit_13),
1512 m_digit_14(digit_14),
1513 m_digit_15(digit_15)
1514 {}
1515
1516 protected:
// Consumes consecutive digits beginning at `start` and accumulates them, base 16, into this->value.
// Scanning stops at `end`, at a zero character in `text`, or at the first position where none of the
// sixteen digit parsers matches. No overflow check on this->value is performed in the visible code.
1517 virtual bool do_match(
1518 _In_reads_or_z_opt_(end) const T* text,
1519 _In_ size_t start = 0,
1520 _In_ size_t end = SIZE_MAX,
1521 _In_ int flags = match_default)
1522 {
1523 _Assume_(text || start >= end);
1524 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1525 size_t dig;
// Try the digit parsers in order 0..15; on a hit, remember the digit and advance to where that parser stopped.
1526 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1527 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1528 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1529 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1530 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1531 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1532 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1533 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1534 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1535 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1536 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1537 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1538 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1539 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1540 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1541 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
1542 else break;
// Shift the accumulated value one hexadecimal place and append the new digit.
1543 this->value = this->value * 16 + dig;
1544 }
// NOTE(review): listing line 1545, which opens the success branch closed below, is not visible here -
// presumably it tests that at least one digit was consumed; confirm.
1546 this->interval.start = start;
1547 return true;
1548 }
// Failure: mark this parser's interval as invalid.
1549 this->interval.invalidate();
1550 return false;
1551 }
1552
// The sixteen digit sub-parsers, indexed by the digit value they stand for.
1553 std::shared_ptr<basic_parser<T>>
1554 m_digit_0,
1555 m_digit_1,
1556 m_digit_2,
1557 m_digit_3,
1558 m_digit_4,
1559 m_digit_5,
1560 m_digit_6,
1561 m_digit_7,
1562 m_digit_8,
1563 m_digit_9,
1564 m_digit_10,
1565 m_digit_11,
1566 m_digit_12,
1567 m_digit_13,
1568 m_digit_14,
1569 m_digit_15;
1570 };
1571
// tinteger16 picks winteger16 when _UNICODE is defined and integer16 otherwise.
// NOTE(review): the declarations of integer16 / winteger16 (listing lines 1572-1573) are not visible in this listing.
1574#ifdef _UNICODE
1575 using tinteger16 = winteger16;
1576#else
1577 using tinteger16 = integer16;
1578#endif
1580
// Roman numeral parser assembled from sub-parsers for the symbols worth 1, 5, 10, 50, 100, 500, 1000, 5000 and 10000.
// NOTE(review): the class-header line (listing 1585) and the constructor-name line (1588) are not visible in this listing;
// the class is presumably basic_roman_numeral<T> - confirm.
1584 template <class T>
1586 {
1587 public:
// Constructor: stores the nine symbol parsers. do_match() null-checks each of them before use,
// so individual symbols may be left out by passing an empty pointer.
// NOTE(review): listing line 1599 (first initializer) is not visible - presumably forwards `locale` to the base class; confirm.
1589 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1590 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1591 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1592 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1593 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1594 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1595 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1596 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1597 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1598 _In_ const std::locale& locale = std::locale()) :
1600 m_digit_1(digit_1),
1601 m_digit_5(digit_5),
1602 m_digit_10(digit_10),
1603 m_digit_50(digit_50),
1604 m_digit_100(digit_100),
1605 m_digit_500(digit_500),
1606 m_digit_1000(digit_1000),
1607 m_digit_5000(digit_5000),
1608 m_digit_10000(digit_10000)
1609 {}
1610
1611 protected:
// Reads Roman symbols one at a time and accumulates this->value, handling subtractive pairs (IV, IX, XL, ...).
// dig[0] is the symbol just read, dig[1]..dig[3] the three before it, dig[4] the very first symbol of the numeral.
// Succeeds when the accumulated value is non-zero.
1612 virtual bool do_match(
1613 _In_reads_or_z_opt_(end) const T* text,
1614 _In_ size_t start = 0,
1615 _In_ size_t end = SIZE_MAX,
1616 _In_ int flags = match_default)
1617 {
1618 _Assume_(text || start >= end);
// NOTE(review): listing line 1620, which declares `dig`, is not visible here. The code below indexes dig[0..4]
// and compares dig[4] with SIZE_MAX, so the array presumably has five elements with dig[4] starting at SIZE_MAX - confirm.
1619 size_t
1621 end2;
1622
// After every accepted symbol the history is shifted by one and the position advances to end2.
1623 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
1624 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1625 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1626 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1627 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1628 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1629 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1630 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1631 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1632 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1633 else break;
1634
1635 // Store first digit.
1636 if (dig[4] == SIZE_MAX) dig[4] = dig[0];
1637
1638 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1639 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1640 break;
1641 }
1642 if (dig[0] <= dig[1]) {
1643 // Digit is less or equal previous one: add.
1644 this->value += dig[0];
1645 }
1646 else if (
1647 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1648 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1649 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1650 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1651 {
1652 // Digit is up to two orders bigger than previous one: subtract. But...
1653 if (dig[2] < dig[0]) {
1654 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1655 break;
1656 }
1657 this->value -= dig[1]; // Cancel addition in the previous step.
1658 dig[0] -= dig[1]; // Combine last two digits.
1659 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1660 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1661 this->value += dig[0]; // Add combined value.
1662 }
1663 else {
1664 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1665 break;
1666 }
1667 }
// A zero total means no symbol was accepted, which is reported as a failed match.
1668 if (this->value) {
1669 this->interval.start = start;
1670 return true;
1671 }
1672 this->interval.invalidate();
1673 return false;
1674 }
1675
// Symbol sub-parsers, named after the value each symbol contributes.
1676 std::shared_ptr<basic_parser<T>>
1677 m_digit_1,
1678 m_digit_5,
1679 m_digit_10,
1680 m_digit_50,
1681 m_digit_100,
1682 m_digit_500,
1683 m_digit_1000,
1684 m_digit_5000,
1685 m_digit_10000;
1686 };
1687
// _UNICODE switch following the Roman numeral parser.
// NOTE(review): the alias lines inside both branches (listing lines 1691 and 1693) are not visible in this listing;
// presumably they select a wide or narrow alias as the sibling blocks do - confirm.
1690#ifdef _UNICODE
1692#else
1694#endif
1696
// Fraction parser: a numerator, a fraction line and a denominator matched back to back.
// NOTE(review): the class-header line (listing 1701) and the constructor-name line (1704) are not visible in this listing;
// the class is presumably basic_fraction<T> - confirm.
1700 template <class T>
1702 {
1703 public:
// Constructor: stores the three component parsers in the public members of the same name.
// NOTE(review): listing line 1709 (first initializer) is not visible - presumably forwards `locale` to the base class; confirm.
1705 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1706 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1707 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1708 _In_ const std::locale& locale = std::locale()) :
1710 numerator(_numerator),
1711 fraction_line(_fraction_line),
1712 denominator(_denominator)
1713 {}
1714
// Invalidates all three component parsers (dereferenced without a null check).
// NOTE(review): listing line 1720 is not visible here - presumably the base-class invalidate() call; confirm.
1715 virtual void invalidate()
1716 {
1717 numerator->invalidate();
1718 fraction_line->invalidate();
1719 denominator->invalidate();
1721 }
1722
// Component parsers; public, so callers can read each part's result after a match.
1723 std::shared_ptr<basic_parser<T>> numerator;
1724 std::shared_ptr<basic_parser<T>> fraction_line;
1725 std::shared_ptr<basic_parser<T>> denominator;
1726
1727 protected:
// Succeeds only when all three parts match consecutively, each starting where the previous one ended.
// On failure every component and this parser's own interval are invalidated.
1728 virtual bool do_match(
1729 _In_reads_or_z_opt_(end) const T* text,
1730 _In_ size_t start = 0,
1731 _In_ size_t end = SIZE_MAX,
1732 _In_ int flags = match_default)
1733 {
1734 _Assume_(text || start >= end);
1735 if (numerator->match(text, start, end, flags) &&
1736 fraction_line->match(text, numerator->interval.end, end, flags) &&
1737 denominator->match(text, fraction_line->interval.end, end, flags))
1738 {
1739 this->interval.start = start;
1740 this->interval.end = denominator->interval.end;
1741 return true;
1742 }
// A partial match (e.g. numerator only) must not leave stale component results behind.
1743 numerator->invalidate();
1744 fraction_line->invalidate();
1745 denominator->invalidate();
1746 this->interval.invalidate();
1747 return false;
1748 }
1749 };
1750
// tfraction picks wfraction when _UNICODE is defined and fraction otherwise.
// NOTE(review): the declarations of fraction / wfraction (listing lines 1751-1752) are not visible in this listing.
1753#ifdef _UNICODE
1754 using tfraction = wfraction;
1755#else
1756 using tfraction = fraction;
1757#endif
1759
// Score parser: <home> [spaces] <separator> [spaces] <guest>.
1763 template <class T>
1764 class basic_score : public basic_parser<T>
1765 {
1766 public:
// Constructor: stores the home, separator and guest parsers in public members and the space parser in m_space.
// NOTE(review): the constructor-name line (listing 1767) and listing line 1773 (first initializer) are not visible -
// the latter presumably forwards `locale` to the base class; confirm.
1768 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1769 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1770 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1771 _In_ const std::shared_ptr<basic_parser<T>>& space,
1772 _In_ const std::locale& locale = std::locale()) :
1774 home(_home),
1775 separator(_separator),
1776 guest(_guest),
1777 m_space(space)
1778 {}
1779
// Invalidates the three public component parsers; m_space is not touched in the visible code.
// NOTE(review): listing line 1785 is not visible here - presumably the base-class invalidate() call; confirm.
1780 virtual void invalidate()
1781 {
1782 home->invalidate();
1783 separator->invalidate();
1784 guest->invalidate();
1786 }
1787
// Component parsers; public, so callers can read each part's result after a match.
1788 std::shared_ptr<basic_parser<T>> home;
1789 std::shared_ptr<basic_parser<T>> separator;
1790 std::shared_ptr<basic_parser<T>> guest;
1791
1792 protected:
// Matches home, optional spaces, separator, optional spaces, guest - in that order.
// this->interval.end is used as the running cursor. Any missing mandatory part jumps to the shared
// failure path at label `end`, which invalidates the components and this parser's interval.
1793 virtual bool do_match(
1794 _In_reads_or_z_opt_(end) const T* text,
1795 _In_ size_t start = 0,
1796 _In_ size_t end = SIZE_MAX,
1797 _In_ int flags = match_default)
1798 {
1799 _Assume_(text || start >= end);
1800 this->interval.end = start;
1801
1802 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1803
1804 if (home->match(text, this->interval.end, end, flags))
1805 this->interval.end = home->interval.end;
1806 else
1807 goto end;
1808
// Skip any run of spaces (empty loop body; the advance happens in the increment expression).
1809 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1810
1811 if (separator->match(text, this->interval.end, end, flags))
1812 this->interval.end = separator->interval.end;
1813 else
1814 goto end;
1815
// Skip any run of spaces after the separator as well.
1816 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1817
1818 if (guest->match(text, this->interval.end, end, flags))
1819 this->interval.end = guest->interval.end;
1820 else
1821 goto end;
1822
1823 this->interval.start = start;
1824 return true;
1825
// Note: this label shares its name with the `end` parameter; labels live in their own namespace, so both coexist.
1826 end:
1827 home->invalidate();
1828 separator->invalidate();
1829 guest->invalidate();
1830 this->interval.invalidate();
1831 return false;
1832 }
1833
// Parser for a single space between the parts.
1834 std::shared_ptr<basic_parser<T>> m_space;
1835 };
1836
// Character-type instantiations of basic_score. tscore picks wscore when _UNICODE is defined and score otherwise.
// NOTE(review): the declaration of wscore (listing line 1838) is not visible in this listing.
1837 using score = basic_score<char>;
1839#ifdef _UNICODE
1840 using tscore = wscore;
1841#else
1842 using tscore = score;
1843#endif
1845
// Signed numeral parser: an optional sign (positive, negative or special) followed by a mandatory number.
// NOTE(review): the class-header line (listing 1850) and the constructor-name line (1853) are not visible in this listing;
// the class is presumably basic_signed_numeral<T>, matching the aliases that follow it - confirm.
1849 template <class T>
1851 {
1852 public:
// Constructor taking the three sign parsers and the number parser.
// NOTE(review): the initializer list (listing lines 1859-1863) is not visible here - presumably it stores the
// arguments in the public members of the same name and forwards `locale` to the base class; confirm.
1854 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1855 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1856 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1857 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1858 _In_ const std::locale& locale = std::locale()) :
1864 {}
1865
// Invalidates every component. Sign parsers are null-checked (they are optional); `number` is not.
// NOTE(review): listing line 1872 is not visible here - presumably the base-class invalidate() call; confirm.
1866 virtual void invalidate()
1867 {
1868 if (positive_sign) positive_sign->invalidate();
1869 if (negative_sign) negative_sign->invalidate();
1870 if (special_sign) special_sign->invalidate();
1871 number->invalidate();
1873 }
1874
// Component parsers; public, so callers can see which sign (if any) matched and read the number's result.
1875 std::shared_ptr<basic_parser<T>> positive_sign;
1876 std::shared_ptr<basic_parser<T>> negative_sign;
1877 std::shared_ptr<basic_parser<T>> special_sign;
1878 std::shared_ptr<basic_parser<T>> number;
1879
1880 protected:
// Tries the sign parsers in the order positive, negative, special; at most one ends up matched and the
// other two are explicitly invalidated. The number must then match right after the sign (or at `start`
// when no sign matched). On failure every component and this parser's interval are invalidated.
1881 virtual bool do_match(
1882 _In_reads_or_z_opt_(end) const T* text,
1883 _In_ size_t start = 0,
1884 _In_ size_t end = SIZE_MAX,
1885 _In_ int flags = match_default)
1886 {
1887 _Assume_(text || start >= end);
1888 this->interval.end = start;
1889 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1890 this->interval.end = positive_sign->interval.end;
1891 if (negative_sign) negative_sign->invalidate();
1892 if (special_sign) special_sign->invalidate();
1893 }
1894 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1895 this->interval.end = negative_sign->interval.end;
1896 if (positive_sign) positive_sign->invalidate();
1897 if (special_sign) special_sign->invalidate();
1898 }
1899 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1900 this->interval.end = special_sign->interval.end;
1901 if (positive_sign) positive_sign->invalidate();
1902 if (negative_sign) negative_sign->invalidate();
1903 }
1904 else {
// No sign present: that is fine, the sign is optional.
1905 if (positive_sign) positive_sign->invalidate();
1906 if (negative_sign) negative_sign->invalidate();
1907 if (special_sign) special_sign->invalidate();
1908 }
1909 if (number->match(text, this->interval.end, end, flags)) {
1910 this->interval.start = start;
1911 this->interval.end = number->interval.end;
1912 return true;
1913 }
// A sign without a number is not a numeral: discard the sign result too.
1914 if (positive_sign) positive_sign->invalidate();
1915 if (negative_sign) negative_sign->invalidate();
1916 if (special_sign) special_sign->invalidate();
1917 number->invalidate();
1918 this->interval.invalidate();
1919 return false;
1920 }
1921 };
1922
// Character-type instantiations of basic_signed_numeral.
// tsigned_numeral picks the wchar_t variant when _UNICODE is defined and the char variant otherwise.
// sgml_signed_numeral is the same instantiation as signed_numeral (basic_signed_numeral<char>).
1923 using signed_numeral = basic_signed_numeral<char>;
1924 using wsigned_numeral = basic_signed_numeral<wchar_t>;
1925#ifdef _UNICODE
1926 using tsigned_numeral = wsigned_numeral;
1927#else
1928 using tsigned_numeral = signed_numeral;
1929#endif
1930 using sgml_signed_numeral = basic_signed_numeral<char>;
1931
// Mixed numeral parser: an optional sign followed by "<integer> <fraction>", a bare <fraction>, or a bare <integer>.
// NOTE(review): the class-header line (listing 1936) and the constructor-name line (1939) are not visible in this listing;
// the class is presumably basic_mixed_numeral<T>, matching the aliases that follow it - confirm.
1935 template <class T>
1937 {
1938 public:
// Constructor taking the three sign parsers, the integer parser, the space parser and the fraction parser.
// NOTE(review): most of the initializer list (listing lines 1947-1952) is not visible here - presumably it stores
// the arguments in the public members of the same name and forwards `locale` to the base class; confirm.
1940 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1941 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1942 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1943 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1944 _In_ const std::shared_ptr<basic_parser<T>>& space,
1945 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1946 _In_ const std::locale& locale = std::locale()) :
1953 m_space(space)
1954 {}
1955
// Invalidates every public component. Sign parsers are null-checked (they are optional); integer and fraction are not.
// NOTE(review): listing line 1963 is not visible here - presumably the base-class invalidate() call; confirm.
1956 virtual void invalidate()
1957 {
1958 if (positive_sign) positive_sign->invalidate();
1959 if (negative_sign) negative_sign->invalidate();
1960 if (special_sign) special_sign->invalidate();
1961 integer->invalidate();
1962 fraction->invalidate();
1964 }
1965
// Component parsers; public, so callers can see which parts matched.
1966 std::shared_ptr<basic_parser<T>> positive_sign;
1967 std::shared_ptr<basic_parser<T>> negative_sign;
1968 std::shared_ptr<basic_parser<T>> special_sign;
1969 std::shared_ptr<basic_parser<T>> integer;
1970 std::shared_ptr<basic_parser<T>> fraction;
1971
1972 protected:
// After the optional sign, three shapes are tried in this order:
//   1. <integer> <space(s)> <fraction>  (if the fraction is missing, the integer alone is accepted)
//   2. <fraction>
//   3. <integer>
// On failure every component and this parser's interval are invalidated.
1973 virtual bool do_match(
1974 _In_reads_or_z_opt_(end) const T* text,
1975 _In_ size_t start = 0,
1976 _In_ size_t end = SIZE_MAX,
1977 _In_ int flags = match_default)
1978 {
1979 _Assume_(text || start >= end);
1980 this->interval.end = start;
1981
// Optional sign: positive, negative, special - first hit wins, the others are invalidated.
1982 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1983 this->interval.end = positive_sign->interval.end;
1984 if (negative_sign) negative_sign->invalidate();
1985 if (special_sign) special_sign->invalidate();
1986 }
1987 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1988 this->interval.end = negative_sign->interval.end;
1989 if (positive_sign) positive_sign->invalidate();
1990 if (special_sign) special_sign->invalidate();
1991 }
1992 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1993 this->interval.end = special_sign->interval.end;
1994 if (positive_sign) positive_sign->invalidate();
1995 if (negative_sign) negative_sign->invalidate();
1996 }
1997 else {
1998 if (positive_sign) positive_sign->invalidate();
1999 if (negative_sign) negative_sign->invalidate();
2000 if (special_sign) special_sign->invalidate();
2001 }
2002
2003 // Check for <integer> <fraction>
2004 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
2005 if (integer->match(text, this->interval.end, end, flags) &&
2006 m_space->match(text, integer->interval.end, end, space_match_flags))
2007 {
// Skip any further spaces (empty loop body; the advance happens in the increment expression).
2008 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
2009 if (fraction->match(text, this->interval.end, end, flags)) {
2010 this->interval.start = start;
2011 this->interval.end = fraction->interval.end;
2012 return true;
2013 }
// Integer followed by space but no fraction: accept the integer alone and end the match right after it.
2014 fraction->invalidate();
2015 this->interval.start = start;
2016 this->interval.end = integer->interval.end;
2017 return true;
2018 }
2019
// At this point this->interval.end still sits right after the sign (or at `start` when there was none).
2020 // Check for <fraction>
2021 if (fraction->match(text, this->interval.end, end, flags)) {
2022 integer->invalidate();
2023 this->interval.start = start;
2024 this->interval.end = fraction->interval.end;
2025 return true;
2026 }
2027
2028 // Check for <integer>
2029 if (integer->match(text, this->interval.end, end, flags)) {
2030 fraction->invalidate();
2031 this->interval.start = start;
2032 this->interval.end = integer->interval.end;
2033 return true;
2034 }
2035
2036 if (positive_sign) positive_sign->invalidate();
2037 if (negative_sign) negative_sign->invalidate();
2038 if (special_sign) special_sign->invalidate();
2039 integer->invalidate();
2040 fraction->invalidate();
2041 this->interval.invalidate();
2042 return false;
2043 }
2044
// Parser for a single space between the integer and the fraction.
2045 std::shared_ptr<basic_parser<T>> m_space;
2046 };
2047
// Character-type instantiations of basic_mixed_numeral.
// tmixed_numeral picks the wchar_t variant when _UNICODE is defined and the char variant otherwise.
// sgml_mixed_numeral is the same instantiation as mixed_numeral (basic_mixed_numeral<char>).
2048 using mixed_numeral = basic_mixed_numeral<char>;
2049 using wmixed_numeral = basic_mixed_numeral<wchar_t>;
2050#ifdef _UNICODE
2051 using tmixed_numeral = wmixed_numeral;
2052#else
2053 using tmixed_numeral = mixed_numeral;
2054#endif
2055 using sgml_mixed_numeral = basic_mixed_numeral<char>;
2056
// Scientific numeral parser: [sign] [integer] [decimal-separator decimal] [exponent-symbol [exp-sign] exponent].
// At least one of the integer and decimal parts must be present. The parsed number is stored in `value` as a double.
// NOTE(review): the class-header line (listing 2061) and the constructor-name line (2064) are not visible in this listing;
// the class is presumably basic_scientific_numeral<T>, matching the aliases that follow it - confirm.
2060 template <class T>
2062 {
2063 public:
// Constructor taking all component parsers; `value` starts out as quiet NaN.
// NOTE(review): most of the initializer list (listing lines 2076-2086) is not visible here - presumably it stores
// the arguments in the public members of the same name and forwards `locale` to the base class; confirm.
2065 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2066 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2067 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2068 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2069 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2070 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2071 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2072 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2073 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2074 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2075 _In_ const std::locale& locale = std::locale()) :
2087 value(std::numeric_limits<double>::quiet_NaN())
2088 {}
2089
// Invalidates every component and resets `value` to quiet NaN.
// Sign and exponent parsers are null-checked (optional); integer, decimal_separator and decimal are not.
// NOTE(review): listing line 2103 is not visible here - presumably the base-class invalidate() call; confirm.
2090 virtual void invalidate()
2091 {
2092 if (positive_sign) positive_sign->invalidate();
2093 if (negative_sign) negative_sign->invalidate();
2094 if (special_sign) special_sign->invalidate();
2095 integer->invalidate();
2096 decimal_separator->invalidate();
2097 decimal->invalidate();
2098 if (exponent_symbol) exponent_symbol->invalidate();
2099 if (positive_exp_sign) positive_exp_sign->invalidate();
2100 if (negative_exp_sign) negative_exp_sign->invalidate();
2101 if (exponent) exponent->invalidate();
2102 value = std::numeric_limits<double>::quiet_NaN();
2104 }
2105
// Component parsers; public, so callers can see which parts matched.
2106 std::shared_ptr<basic_parser<T>> positive_sign;
2107 std::shared_ptr<basic_parser<T>> negative_sign;
2108 std::shared_ptr<basic_parser<T>> special_sign;
2109 std::shared_ptr<basic_integer<T>> integer;
2110 std::shared_ptr<basic_parser<T>> decimal_separator;
2111 std::shared_ptr<basic_integer<T>> decimal;
2112 std::shared_ptr<basic_parser<T>> exponent_symbol;
2113 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2114 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2115 std::shared_ptr<basic_integer<T>> exponent;
// Numeric result of the last successful match; quiet NaN after construction or invalidate().
2116 double value;
2117
2118 protected:
// Matches the parts left to right, using this->interval.end as the running cursor, then computes `value`.
// Fails (invalidating everything) only when neither an integer part nor a decimal part is found.
2119 virtual bool do_match(
2120 _In_reads_or_z_opt_(end) const T* text,
2121 _In_ size_t start = 0,
2122 _In_ size_t end = SIZE_MAX,
2123 _In_ int flags = match_default)
2124 {
2125 _Assume_(text || start >= end);
2126 this->interval.end = start;
2127
// Optional sign: positive, negative, special - first hit wins, the others are invalidated.
2128 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2129 this->interval.end = positive_sign->interval.end;
2130 if (negative_sign) negative_sign->invalidate();
2131 if (special_sign) special_sign->invalidate();
2132 }
2133 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2134 this->interval.end = negative_sign->interval.end;
2135 if (positive_sign) positive_sign->invalidate();
2136 if (special_sign) special_sign->invalidate();
2137 }
2138 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2139 this->interval.end = special_sign->interval.end;
2140 if (positive_sign) positive_sign->invalidate();
2141 if (negative_sign) negative_sign->invalidate();
2142 }
2143 else {
2144 if (positive_sign) positive_sign->invalidate();
2145 if (negative_sign) negative_sign->invalidate();
2146 if (special_sign) special_sign->invalidate();
2147 }
2148
// Integer part is optional at this point (a bare decimal part is acceptable).
2149 if (integer->match(text, this->interval.end, end, flags))
2150 this->interval.end = integer->interval.end;
2151
// The decimal separator only counts when decimal digits follow it; otherwise both are discarded.
2152 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2153 decimal->match(text, decimal_separator->interval.end, end, flags))
2154 this->interval.end = decimal->interval.end;
2155 else {
2156 decimal_separator->invalidate();
2157 decimal->invalidate();
2158 }
2159
2160 if (integer->interval.empty() &&
2161 decimal->interval.empty())
2162 {
2163 // No integer part, no decimal part.
2164 if (positive_sign) positive_sign->invalidate();
2165 if (negative_sign) negative_sign->invalidate();
2166 if (special_sign) special_sign->invalidate();
2167 integer->invalidate();
2168 decimal_separator->invalidate();
2169 decimal->invalidate();
2170 if (exponent_symbol) exponent_symbol->invalidate();
2171 if (positive_exp_sign) positive_exp_sign->invalidate();
2172 if (negative_exp_sign) negative_exp_sign->invalidate();
2173 if (exponent) exponent->invalidate();
2174 this->interval.invalidate();
2175 return false;
2176 }
2177
// Optional exponent. First alternative: symbol, then either "+exponent" or an unsigned exponent.
2178 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2179 ((positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2180 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags)) ||
2181 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2182 {
2183 this->interval.end = exponent->interval.end;
2184 if (negative_exp_sign) negative_exp_sign->invalidate();
2185 }
// Second alternative: symbol, negative sign, exponent. The symbol is matched again from the same position.
2186 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2187 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2188 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2189 {
2190 this->interval.end = exponent->interval.end;
2191 if (positive_exp_sign) positive_exp_sign->invalidate();
2192 }
2193 else {
// No usable exponent: the numeral ends after the integer/decimal part.
2194 if (exponent_symbol) exponent_symbol->invalidate();
2195 if (positive_exp_sign) positive_exp_sign->invalidate();
2196 if (negative_exp_sign) negative_exp_sign->invalidate();
2197 if (exponent) exponent->invalidate();
2198 }
2199
// value = integer + decimal * 10^(-length of decimal interval), negated for a negative sign,
// then scaled by 10^(+/-exponent) when an exponent matched.
// NOTE(review): an interval used as a condition presumably tests "is valid / non-empty" - confirm against the interval type.
2200 value = (double)integer->value;
2201 if (decimal->interval)
2202 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2203 if (negative_sign && negative_sign->interval)
2204 value = -value;
2205 if (exponent && exponent->interval) {
2206 double e = (double)exponent->value;
2207 if (negative_exp_sign && negative_exp_sign->interval)
2208 e = -e;
2209 value *= pow(10.0, e);
2210 }
2211
2212 this->interval.start = start;
2213 return true;
2214 }
2215 };
2216
// Character-type instantiations of basic_scientific_numeral.
// tscientific_numeral picks the wchar_t variant when _UNICODE is defined and the char variant otherwise.
// sgml_scientific_numeral is the same instantiation as scientific_numeral (basic_scientific_numeral<char>).
2217 using scientific_numeral = basic_scientific_numeral<char>;
2218 using wscientific_numeral = basic_scientific_numeral<wchar_t>;
2219#ifdef _UNICODE
2220 using tscientific_numeral = wscientific_numeral;
2221#else
2222 using tscientific_numeral = scientific_numeral;
2223#endif
2224 using sgml_scientific_numeral = basic_scientific_numeral<char>;
2225
// Monetary numeral parser: [sign] <currency> [integer] [decimal-separator decimal].
// The currency is mandatory, and at least one of the integer and decimal parts must be present.
// NOTE(review): the class-header line (listing 2230) and the constructor-name line (2233) are not visible in this listing;
// the class is presumably basic_monetary_numeral<T>, matching the aliases that follow it - confirm.
2229 template <class T>
2231 {
2232 public:
// Constructor taking the three sign parsers, the currency parser and the integer / separator / decimal parsers.
// The sign parsers may be empty pointers; every use of them in this class is null-checked.
// NOTE(review): the initializer list (listing lines 2242-2249) is not visible here - presumably it stores the
// arguments in the public members of the same name and forwards `locale` to the base class; confirm.
2234 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2235 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2236 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2237 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2238 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2239 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2240 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2241 _In_ const std::locale& locale = std::locale()) :
2250 {}
2251
// Invalidates every component. Sign parsers are null-checked (they are optional); the others are not.
// NOTE(review): listing line 2261 is not visible here - presumably the base-class invalidate() call; confirm.
2252 virtual void invalidate()
2253 {
2254 if (positive_sign) positive_sign->invalidate();
2255 if (negative_sign) negative_sign->invalidate();
2256 if (special_sign) special_sign->invalidate();
2257 currency->invalidate();
2258 integer->invalidate();
2259 decimal_separator->invalidate();
2260 decimal->invalidate();
2262 }
2263
// Component parsers; public, so callers can see which parts matched.
2264 std::shared_ptr<basic_parser<T>> positive_sign;
2265 std::shared_ptr<basic_parser<T>> negative_sign;
2266 std::shared_ptr<basic_parser<T>> special_sign;
2267 std::shared_ptr<basic_parser<T>> currency;
2268 std::shared_ptr<basic_parser<T>> integer;
2269 std::shared_ptr<basic_parser<T>> decimal_separator;
2270 std::shared_ptr<basic_parser<T>> decimal;
2271
2272 protected:
// Matches the parts left to right, using this->interval.end as the running cursor.
// Fails (invalidating the components) when the currency is missing or when neither an integer part
// nor a decimal part follows it.
2273 virtual bool do_match(
2274 _In_reads_or_z_opt_(end) const T* text,
2275 _In_ size_t start = 0,
2276 _In_ size_t end = SIZE_MAX,
2277 _In_ int flags = match_default)
2278 {
2279 _Assume_(text || start >= end);
2280 this->interval.end = start;
2281
// Optional sign: positive, negative, special - first hit wins, the others are invalidated.
// Each sign parser is null-checked before use, consistent with invalidate() and the failure paths below,
// so a numeral configured without one of the signs no longer dereferences an empty pointer.
2282 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2283 this->interval.end = positive_sign->interval.end;
2284 if (negative_sign) negative_sign->invalidate();
2285 if (special_sign) special_sign->invalidate();
2286 }
2287 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2288 this->interval.end = negative_sign->interval.end;
2289 if (positive_sign) positive_sign->invalidate();
2290 if (special_sign) special_sign->invalidate();
2291 }
2292 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2293 this->interval.end = special_sign->interval.end;
2294 if (positive_sign) positive_sign->invalidate();
2295 if (negative_sign) negative_sign->invalidate();
2296 }
2297 else {
2298 if (positive_sign) positive_sign->invalidate();
2299 if (negative_sign) negative_sign->invalidate();
2300 if (special_sign) special_sign->invalidate();
2301 }
2302
// The currency is mandatory.
2303 if (currency->match(text, this->interval.end, end, flags))
2304 this->interval.end = currency->interval.end;
2305 else {
2306 if (positive_sign) positive_sign->invalidate();
2307 if (negative_sign) negative_sign->invalidate();
2308 if (special_sign) special_sign->invalidate();
2309 integer->invalidate();
2310 decimal_separator->invalidate();
2311 decimal->invalidate();
2312 this->interval.invalidate();
2313 return false;
2314 }
2315
// Integer part is optional at this point (a bare decimal part is acceptable).
2316 if (integer->match(text, this->interval.end, end, flags))
2317 this->interval.end = integer->interval.end;
// The decimal separator only counts when decimal digits follow it; otherwise both are discarded.
2318 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2319 decimal->match(text, decimal_separator->interval.end, end, flags))
2320 this->interval.end = decimal->interval.end;
2321 else {
2322 decimal_separator->invalidate();
2323 decimal->invalidate();
2324 }
2325
2326 if (integer->interval.empty() &&
2327 decimal->interval.empty())
2328 {
2329 // No integer part, no decimal part.
2330 if (positive_sign) positive_sign->invalidate();
2331 if (negative_sign) negative_sign->invalidate();
2332 if (special_sign) special_sign->invalidate();
2333 currency->invalidate();
2334 integer->invalidate();
2335 decimal_separator->invalidate();
2336 decimal->invalidate();
2337 this->interval.invalidate();
2338 return false;
2339 }
2340
2341 this->interval.start = start;
2342 return true;
2343 }
2344 };
2345
// Convenience aliases of basic_monetary_numeral: char, wchar_t, a t-prefixed
// alias that follows _UNICODE, and an SGML alias that is the char instantiation.
2346 using monetary_numeral = basic_monetary_numeral<char>;
2347 using wmonetary_numeral = basic_monetary_numeral<wchar_t>;
2348#ifdef _UNICODE
2349 using tmonetary_numeral = wmonetary_numeral;
2350#else
2351 using tmonetary_numeral = monetary_numeral;
2352#endif
2353 using sgml_monetary_numeral = basic_monetary_numeral<char>;
2354
2358 template <class T>
2360 {
2361 public:
2363 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2364 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2365 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2366 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2367 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2368 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2369 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2370 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2371 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2372 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2373 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2374 _In_ const std::locale& locale = std::locale()) :
2376 m_digit_0(digit_0),
2377 m_digit_1(digit_1),
2378 m_digit_2(digit_2),
2379 m_digit_3(digit_3),
2380 m_digit_4(digit_4),
2381 m_digit_5(digit_5),
2382 m_digit_6(digit_6),
2383 m_digit_7(digit_7),
2384 m_digit_8(digit_8),
2385 m_digit_9(digit_9),
2386 m_separator(separator)
2387 {
2388 value.s_addr = 0;
2389 }
2390
/// Resets the per-octet intervals and the parsed address.
///
/// Each of the four `components` entries gets start = 1 and end = 0 (start past
/// end), and `value.s_addr` is cleared to 0.
2391 virtual void invalidate()
2392 {
2393 components[0].start = 1;
2394 components[0].end = 0;
2395 components[1].start = 1;
2396 components[1].end = 0;
2397 components[2].start = 1;
2398 components[2].end = 0;
2399 components[3].start = 1;
2400 components[3].end = 0;
2401 value.s_addr = 0;
2403 }
2404
2407
2408 protected:
2409 virtual bool do_match(
2410 _In_reads_or_z_opt_(end) const T* text,
2411 _In_ size_t start = 0,
2412 _In_ size_t end = SIZE_MAX,
2413 _In_ int flags = match_default)
2414 {
2415 _Assume_(text || start >= end);
2416 this->interval.end = start;
2417 value.s_addr = 0;
2418
2419 size_t i;
2420 for (i = 0; i < 4; i++) {
2421 if (i) {
2422 if (m_separator->match(text, this->interval.end, end, flags))
2423 this->interval.end = m_separator->interval.end;
2424 else
2425 goto error;
2426 }
2427
2428 components[i].start = this->interval.end;
2429 bool is_empty = true;
2430 size_t x;
2431 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2432 size_t dig, digit_end;
2433 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2434 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2435 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2436 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2437 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2438 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2439 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2440 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2441 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2442 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2443 else break;
2444 size_t x_n = x * 10 + dig;
2445 if (x_n <= 255) {
2446 x = x_n;
2447 this->interval.end = digit_end;
2448 is_empty = false;
2449 }
2450 else
2451 break;
2452 }
2453 if (is_empty)
2454 goto error;
2455 components[i].end = this->interval.end;
2456 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2457 }
2458 if (i < 4)
2459 goto error;
2460
2461 HE2BE(reinterpret_cast<uint32_t&>(value.s_addr));
2462 this->interval.start = start;
2463 return true;
2464
2465 error:
2466 invalidate();
2467 return false;
2468 }
2469
2470 std::shared_ptr<basic_parser<T>>
2471 m_digit_0,
2472 m_digit_1,
2473 m_digit_2,
2474 m_digit_3,
2475 m_digit_4,
2476 m_digit_5,
2477 m_digit_6,
2478 m_digit_7,
2479 m_digit_8,
2480 m_digit_9;
2481 std::shared_ptr<basic_parser<T>> m_separator;
2482 };
2483
// Convenience aliases of basic_ipv4_address: char, wchar_t, a t-prefixed alias
// that follows _UNICODE, and an SGML alias that is the char instantiation.
2484 using ipv4_address = basic_ipv4_address<char>;
2485 using wipv4_address = basic_ipv4_address<wchar_t>;
2486#ifdef _UNICODE
2487 using tipv4_address = wipv4_address;
2488#else
2489 using tipv4_address = ipv4_address;
2490#endif
2491 using sgml_ipv4_address = basic_ipv4_address<char>;
2492
2496 template <class T>
2498 {
2499 public:
/// Constructs the parser; `locale` is forwarded to the basic_parser<T> base.
2500 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2501
2502 protected:
2503 virtual bool do_match(
2504 _In_reads_or_z_opt_(end) const T* text,
2505 _In_ size_t start = 0,
2506 _In_ size_t end = SIZE_MAX,
2507 _In_ int flags = match_default)
2508 {
2509 _Assume_(text || start >= end);
2510 if (start < end && text[start]) {
2511 if (text[start] == '-' ||
2512 text[start] == '_' ||
2513 text[start] == ':' ||
2514 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2515 {
2516 this->interval.end = (this->interval.start = start) + 1;
2517 return true;
2518 }
2519 }
2520 this->interval.invalidate();
2521 return false;
2522 }
2523 };
2524
2527#ifdef _UNICODE
2529#else
2531#endif
2532
2537 {
2538 public:
/// Constructs the parser; `locale` is forwarded to the sgml_parser base.
2539 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2540
2541 protected:
2542 virtual bool do_match(
2543 _In_reads_or_z_(end) const char* text,
2544 _In_ size_t start = 0,
2545 _In_ size_t end = SIZE_MAX,
2546 _In_ int flags = match_default)
2547 {
2548 _Assume_(text || start >= end);
2549 if (start < end && text[start]) {
2550 wchar_t buf[3];
2551 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2552 const wchar_t* chr_end = chr + stdex::strlen(chr);
2553 if (((chr[0] == L'-' ||
2554 chr[0] == L'_' ||
2555 chr[0] == L':') && chr[1] == 0) ||
2556 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2557 {
2558 this->interval.start = start;
2559 return true;
2560 }
2561 }
2562 this->interval.invalidate();
2563 return false;
2564 }
2565 };
2566
2570 template <class T>
2572 {
2573 public:
2575 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2576 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2577 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2578 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2579 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2580 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2581 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2582 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2583 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2584 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2585 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2586 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2587 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2588 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2589 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2590 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2591 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2592 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2593 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2594 _In_ const std::locale& locale = std::locale()) :
2596 m_digit_0(digit_0),
2597 m_digit_1(digit_1),
2598 m_digit_2(digit_2),
2599 m_digit_3(digit_3),
2600 m_digit_4(digit_4),
2601 m_digit_5(digit_5),
2602 m_digit_6(digit_6),
2603 m_digit_7(digit_7),
2604 m_digit_8(digit_8),
2605 m_digit_9(digit_9),
2606 m_digit_10(digit_10),
2607 m_digit_11(digit_11),
2608 m_digit_12(digit_12),
2609 m_digit_13(digit_13),
2610 m_digit_14(digit_14),
2611 m_digit_15(digit_15),
2612 m_separator(separator),
2613 m_scope_id_separator(scope_id_separator),
2615 {
2616 memset(&value, 0, sizeof(value));
2617 }
2618
/// Resets the per-group intervals, the parsed address and the scope ID parser.
///
/// Each of the eight `components` entries gets start = 1 and end = 0 (start past
/// end), `value` is zero-filled, and `scope_id` is invalidated when it is set.
2619 virtual void invalidate()
2620 {
2621 components[0].start = 1;
2622 components[0].end = 0;
2623 components[1].start = 1;
2624 components[1].end = 0;
2625 components[2].start = 1;
2626 components[2].end = 0;
2627 components[3].start = 1;
2628 components[3].end = 0;
2629 components[4].start = 1;
2630 components[4].end = 0;
2631 components[5].start = 1;
2632 components[5].end = 0;
2633 components[6].start = 1;
2634 components[6].end = 0;
2635 components[7].start = 1;
2636 components[7].end = 0;
2637 memset(&value, 0, sizeof(value));
2638 if (scope_id) scope_id->invalidate();
2640 }
2641
2644 std::shared_ptr<basic_parser<T>> scope_id;
2645
2646 protected:
2647 virtual bool do_match(
2648 _In_reads_or_z_opt_(end) const T* text,
2649 _In_ size_t start = 0,
2650 _In_ size_t end = SIZE_MAX,
2651 _In_ int flags = match_default)
2652 {
2653 _Assume_(text || start >= end);
2654 this->interval.end = start;
2655 memset(&value, 0, sizeof(value));
2656
2657 size_t i, compaction_i = SIZE_MAX, compaction_start = start;
2658 for (i = 0; i < 8; i++) {
2659 bool is_empty = true;
2660
2661 if (m_separator->match(text, this->interval.end, end, flags)) {
2662 // : found
2663 this->interval.end = m_separator->interval.end;
2664 if (m_separator->match(text, this->interval.end, end, flags)) {
2665 // :: found
2666 if (compaction_i == SIZE_MAX) {
2667 // Zero compaction start
2668 compaction_i = i;
2669 compaction_start = m_separator->interval.start;
2670 this->interval.end = m_separator->interval.end;
2671 }
2672 else {
2673 // More than one zero compaction
2674 break;
2675 }
2676 }
2677 else if (!i) {
2678 // Leading : found
2679 goto error;
2680 }
2681 }
2682 else if (i) {
2683 // : missing
2684 break;
2685 }
2686
2687 components[i].start = this->interval.end;
2688 size_t x;
2689 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2690 size_t dig, digit_end;
2691 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2692 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2693 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2694 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2695 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2696 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2697 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2698 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2699 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2700 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2701 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2702 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2703 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2704 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2705 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2706 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2707 else break;
2708 size_t x_n = x * 16 + dig;
2709 if (x_n <= 0xffff) {
2710 x = x_n;
2711 this->interval.end = digit_end;
2712 is_empty = false;
2713 }
2714 else
2715 break;
2716 }
2717 if (is_empty) {
2718 if (compaction_i != SIZE_MAX) {
2719 // Zero compaction active: no sweat.
2720 break;
2721 }
2722 goto error;
2723 }
2724 components[i].end = this->interval.end;
2725 HE2BE(reinterpret_cast<uint16_t&>(this->value.s6_words[i]));
2726 }
2727
2728 if (compaction_i != SIZE_MAX) {
2729 // Align components right due to zero compaction.
2730 size_t j, k;
2731 for (j = 8, k = i; k > compaction_i;) {
2732 this->value.s6_words[--j] = this->value.s6_words[--k];
2734 }
2735 for (; j > compaction_i;) {
2736 this->value.s6_words[--j] = 0;
2737 components[j].start =
2739 }
2740 }
2741 else if (i < 8)
2742 goto error;
2743
2744 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2745 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2746 this->interval.end = scope_id->interval.end;
2747 else if (scope_id)
2748 scope_id->invalidate();
2749
2750 this->interval.start = start;
2751 return true;
2752
2753 error:
2754 invalidate();
2755 return false;
2756 }
2757
2758 std::shared_ptr<basic_parser<T>>
2759 m_digit_0,
2760 m_digit_1,
2761 m_digit_2,
2762 m_digit_3,
2763 m_digit_4,
2764 m_digit_5,
2765 m_digit_6,
2766 m_digit_7,
2767 m_digit_8,
2768 m_digit_9,
2769 m_digit_10,
2770 m_digit_11,
2771 m_digit_12,
2772 m_digit_13,
2773 m_digit_14,
2774 m_digit_15;
2775 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2776 };
2777
// Convenience aliases of basic_ipv6_address: char, wchar_t, a t-prefixed alias
// that follows _UNICODE, and an SGML alias that is the char instantiation.
2778 using ipv6_address = basic_ipv6_address<char>;
2779 using wipv6_address = basic_ipv6_address<wchar_t>;
2780#ifdef _UNICODE
2781 using tipv6_address = wipv6_address;
2782#else
2783 using tipv6_address = ipv6_address;
2784#endif
2785 using sgml_ipv6_address = basic_ipv6_address<char>;
2786
2790 template <class T>
2792 {
2793 public:
2795 _In_ bool allow_idn,
2796 _In_ const std::locale& locale = std::locale()) :
2798 m_allow_idn(allow_idn),
2799 allow_on_edge(true)
2800 {}
2801
2803
2804 protected:
2805 virtual bool do_match(
2806 _In_reads_or_z_opt_(end) const T* text,
2807 _In_ size_t start = 0,
2808 _In_ size_t end = SIZE_MAX,
2809 _In_ int flags = match_default)
2810 {
2811 _Assume_(text || start >= end);
2812 if (start < end && text[start]) {
2813 if (('A' <= text[start] && text[start] <= 'Z') ||
2814 ('a' <= text[start] && text[start] <= 'z') ||
2815 ('0' <= text[start] && text[start] <= '9'))
2816 allow_on_edge = true;
2817 else if (text[start] == '-')
2818 allow_on_edge = false;
2819 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2820 allow_on_edge = true;
2821 else {
2822 this->interval.invalidate();
2823 return false;
2824 }
2825 this->interval.end = (this->interval.start = start) + 1;
2826 return true;
2827 }
2828 this->interval.invalidate();
2829 return false;
2830 }
2831
2832 bool m_allow_idn;
2833 };
2834
// Convenience aliases of basic_dns_domain_char: char, wchar_t and a t-prefixed
// alias that follows _UNICODE.
2835 using dns_domain_char = basic_dns_domain_char<char>;
2836 using wdns_domain_char = basic_dns_domain_char<wchar_t>;
2837#ifdef _UNICODE
2838 using tdns_domain_char = wdns_domain_char;
2839#else
2840 using tdns_domain_char = dns_domain_char;
2841#endif
2842
2847 {
2848 public:
2850 _In_ bool allow_idn,
2851 _In_ const std::locale& locale = std::locale()) :
2853 {}
2854
2855 protected:
2856 virtual bool do_match(
2857 _In_reads_or_z_(end) const char* text,
2858 _In_ size_t start = 0,
2859 _In_ size_t end = SIZE_MAX,
2860 _In_ int flags = match_default)
2861 {
2862 _Assume_(text || start >= end);
2863 if (start < end && text[start]) {
2864 wchar_t buf[3];
2865 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2866 const wchar_t* chr_end = chr + stdex::strlen(chr);
2867 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2868 ('a' <= chr[0] && chr[0] <= 'z') ||
2869 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2870 allow_on_edge = true;
2871 else if (chr[0] == '-' && chr[1] == 0)
2872 allow_on_edge = false;
2873 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2874 allow_on_edge = true;
2875 else {
2876 this->interval.invalidate();
2877 return false;
2878 }
2879 this->interval.start = start;
2880 return true;
2881 }
2882 this->interval.invalidate();
2883 return false;
2884 }
2885 };
2886
2890 template <class T>
2892 {
2893 public:
2895 _In_ bool allow_absolute,
2896 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2897 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2898 _In_ const std::locale& locale = std::locale()) :
2901 m_domain_char(domain_char),
2902 m_separator(separator)
2903 {}
2904
2905 protected:
/// Matches a DNS name: a sequence of labels joined by m_separator.
///
/// Characters are classified by m_domain_char, which also reports through
/// allow_on_edge whether the character it matched last may stand at the start or
/// end of a label. this->interval.end only advances over characters that may
/// stand on an edge, so a trailing run of non-edge characters is left out.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in text at which matching starts
/// \param[in] end    Index in text at which matching stops
/// \param[in] flags  Bitwise combination of match_* flags, forwarded to the sub-parsers
///
/// \returns true when at least one label was started; false and an invalidated
///          interval otherwise.
2906 virtual bool do_match(
2907 _In_reads_or_z_opt_(end) const T* text,
2908 _In_ size_t start = 0,
2909 _In_ size_t end = SIZE_MAX,
2910 _In_ int flags = match_default)
2911 {
2912 _Assume_(text || start >= end);
2913 size_t i = start, count;
// Each pass of the outer loop handles one label; it runs at most 127 times.
2914 for (count = 0; i < end && text[i] && count < 127; count++) {
// A label must begin with a character that is allowed on an edge.
2915 if (m_domain_char->match(text, i, end, flags) &&
2916 m_domain_char->allow_on_edge)
2917 {
2918 // Domain start
2919 this->interval.end = i = m_domain_char->interval.end;
2920 while (i < end && text[i]) {
// A separator is only tried right after an edge-capable character.
2921 if (m_domain_char->allow_on_edge &&
2922 m_separator->match(text, i, end, flags))
2923 {
2924 // Domain end
// With m_allow_absolute the separator becomes part of the match; otherwise
// the match ends before it while scanning resumes after it.
2925 if (m_allow_absolute)
2926 this->interval.end = i = m_separator->interval.end;
2927 else {
2928 this->interval.end = i;
2929 i = m_separator->interval.end;
2930 }
2931 break;
2932 }
2933 if (m_domain_char->match(text, i, end, flags)) {
// Non-edge characters are scanned past without extending the match.
2934 if (m_domain_char->allow_on_edge)
2935 this->interval.end = i = m_domain_char->interval.end;
2936 else
2937 i = m_domain_char->interval.end;
2938 }
2939 else {
// Neither separator nor domain character: the name ends at interval.end.
2940 this->interval.start = start;
2941 return true;
2942 }
2943 }
2944 }
2945 else
2946 break;
2947 }
2948 if (count) {
2949 this->interval.start = start;
2950 return true;
2951 }
2952 this->interval.invalidate();
2953 return false;
2954 }
2955
2957 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2958 std::shared_ptr<basic_parser<T>> m_separator;
2959 };
2960
2963#ifdef _UNICODE
2964 using tdns_name = wdns_name;
2965#else
2966 using tdns_name = dns_name;
2967#endif
2969
2973 template <class T>
2975 {
2976 public:
/// Constructs the parser; `locale` is forwarded to the basic_parser<T> base.
2977 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2978
2979 protected:
2980 virtual bool do_match(
2981 _In_reads_or_z_opt_(end) const T* text,
2982 _In_ size_t start = 0,
2983 _In_ size_t end = SIZE_MAX,
2984 _In_ int flags = match_default)
2985 {
2986 _Assume_(text || start >= end);
2987 if (start < end && text[start]) {
2988 if (text[start] == '-' ||
2989 text[start] == '.' ||
2990 text[start] == '_' ||
2991 text[start] == '~' ||
2992 text[start] == '%' ||
2993 text[start] == '!' ||
2994 text[start] == '$' ||
2995 text[start] == '&' ||
2996 text[start] == '\'' ||
2997 //text[start] == '(' ||
2998 //text[start] == ')' ||
2999 text[start] == '*' ||
3000 text[start] == '+' ||
3001 text[start] == ',' ||
3002 text[start] == ';' ||
3003 text[start] == '=' ||
3004 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3005 {
3006 this->interval.end = (this->interval.start = start) + 1;
3007 return true;
3008 }
3009 }
3010 this->interval.invalidate();
3011 return false;
3012 }
3013 };
3014
3017#ifdef _UNICODE
3019#else
3021#endif
3022
3027 {
3028 public:
/// Constructs the parser; `locale` is forwarded to the basic_url_username_char<char> base.
3029 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3030
3031 protected:
3032 virtual bool do_match(
3033 _In_reads_or_z_(end) const char* text,
3034 _In_ size_t start = 0,
3035 _In_ size_t end = SIZE_MAX,
3036 _In_ int flags = match_default)
3037 {
3038 _Assume_(text || start >= end);
3039 if (start < end && text[start]) {
3040 wchar_t buf[3];
3041 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3042 const wchar_t* chr_end = chr + stdex::strlen(chr);
3043 if (((chr[0] == L'-' ||
3044 chr[0] == L'.' ||
3045 chr[0] == L'_' ||
3046 chr[0] == L'~' ||
3047 chr[0] == L'%' ||
3048 chr[0] == L'!' ||
3049 chr[0] == L'$' ||
3050 chr[0] == L'&' ||
3051 chr[0] == L'\'' ||
3052 //chr[0] == L'(' ||
3053 //chr[0] == L')' ||
3054 chr[0] == L'*' ||
3055 chr[0] == L'+' ||
3056 chr[0] == L',' ||
3057 chr[0] == L';' ||
3058 chr[0] == L'=') && chr[1] == 0) ||
3059 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3060 {
3061 this->interval.start = start;
3062 return true;
3063 }
3064 }
3065
3066 this->interval.invalidate();
3067 return false;
3068 }
3069 };
3070
3074 template <class T>
3076 {
3077 public:
/// Constructs the parser; `locale` is forwarded to the basic_parser<T> base.
3078 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3079
3080 protected:
3081 virtual bool do_match(
3082 _In_reads_or_z_opt_(end) const T* text,
3083 _In_ size_t start = 0,
3084 _In_ size_t end = SIZE_MAX,
3085 _In_ int flags = match_default)
3086 {
3087 _Assume_(text || start >= end);
3088 if (start < end && text[start]) {
3089 if (text[start] == '-' ||
3090 text[start] == '.' ||
3091 text[start] == '_' ||
3092 text[start] == '~' ||
3093 text[start] == '%' ||
3094 text[start] == '!' ||
3095 text[start] == '$' ||
3096 text[start] == '&' ||
3097 text[start] == '\'' ||
3098 text[start] == '(' ||
3099 text[start] == ')' ||
3100 text[start] == '*' ||
3101 text[start] == '+' ||
3102 text[start] == ',' ||
3103 text[start] == ';' ||
3104 text[start] == '=' ||
3105 text[start] == ':' ||
3106 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3107 {
3108 this->interval.end = (this->interval.start = start) + 1;
3109 return true;
3110 }
3111 }
3112 this->interval.invalidate();
3113 return false;
3114 }
3115 };
3116
3119#ifdef _UNICODE
3121#else
3123#endif
3124
3129 {
3130 public:
/// Constructs the parser; `locale` is forwarded to the basic_url_password_char<char> base.
3131 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3132
3133 protected:
3134 virtual bool do_match(
3135 _In_reads_or_z_(end) const char* text,
3136 _In_ size_t start = 0,
3137 _In_ size_t end = SIZE_MAX,
3138 _In_ int flags = match_default)
3139 {
3140 _Assume_(text || start >= end);
3141 if (start < end && text[start]) {
3142 wchar_t buf[3];
3143 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3144 const wchar_t* chr_end = chr + stdex::strlen(chr);
3145 if (((chr[0] == L'-' ||
3146 chr[0] == L'.' ||
3147 chr[0] == L'_' ||
3148 chr[0] == L'~' ||
3149 chr[0] == L'%' ||
3150 chr[0] == L'!' ||
3151 chr[0] == L'$' ||
3152 chr[0] == L'&' ||
3153 chr[0] == L'\'' ||
3154 chr[0] == L'(' ||
3155 chr[0] == L')' ||
3156 chr[0] == L'*' ||
3157 chr[0] == L'+' ||
3158 chr[0] == L',' ||
3159 chr[0] == L';' ||
3160 chr[0] == L'=' ||
3161 chr[0] == L':') && chr[1] == 0) ||
3162 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3163 {
3164 this->interval.start = start;
3165 return true;
3166 }
3167 }
3168 this->interval.invalidate();
3169 return false;
3170 }
3171 };
3172
3176 template <class T>
3178 {
3179 public:
/// Constructs the parser; `locale` is forwarded to the basic_parser<T> base.
3180 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3181
3182 protected:
3183 virtual bool do_match(
3184 _In_reads_or_z_opt_(end) const T* text,
3185 _In_ size_t start = 0,
3186 _In_ size_t end = SIZE_MAX,
3187 _In_ int flags = match_default)
3188 {
3189 _Assume_(text || start >= end);
3190 if (start < end && text[start]) {
3191 if (text[start] == '/' ||
3192 text[start] == '-' ||
3193 text[start] == '.' ||
3194 text[start] == '_' ||
3195 text[start] == '~' ||
3196 text[start] == '%' ||
3197 text[start] == '!' ||
3198 text[start] == '$' ||
3199 text[start] == '&' ||
3200 text[start] == '\'' ||
3201 text[start] == '(' ||
3202 text[start] == ')' ||
3203 text[start] == '*' ||
3204 text[start] == '+' ||
3205 text[start] == ',' ||
3206 text[start] == ';' ||
3207 text[start] == '=' ||
3208 text[start] == ':' ||
3209 text[start] == '@' ||
3210 text[start] == '?' ||
3211 text[start] == '#' ||
3212 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3213 {
3214 this->interval.end = (this->interval.start = start) + 1;
3215 return true;
3216 }
3217 }
3218 this->interval.invalidate();
3219 return false;
3220 }
3221 };
3222
3225#ifdef _UNICODE
3227#else
3229#endif
3230
3235 {
3236 public:
/// Constructs the parser; `locale` is forwarded to the basic_url_path_char<char> base.
3237 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3238
3239 protected:
3240 virtual bool do_match(
3241 _In_reads_or_z_(end) const char* text,
3242 _In_ size_t start = 0,
3243 _In_ size_t end = SIZE_MAX,
3244 _In_ int flags = match_default)
3245 {
3246 _Assume_(text || start >= end);
3247 if (start < end && text[start]) {
3248 wchar_t buf[3];
3249 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3250 const wchar_t* chr_end = chr + stdex::strlen(chr);
3251 if (((chr[0] == L'/' ||
3252 chr[0] == L'-' ||
3253 chr[0] == L'.' ||
3254 chr[0] == L'_' ||
3255 chr[0] == L'~' ||
3256 chr[0] == L'%' ||
3257 chr[0] == L'!' ||
3258 chr[0] == L'$' ||
3259 chr[0] == L'&' ||
3260 chr[0] == L'\'' ||
3261 chr[0] == L'(' ||
3262 chr[0] == L')' ||
3263 chr[0] == L'*' ||
3264 chr[0] == L'+' ||
3265 chr[0] == L',' ||
3266 chr[0] == L';' ||
3267 chr[0] == L'=' ||
3268 chr[0] == L':' ||
3269 chr[0] == L'@' ||
3270 chr[0] == L'?' ||
3271 chr[0] == L'#') && chr[1] == 0) ||
3272 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3273 {
3274 this->interval.start = start;
3275 return true;
3276 }
3277 }
3278 this->interval.invalidate();
3279 return false;
3280 }
3281 };
3282
3286 template <class T>
3288 {
3289 public:
3291 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3292 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3293 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3294 _In_ const std::locale& locale = std::locale()) :
3296 m_path_char(path_char),
3297 m_query_start(query_start),
3298 m_bookmark_start(bookmark_start)
3299 {}
3300
/// Resets the path, query and bookmark intervals.
///
/// Each interval gets start = 1 and end = 0 (start past end).
3301 virtual void invalidate()
3302 {
3303 path.start = 1;
3304 path.end = 0;
3305 query.start = 1;
3306 query.end = 0;
3307 bookmark.start = 1;
3308 bookmark.end = 0;
3310 }
3311
3314 stdex::interval<size_t> bookmark;
3315
3316 protected:
/// Matches a URL path with an optional query and an optional bookmark.
///
/// Characters are consumed through m_path_char. m_query_start switches from the
/// path to the query, m_bookmark_start switches from the path or the query to
/// the bookmark. The `path`, `query` and `bookmark` intervals record the pieces;
/// a piece that is not present keeps start = 1, end = 0.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in text at which matching starts
/// \param[in] end    Index in text at which matching stops
/// \param[in] flags  Bitwise combination of match_* flags, forwarded to the sub-parsers
///
/// \returns true once a query or bookmark start was matched, or through the
///          final success return below; false with path, bookmark and
///          this->interval invalidated otherwise.
3317 virtual bool do_match(
3318 _In_reads_or_z_opt_(end) const T* text,
3319 _In_ size_t start = 0,
3320 _In_ size_t end = SIZE_MAX,
3321 _In_ int flags = match_default)
3322 {
3323 _Assume_(text || start >= end);
3324
3325 this->interval.end = start;
3326 path.start = start;
3327 query.start = 1;
3328 query.end = 0;
3329 bookmark.start = 1;
3330 bookmark.end = 0;
3331
// Path phase: consume path characters until a query or bookmark start, a
// non-path character, or the end of the text.
3332 for (;;) {
3333 if (this->interval.end >= end || !text[this->interval.end])
3334 break;
3335 if (m_query_start->match(text, this->interval.end, end, flags)) {
// The path ends where the query marker begins; the query starts after it.
3336 path.end = this->interval.end;
3337 query.start = this->interval.end = m_query_start->interval.end;
3338 for (;;) {
3339 if (this->interval.end >= end || !text[this->interval.end]) {
3340 query.end = this->interval.end;
3341 break;
3342 }
3343 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
// Bookmark following a query.
3344 query.end = this->interval.end;
3345 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3346 for (;;) {
3347 if (this->interval.end >= end || !text[this->interval.end]) {
3348 bookmark.end = this->interval.end;
3349 break;
3350 }
3351 if (m_path_char->match(text, this->interval.end, end, flags))
3352 this->interval.end = m_path_char->interval.end;
3353 else {
3354 bookmark.end = this->interval.end;
3355 break;
3356 }
3357 }
3358 this->interval.start = start;
3359 return true;
3360 }
3361 if (m_path_char->match(text, this->interval.end, end, flags))
3362 this->interval.end = m_path_char->interval.end;
3363 else {
3364 query.end = this->interval.end;
3365 break;
3366 }
3367 }
3368 this->interval.start = start;
3369 return true;
3370 }
3371 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
// Bookmark directly after the path, with no query.
3372 path.end = this->interval.end;
3373 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3374 for (;;) {
3375 if (this->interval.end >= end || !text[this->interval.end]) {
3376 bookmark.end = this->interval.end;
3377 break;
3378 }
3379 if (m_path_char->match(text, this->interval.end, end, flags))
3380 this->interval.end = m_path_char->interval.end;
3381 else {
3382 bookmark.end = this->interval.end;
3383 break;
3384 }
3385 }
3386 this->interval.start = start;
3387 return true;
3388 }
3389 if (m_path_char->match(text, this->interval.end, end, flags))
3390 this->interval.end = m_path_char->interval.end;
3391 else
3392 break;
3393 }
3394
// NOTE(review): the condition that opens the block closed at the next "}" is
// not visible in this listing - confirm against the original header.
3396 path.end = this->interval.end;
3397 this->interval.start = start;
3398 return true;
3399 }
3400
// Failure: query and bookmark still hold the start = 1, end = 0 set on entry.
3401 path.start = 1;
3402 path.end = 0;
3403 bookmark.start = 1;
3404 bookmark.end = 0;
3405 this->interval.invalidate();
3406 return false;
3407 }
3408
3409 std::shared_ptr<basic_parser<T>> m_path_char;
3410 std::shared_ptr<basic_parser<T>> m_query_start;
3411 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3412 };
3413
3416#ifdef _UNICODE
3417 using turl_path = wurl_path;
3418#else
3419 using turl_path = url_path;
3420#endif
3422
3426 template <class T>
3427 class basic_url : public basic_parser<T>
3428 {
3429 public:
/// Builds a URL parser out of externally supplied sub-parsers.
///
/// The scheme, username, password, host, port and path parsers are stored in
/// the public members of the same name so that callers can inspect them after
/// a match; the punctuation parsers (colon, slash, at, IP brackets) are stored
/// in the m_-prefixed members.
///
/// NOTE(review): `locale` is not used by any initializer visible here; it is
/// presumably forwarded to the basic_parser<T> base - confirm against the full source.
3430 basic_url(
3431 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3432 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3433 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3434 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3435 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3436 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3437 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3438 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3439 _In_ const std::shared_ptr<basic_parser<T>>& at,
3440 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3441 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3442 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3443 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3444 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3445 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3446 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3447 _In_ const std::locale& locale = std::locale()) :
3449 http_scheme(_http_scheme),
3450 ftp_scheme(_ftp_scheme),
3451 mailto_scheme(_mailto_scheme),
3452 file_scheme(_file_scheme),
3453 m_colon(colon),
3454 m_slash(slash),
3455 username(_username),
3456 password(_password),
3457 m_at(at),
3458 m_ip_lbracket(ip_lbracket),
3459 m_ip_rbracket(ip_rbracket),
3460 ipv4_host(_ipv4_host),
3461 ipv6_host(_ipv6_host),
3462 dns_host(_dns_host),
3463 port(_port),
3464 path(_path)
3465 {}
3466
/// Invalidates every public sub-parser (schemes, credentials, hosts, port, path).
///
/// All of these pointers are dereferenced unconditionally, so none of them may be null.
/// NOTE(review): resetting this parser's own interval is not visible in these
/// lines; presumably it is delegated to the base class - confirm.
3467 virtual void invalidate()
3468 {
3469 http_scheme->invalidate();
3470 ftp_scheme->invalidate();
3471 mailto_scheme->invalidate();
3472 file_scheme->invalidate();
3473 username->invalidate();
3474 password->invalidate();
3475 ipv4_host->invalidate();
3476 ipv6_host->invalidate();
3477 dns_host->invalidate();
3478 port->invalidate();
3479 path->invalidate();
3481 }
3482
3483 std::shared_ptr<basic_parser<T>> http_scheme;
3484 std::shared_ptr<basic_parser<T>> ftp_scheme;
3485 std::shared_ptr<basic_parser<T>> mailto_scheme;
3486 std::shared_ptr<basic_parser<T>> file_scheme;
3487 std::shared_ptr<basic_parser<T>> username;
3488 std::shared_ptr<basic_parser<T>> password;
3489 std::shared_ptr<basic_parser<T>> ipv4_host;
3490 std::shared_ptr<basic_parser<T>> ipv6_host;
3491 std::shared_ptr<basic_parser<T>> dns_host;
3492 std::shared_ptr<basic_parser<T>> port;
3493 std::shared_ptr<basic_parser<T>> path;
3494
3495 protected:
3496 virtual bool do_match(
3497 _In_reads_or_z_opt_(end) const T* text,
3498 _In_ size_t start = 0,
3499 _In_ size_t end = SIZE_MAX,
3500 _In_ int flags = match_default)
3501 {
3502 _Assume_(text || start >= end);
3503
3504 this->interval.end = start;
3505
3506 if (http_scheme->match(text, this->interval.end, end, flags) &&
3507 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3508 m_slash->match(text, m_colon->interval.end, end, flags) &&
3509 m_slash->match(text, m_slash->interval.end, end, flags))
3510 {
3511 // http://
3512 this->interval.end = m_slash->interval.end;
3513 ftp_scheme->invalidate();
3514 mailto_scheme->invalidate();
3515 file_scheme->invalidate();
3516 }
3517 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3518 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3519 m_slash->match(text, m_colon->interval.end, end, flags) &&
3520 m_slash->match(text, m_slash->interval.end, end, flags))
3521 {
3522 // ftp://
3523 this->interval.end = m_slash->interval.end;
3524 http_scheme->invalidate();
3525 mailto_scheme->invalidate();
3526 file_scheme->invalidate();
3527 }
3528 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3529 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3530 {
3531 // mailto:
3532 this->interval.end = m_colon->interval.end;
3533 http_scheme->invalidate();
3534 ftp_scheme->invalidate();
3535 file_scheme->invalidate();
3536 }
3537 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3538 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3539 m_slash->match(text, m_colon->interval.end, end, flags) &&
3540 m_slash->match(text, m_slash->interval.end, end, flags))
3541 {
3542 // file://
3543 this->interval.end = m_slash->interval.end;
3544 http_scheme->invalidate();
3545 ftp_scheme->invalidate();
3546 mailto_scheme->invalidate();
3547 }
3548 else {
3549 // Default to http:
3550 http_scheme->invalidate();
3551 ftp_scheme->invalidate();
3552 mailto_scheme->invalidate();
3553 file_scheme->invalidate();
3554 }
3555
3556 if (ftp_scheme->interval) {
3557 if (username->match(text, this->interval.end, end, flags)) {
3558 if (m_colon->match(text, username->interval.end, end, flags) &&
3559 password->match(text, m_colon->interval.end, end, flags) &&
3560 m_at->match(text, password->interval.end, end, flags))
3561 {
3562 // Username and password
3563 this->interval.end = m_at->interval.end;
3564 }
3565 else if (m_at->match(text, this->interval.end, end, flags)) {
3566 // Username only
3567 this->interval.end = m_at->interval.end;
3568 password->invalidate();
3569 }
3570 else {
3571 username->invalidate();
3572 password->invalidate();
3573 }
3574 }
3575 else {
3576 username->invalidate();
3577 password->invalidate();
3578 }
3579
3580 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3581 // Host is IPv4
3582 this->interval.end = ipv4_host->interval.end;
3583 ipv6_host->invalidate();
3584 dns_host->invalidate();
3585 }
3586 else if (
3587 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3588 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3589 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3590 {
3591 // Host is IPv6
3592 this->interval.end = m_ip_rbracket->interval.end;
3593 ipv4_host->invalidate();
3594 dns_host->invalidate();
3595 }
3596 else if (dns_host->match(text, this->interval.end, end, flags)) {
3597 // Host is hostname
3598 this->interval.end = dns_host->interval.end;
3599 ipv4_host->invalidate();
3600 ipv6_host->invalidate();
3601 }
3602 else {
3603 invalidate();
3604 return false;
3605 }
3606
3607 if (m_colon->match(text, this->interval.end, end, flags) &&
3608 port->match(text, m_colon->interval.end, end, flags))
3609 {
3610 // Port
3611 this->interval.end = port->interval.end;
3612 }
3613 else
3614 port->invalidate();
3615
3616 if (path->match(text, this->interval.end, end, flags)) {
3617 // Path
3618 this->interval.end = path->interval.end;
3619 }
3620
3621 this->interval.start = start;
3622 return true;
3623 }
3624
3625 if (mailto_scheme->interval) {
3626 if (username->match(text, this->interval.end, end, flags) &&
3627 m_at->match(text, username->interval.end, end, flags))
3628 {
3629 // Username
3630 this->interval.end = m_at->interval.end;
3631 }
3632 else {
3633 invalidate();
3634 return false;
3635 }
3636
3637 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3638 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3639 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3640 {
3641 // Host is IPv4
3642 this->interval.end = m_ip_rbracket->interval.end;
3643 ipv6_host->invalidate();
3644 dns_host->invalidate();
3645 }
3646 else if (
3647 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3648 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3649 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3650 {
3651 // Host is IPv6
3652 this->interval.end = m_ip_rbracket->interval.end;
3653 ipv4_host->invalidate();
3654 dns_host->invalidate();
3655 }
3656 else if (dns_host->match(text, this->interval.end, end, flags)) {
3657 // Host is hostname
3658 this->interval.end = dns_host->interval.end;
3659 ipv4_host->invalidate();
3660 ipv6_host->invalidate();
3661 }
3662 else {
3663 invalidate();
3664 return false;
3665 }
3666
3667 password->invalidate();
3668 port->invalidate();
3669 path->invalidate();
3670 this->interval.start = start;
3671 return true;
3672 }
3673
3674 if (file_scheme->interval) {
3675 if (path->match(text, this->interval.end, end, flags)) {
3676 // Path
3677 this->interval.end = path->interval.end;
3678 }
3679
3680 username->invalidate();
3681 password->invalidate();
3682 ipv4_host->invalidate();
3683 ipv6_host->invalidate();
3684 dns_host->invalidate();
3685 port->invalidate();
3686 this->interval.start = start;
3687 return true;
3688 }
3689
3690 // "http://" found or defaulted to
3691
3692 // If "http://" explicit, test for username&password.
3693 if (http_scheme->interval &&
3694 username->match(text, this->interval.end, end, flags))
3695 {
3696 if (m_colon->match(text, username->interval.end, end, flags) &&
3697 password->match(text, m_colon->interval.end, end, flags) &&
3698 m_at->match(text, password->interval.end, end, flags))
3699 {
3700 // Username and password
3701 this->interval.end = m_at->interval.end;
3702 }
3703 else if (m_at->match(text, username->interval.end, end, flags)) {
3704 // Username only
3705 this->interval.end = m_at->interval.end;
3706 password->invalidate();
3707 }
3708 else {
3709 username->invalidate();
3710 password->invalidate();
3711 }
3712 }
3713 else {
3714 username->invalidate();
3715 password->invalidate();
3716 }
3717
3718 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3719 // Host is IPv4
3720 this->interval.end = ipv4_host->interval.end;
3721 ipv6_host->invalidate();
3722 dns_host->invalidate();
3723 }
3724 else if (
3725 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3726 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3727 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3728 {
3729 // Host is IPv6
3730 this->interval.end = m_ip_rbracket->interval.end;
3731 ipv4_host->invalidate();
3732 dns_host->invalidate();
3733 }
3734 else if (dns_host->match(text, this->interval.end, end, flags)) {
3735 // Host is hostname
3736 this->interval.end = dns_host->interval.end;
3737 ipv4_host->invalidate();
3738 ipv6_host->invalidate();
3739 }
3740 else {
3741 invalidate();
3742 return false;
3743 }
3744
3745 if (m_colon->match(text, this->interval.end, end, flags) &&
3746 port->match(text, m_colon->interval.end, end, flags))
3747 {
3748 // Port
3749 this->interval.end = port->interval.end;
3750 }
3751 else
3752 port->invalidate();
3753
3754 if (path->match(text, this->interval.end, end, flags)) {
3755 // Path
3756 this->interval.end = path->interval.end;
3757 }
3758
3759 this->interval.start = start;
3760 return true;
3761 }
3762
3763 std::shared_ptr<basic_parser<T>> m_colon;
3764 std::shared_ptr<basic_parser<T>> m_slash;
3765 std::shared_ptr<basic_parser<T>> m_at;
3766 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3767 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3768 };
3769
// Convenience aliases of basic_url: url (char), wurl (wchar_t), turl (wurl when
// _UNICODE is defined, url otherwise) and sgml_url, which is also basic_url<char>.
3770 using url = basic_url<char>;
3771 using wurl = basic_url<wchar_t>;
3772#ifdef _UNICODE
3773 using turl = wurl;
3774#else
3775 using turl = url;
3776#endif
3777 using sgml_url = basic_url<char>;
3778
3782 template <class T>
3784 {
3785 public:
3787 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3788 _In_ const std::shared_ptr<basic_parser<T>>& at,
3789 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3790 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3791 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3792 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3793 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3794 _In_ const std::locale& locale = std::locale()) :
3796 username(_username),
3797 m_at(at),
3798 m_ip_lbracket(ip_lbracket),
3799 m_ip_rbracket(ip_rbracket),
3800 ipv4_host(_ipv4_host),
3801 ipv6_host(_ipv6_host),
3802 dns_host(_dns_host)
3803 {}
3804
/// Invalidates the username and all three host sub-parsers.
///
/// The pointers are dereferenced unconditionally, so none of them may be null.
/// NOTE(review): resetting this parser's own interval is not visible in these
/// lines; presumably it is delegated to the base class - confirm.
3805 virtual void invalidate()
3806 {
3807 username->invalidate();
3808 ipv4_host->invalidate();
3809 ipv6_host->invalidate();
3810 dns_host->invalidate();
3812 }
3813
3814 std::shared_ptr<basic_parser<T>> username;
3815 std::shared_ptr<basic_parser<T>> ipv4_host;
3816 std::shared_ptr<basic_parser<T>> ipv6_host;
3817 std::shared_ptr<basic_parser<T>> dns_host;
3818
3819 protected:
3820 virtual bool do_match(
3821 _In_reads_or_z_opt_(end) const T* text,
3822 _In_ size_t start = 0,
3823 _In_ size_t end = SIZE_MAX,
3824 _In_ int flags = match_default)
3825 {
3826 _Assume_(text || start >= end);
3827
3828 if (username->match(text, start, end, flags) &&
3829 m_at->match(text, username->interval.end, end, flags))
3830 {
3831 // Username@
3832 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3833 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3834 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3835 {
3836 // Host is IPv4
3837 this->interval.end = m_ip_rbracket->interval.end;
3838 ipv6_host->invalidate();
3839 dns_host->invalidate();
3840 }
3841 else if (
3842 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3843 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3844 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3845 {
3846 // Host is IPv6
3847 this->interval.end = m_ip_rbracket->interval.end;
3848 ipv4_host->invalidate();
3849 dns_host->invalidate();
3850 }
3851 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3852 // Host is hostname
3853 this->interval.end = dns_host->interval.end;
3854 ipv4_host->invalidate();
3855 ipv6_host->invalidate();
3856 }
3857 else
3858 goto error;
3859 this->interval.start = start;
3860 return true;
3861 }
3862
3863 error:
3864 invalidate();
3865 return false;
3866 }
3867
3868 std::shared_ptr<basic_parser<T>> m_at;
3869 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3870 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3871 };
3872
3875#ifdef _UNICODE
3877#else
3879#endif
3881
3885 template <class T>
3887 {
3888 public:
3890 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3891 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3892 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3893 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3894 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3895 _In_ const std::locale& locale = std::locale()) :
3898 apex(_apex),
3899 eyes(_eyes),
3900 nose(_nose),
3901 mouth(_mouth)
3902 {}
3903
/// Invalidates all sub-parsers.
///
/// emoticon, apex and nose are optional and skipped when null; eyes and mouth
/// are dereferenced unconditionally and therefore must not be null.
/// NOTE(review): resetting this parser's own interval is not visible in these
/// lines; presumably it is delegated to the base class - confirm.
3904 virtual void invalidate()
3905 {
3906 if (emoticon) emoticon->invalidate();
3907 if (apex) apex->invalidate();
3908 eyes->invalidate();
3909 if (nose) nose->invalidate();
3910 mouth->invalidate();
3912 }
3913
3914 std::shared_ptr<basic_parser<T>> emoticon;
3915 std::shared_ptr<basic_parser<T>> apex;
3916 std::shared_ptr<basic_parser<T>> eyes;
3917 std::shared_ptr<basic_parser<T>> nose;
3918 std::shared_ptr<basic_set<T>> mouth;
3919
3920 protected:
/// Matches an emoticon starting at `start`.
///
/// Two forms are accepted: whatever the optional `emoticon` parser matches as
/// a whole, or a composed form made of an optional apex, eyes, an optional
/// nose and a mouth whose character may repeat.
///
/// \returns true with `interval` covering the emoticon; false with all
///          sub-parsers and `interval` invalidated otherwise.
///
/// NOTE(review): `start_mouth` is used below but its declaration is not visible
/// in this listing (the line after each `size_t` appears to be missing) -
/// confirm against the full source before editing this method.
3921 virtual bool do_match(
3922 _In_reads_or_z_opt_(end) const T* text,
3923 _In_ size_t start = 0,
3924 _In_ size_t end = SIZE_MAX,
3925 _In_ int flags = match_default)
3926 {
3927 _Assume_(text || start >= end);
3928
// Whole-emoticon form takes precedence; the component parsers are then cleared.
3929 if (emoticon && emoticon->match(text, start, end, flags)) {
3930 if (apex) apex->invalidate();
3931 eyes->invalidate();
3932 if (nose) nose->invalidate();
3933 mouth->invalidate();
3934 this->interval.start = start;
3935 this->interval.end = emoticon->interval.end;
3936 return true;
3937 }
3938
// Composed form: this->interval.end serves as the read cursor.
3939 this->interval.end = start;
3940
// The apex is optional; when absent or unmatched the cursor stays put.
3941 if (apex && apex->match(text, this->interval.end, end, flags))
3942 this->interval.end = apex->interval.end;
3943
3944 if (eyes->match(text, this->interval.end, end, flags)) {
// Eyes, nose, mouth.
3945 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3946 mouth->match(text, nose->interval.end, end, flags))
3947 {
3948 size_t
3950 hit_offset = mouth->hit_offset;
3951 // Mouth may repeat :-)))))))
// Only repeats of the same set element (same hit_offset) extend the mouth.
3952 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
// The loop's last match attempt overwrote mouth->interval; restore it to span the whole run.
3953 mouth->interval.start = start_mouth;
3954 mouth->interval.end = this->interval.end;
3955 this->interval.start = start;
3956 return true;
3957 }
// Eyes directly followed by the mouth (no nose).
3958 if (mouth->match(text, eyes->interval.end, end, flags)) {
3959 size_t
3961 hit_offset = mouth->hit_offset;
3962 // Mouth may repeat :-)))))))
3963 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3964 if (nose) nose->invalidate();
3965 mouth->interval.start = start_mouth;
3966 mouth->interval.end = this->interval.end;
3967 this->interval.start = start;
3968 return true;
3969 }
3970 }
3971
// Nothing matched: clear every sub-parser and our own interval.
3972 if (emoticon) emoticon->invalidate();
3973 if (apex) apex->invalidate();
3974 eyes->invalidate();
3975 if (nose) nose->invalidate();
3976 mouth->invalidate();
3977 this->interval.invalidate();
3978 return false;
3979 }
3980 };
3981
// Convenience aliases of basic_emoticon: emoticon (char), wemoticon (wchar_t),
// temoticon (wemoticon when _UNICODE is defined, emoticon otherwise) and
// sgml_emoticon, which is also basic_emoticon<char>.
3982 using emoticon = basic_emoticon<char>;
3983 using wemoticon = basic_emoticon<wchar_t>;
3984#ifdef _UNICODE
3985 using temoticon = wemoticon;
3986#else
3987 using temoticon = emoticon;
3988#endif
3989 using sgml_emoticon = basic_emoticon<char>;
3990
/// Date layouts, one bit each so that several can be combined into a mask.
/// The letters give the order of the parts: d = day, m = month, y = year.
enum date_format_t {
    date_format_none = 0,      ///< No format
    date_format_dmy  = 1 << 0, ///< Day, month, year
    date_format_mdy  = 1 << 1, ///< Month, day, year
    date_format_ymd  = 1 << 2, ///< Year, month, day
    date_format_ym   = 1 << 3, ///< Year, month
    date_format_my   = 1 << 4, ///< Month, year
    date_format_dm   = 1 << 5, ///< Day, month
    date_format_md   = 1 << 6, ///< Month, day
};
4004
4008 template <class T>
4009 class basic_date : public basic_parser<T>
4010 {
4011 public:
/// Builds a date parser out of externally supplied sub-parsers.
///
/// `format_mask` is stored in m_format_mask and tested against the
/// date_format_* bits when matching; `format` starts as date_format_none.
/// The day, month and year parsers are kept in the public members of the same
/// name; the separator set and the space parser are kept in m_separator and m_space.
///
/// NOTE(review): `locale` is not used by any initializer visible here; it is
/// presumably forwarded to the basic_parser<T> base - confirm against the full source.
4012 basic_date(
4013 _In_ int format_mask,
4014 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4015 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4016 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4017 _In_ const std::shared_ptr<basic_set<T>>& separator,
4018 _In_ const std::shared_ptr<basic_parser<T>>& space,
4019 _In_ const std::locale& locale = std::locale()) :
4021 format(date_format_none),
4022 m_format_mask(format_mask),
4023 day(_day),
4024 month(_month),
4025 year(_year),
4026 m_separator(separator),
4027 m_space(space)
4028 {}
4029
/// Invalidates the day, month and year sub-parsers (each skipped when null)
/// and resets `format` to date_format_none.
///
/// NOTE(review): resetting this parser's own interval is not visible in these
/// lines; presumably it is delegated to the base class - confirm.
4030 virtual void invalidate()
4031 {
4032 if (day) day->invalidate();
4033 if (month) month->invalidate();
4034 if (year) year->invalidate();
4035 format = date_format_none;
4037 }
4038
4039 date_format_t format;
4040 std::shared_ptr<basic_integer<T>> day;
4041 std::shared_ptr<basic_integer<T>> month;
4042 std::shared_ptr<basic_integer<T>> year;
4043
4044 protected:
4045 virtual bool do_match(
4046 _In_reads_or_z_opt_(end) const T* text,
4047 _In_ size_t start = 0,
4048 _In_ size_t end = SIZE_MAX,
4049 _In_ int flags = match_default)
4050 {
4051 _Assume_(text || start >= end);
4052
4053 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4054 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4055 if (day->match(text, start, end, flags)) {
4056 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4057 if (m_separator->match(text, this->interval.end, end, flags)) {
4058 size_t hit_offset = m_separator->hit_offset;
4059 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4060 if (month->match(text, this->interval.end, end, flags)) {
4061 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4062 if (m_separator->match(text, this->interval.end, end, flags) &&
4063 m_separator->hit_offset == hit_offset) // Both separators must match.
4064 {
4065 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4066 if (year->match(text, this->interval.end, end, flags) &&
4067 is_valid(day->value, month->value))
4068 {
4069 this->interval.start = start;
4070 this->interval.end = year->interval.end;
4071 format = date_format_dmy;
4072 return true;
4073 }
4074 }
4075 }
4076 }
4077 }
4078 }
4079
4080 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4081 if (month->match(text, start, end, flags)) {
4082 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4083 if (m_separator->match(text, this->interval.end, end, flags)) {
4084 size_t hit_offset = m_separator->hit_offset;
4085 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4086 if (day->match(text, this->interval.end, end, flags)) {
4087 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4088 if (m_separator->match(text, this->interval.end, end, flags) &&
4089 m_separator->hit_offset == hit_offset) // Both separators must match.
4090 {
4091 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4092 if (year->match(text, this->interval.end, end, flags) &&
4093 is_valid(day->value, month->value))
4094 {
4095 this->interval.start = start;
4096 this->interval.end = year->interval.end;
4097 format = date_format_mdy;
4098 return true;
4099 }
4100 }
4101 }
4102 }
4103 }
4104 }
4105
4106 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4107 if (year->match(text, start, end, flags)) {
4108 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4109 if (m_separator->match(text, this->interval.end, end, flags)) {
4110 size_t hit_offset = m_separator->hit_offset;
4111 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4112 if (month->match(text, this->interval.end, end, flags)) {
4113 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4114 if (m_separator->match(text, this->interval.end, end, flags) &&
4115 m_separator->hit_offset == hit_offset) // Both separators must match.
4116 {
4117 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4118 if (day->match(text, this->interval.end, end, flags) &&
4119 is_valid(day->value, month->value))
4120 {
4121 this->interval.start = start;
4122 this->interval.end = day->interval.end;
4123 format = date_format_ymd;
4124 return true;
4125 }
4126 }
4127 }
4128 }
4129 }
4130 }
4131
4132 if ((m_format_mask & date_format_ym) == date_format_ym) {
4133 if (year->match(text, start, end, flags)) {
4134 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4135 if (m_separator->match(text, this->interval.end, end, flags)) {
4136 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4137 if (month->match(text, this->interval.end, end, flags) &&
4138 is_valid(SIZE_MAX, month->value))
4139 {
4140 if (day) day->invalidate();
4141 this->interval.start = start;
4142 this->interval.end = month->interval.end;
4143 format = date_format_ym;
4144 return true;
4145 }
4146 }
4147 }
4148 }
4149
4150 if ((m_format_mask & date_format_my) == date_format_my) {
4151 if (month->match(text, start, end, flags)) {
4152 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4153 if (m_separator->match(text, this->interval.end, end, flags)) {
4154 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4155 if (year->match(text, this->interval.end, end, flags) &&
4156 is_valid(SIZE_MAX, month->value))
4157 {
4158 if (day) day->invalidate();
4159 this->interval.start = start;
4160 this->interval.end = year->interval.end;
4161 format = date_format_my;
4162 return true;
4163 }
4164 }
4165 }
4166 }
4167
4168 if ((m_format_mask & date_format_dm) == date_format_dm) {
4169 if (day->match(text, start, end, flags)) {
4170 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4171 if (m_separator->match(text, this->interval.end, end, flags)) {
4172 size_t hit_offset = m_separator->hit_offset;
4173 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4174 if (month->match(text, this->interval.end, end, flags) &&
4175 is_valid(day->value, month->value))
4176 {
4177 if (year) year->invalidate();
4178 this->interval.start = start;
4179 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4180 if (m_separator->match(text, this->interval.end, end, flags) &&
4181 m_separator->hit_offset == hit_offset) // Both separators must match.
4182 this->interval.end = m_separator->interval.end;
4183 else
4184 this->interval.end = month->interval.end;
4185 format = date_format_dm;
4186 return true;
4187 }
4188 }
4189 }
4190 }
4191
4192 if ((m_format_mask & date_format_md) == date_format_md) {
4193 if (month->match(text, start, end, flags)) {
4194 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4195 if (m_separator->match(text, this->interval.end, end, flags)) {
4196 size_t hit_offset = m_separator->hit_offset;
4197 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4198 if (day->match(text, this->interval.end, end, flags) &&
4199 is_valid(day->value, month->value))
4200 {
4201 if (year) year->invalidate();
4202 this->interval.start = start;
4203 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4204 if (m_separator->match(text, this->interval.end, end, flags) &&
4205 m_separator->hit_offset == hit_offset) // Both separators must match.
4206 this->interval.end = m_separator->interval.end;
4207 else
4208 this->interval.end = day->interval.end;
4209 format = date_format_md;
4210 return true;
4211 }
4212 }
4213 }
4214 }
4215
4216 if (day) day->invalidate();
4217 if (month) month->invalidate();
4218 if (year) year->invalidate();
4219 format = date_format_none;
4220 this->interval.invalidate();
4221 return false;
4222 }
4223
/// Tells whether `day` can occur in `month`.
///
/// The year is unknown here, so February always allows 29 days.
///
/// \param[in] day    Day of month (1-based), or SIZE_MAX to validate the month only
/// \param[in] month  Month (1 = January ... 12 = December), or SIZE_MAX to validate the day only
///
/// \returns true when the combination is plausible; false otherwise
static bool is_valid(size_t day, size_t month)
{
    // Unknown month: assume January, which has all 31 days, so only the day is validated.
    const size_t checked_month = (month == SIZE_MAX) ? 1 : month;
    // Unknown day: assume the 1st, which every month has, so only the month is validated.
    const size_t checked_day = (day == SIZE_MAX) ? 1 : day;

    if (checked_month < 1 || checked_month > 12)
        return false;

    // Longest possible length of each month, January first.
    constexpr size_t longest_month[12] = { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
    return 1 <= checked_day && checked_day <= longest_month[checked_month - 1];
}
4255
4256 int m_format_mask;
4257 std::shared_ptr<basic_set<T>> m_separator;
4258 std::shared_ptr<basic_parser<T>> m_space;
4259 };
4260
// Convenience aliases of basic_date: date (char), wdate (wchar_t) and tdate
// (wdate when _UNICODE is defined, date otherwise).
4261 using date = basic_date<char>;
4262 using wdate = basic_date<wchar_t>;
4263#ifdef _UNICODE
4264 using tdate = wdate;
4265#else
4266 using tdate = date;
4267#endif
4269
4273 template <class T>
4274 class basic_time : public basic_parser<T>
4275 {
4276 public:
/// Builds a time parser out of externally supplied sub-parsers.
///
/// The hour, minute, second and millisecond parsers are stored in the public
/// members of the same name; the separator set and the millisecond separator
/// are stored in m_separator and m_millisecond_separator.
///
/// NOTE(review): `locale` is not used by any initializer visible here; it is
/// presumably forwarded to the basic_parser<T> base - confirm against the full source.
4277 basic_time(
4278 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4279 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4280 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4281 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4282 _In_ const std::shared_ptr<basic_set<T>>& separator,
4283 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4284 _In_ const std::locale& locale = std::locale()) :
4286 hour(_hour),
4287 minute(_minute),
4288 second(_second),
4289 millisecond(_millisecond),
4290 m_separator(separator),
4291 m_millisecond_separator(millisecond_separator)
4292 {}
4293
/// Invalidates the hour and minute sub-parsers, and the second and millisecond
/// sub-parsers when they are set (those two are optional and may be null).
///
/// NOTE(review): resetting this parser's own interval is not visible in these
/// lines; presumably it is delegated to the base class - confirm.
4294 virtual void invalidate()
4295 {
4296 hour->invalidate();
4297 minute->invalidate();
4298 if (second) second->invalidate();
4299 if (millisecond) millisecond->invalidate();
4301 }
4302
4303 std::shared_ptr<basic_integer10<T>> hour;
4304 std::shared_ptr<basic_integer10<T>> minute;
4305 std::shared_ptr<basic_integer10<T>> second;
4306 std::shared_ptr<basic_integer10<T>> millisecond;
4307
4308 protected:
4309 virtual bool do_match(
4310 _In_reads_or_z_opt_(end) const T* text,
4311 _In_ size_t start = 0,
4312 _In_ size_t end = SIZE_MAX,
4313 _In_ int flags = match_default)
4314 {
4315 _Assume_(text || start >= end);
4316
4317 if (hour->match(text, start, end, flags) &&
4318 m_separator->match(text, hour->interval.end, end, flags) &&
4319 minute->match(text, m_separator->interval.end, end, flags) &&
4320 minute->value < 60)
4321 {
4322 // hh::mm
4323 size_t hit_offset = m_separator->hit_offset;
4324 if (m_separator->match(text, minute->interval.end, end, flags) &&
4325 m_separator->hit_offset == hit_offset && // Both separators must match.
4326 second && second->match(text, m_separator->interval.end, end, flags) &&
4327 second->value < 60)
4328 {
4329 // hh::mm:ss
4330 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4331 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4332 millisecond->value < 1000)
4333 {
4334 // hh::mm:ss.mmmm
4335 this->interval.end = millisecond->interval.end;
4336 }
4337 else {
4338 if (millisecond) millisecond->invalidate();
4339 this->interval.end = second->interval.end;
4340 }
4341 }
4342 else {
4343 if (second) second->invalidate();
4344 if (millisecond) millisecond->invalidate();
4345 this->interval.end = minute->interval.end;
4346 }
4347 this->interval.start = start;
4348 return true;
4349 }
4350
4351 hour->invalidate();
4352 minute->invalidate();
4353 if (second) second->invalidate();
4354 if (millisecond) millisecond->invalidate();
4355 this->interval.invalidate();
4356 return false;
4357 }
4358
4359 std::shared_ptr<basic_set<T>> m_separator;
4360 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4361 };
4362
// Convenience aliases of basic_time: time (char), wtime (wchar_t) and ttime
// (wtime when _UNICODE is defined, time otherwise).
4363 using time = basic_time<char>;
4364 using wtime = basic_time<wchar_t>;
4365#ifdef _UNICODE
4366 using ttime = wtime;
4367#else
4368 using ttime = time;
4369#endif
4371
4375 template <class T>
4376 class basic_angle : public basic_parser<T>
4377 {
4378 public:
4380 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4381 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4382 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4383 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4384 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4385 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4386 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4387 _In_ const std::locale& locale = std::locale()) :
4389 degree(_degree),
4390 degree_separator(_degree_separator),
4391 minute(_minute),
4392 minute_separator(_minute_separator),
4393 second(_second),
4394 second_separator(_second_separator),
4395 decimal(_decimal)
4396 {}
4397
/// Invalidates all sub-parsers.
///
/// The degree and minute parsers and their separators are dereferenced
/// unconditionally; second, second_separator and decimal are optional and
/// skipped when null.
/// NOTE(review): resetting this parser's own interval is not visible in these
/// lines; presumably it is delegated to the base class - confirm.
4398 virtual void invalidate()
4399 {
4400 degree->invalidate();
4401 degree_separator->invalidate();
4402 minute->invalidate();
4403 minute_separator->invalidate();
4404 if (second) second->invalidate();
4405 if (second_separator) second_separator->invalidate();
4406 if (decimal) decimal->invalidate();
4408 }
4409
4410 std::shared_ptr<basic_integer10<T>> degree;
4411 std::shared_ptr<basic_parser<T>> degree_separator;
4412 std::shared_ptr<basic_integer10<T>> minute;
4413 std::shared_ptr<basic_parser<T>> minute_separator;
4414 std::shared_ptr<basic_integer10<T>> second;
4415 std::shared_ptr<basic_parser<T>> second_separator;
4416 std::shared_ptr<basic_parser<T>> decimal;
4417
4418 protected:
4419 virtual bool do_match(
4420 _In_reads_or_z_opt_(end) const T* text,
4421 _In_ size_t start = 0,
4422 _In_ size_t end = SIZE_MAX,
4423 _In_ int flags = match_default)
4424 {
4425 _Assume_(text || start >= end);
4426
4427 this->interval.end = start;
4428
4429 if (degree->match(text, this->interval.end, end, flags) &&
4430 degree_separator->match(text, degree->interval.end, end, flags))
4431 {
4432 // Degrees
4433 this->interval.end = degree_separator->interval.end;
4434 }
4435 else {
4436 degree->invalidate();
4437 degree_separator->invalidate();
4438 }
4439
4440 if (minute->match(text, this->interval.end, end, flags) &&
4441 minute->value < 60 &&
4442 minute_separator->match(text, minute->interval.end, end, flags))
4443 {
4444 // Minutes
4445 this->interval.end = minute_separator->interval.end;
4446 }
4447 else {
4448 minute->invalidate();
4449 minute_separator->invalidate();
4450 }
4451
4452 if (second && second->match(text, this->interval.end, end, flags) &&
4453 second->value < 60)
4454 {
4455 // Seconds
4456 this->interval.end = second->interval.end;
4457 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4458 this->interval.end = second_separator->interval.end;
4459 else
4460 if (second_separator) second_separator->invalidate();
4461 }
4462 else {
4463 if (second) second->invalidate();
4464 if (second_separator) second_separator->invalidate();
4465 }
4466
4467 if (degree->interval.start < degree->interval.end ||
4468 minute->interval.start < minute->interval.end ||
4469 (second && second->interval.start < second->interval.end))
4470 {
4471 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4472 // Decimals
4473 this->interval.end = decimal->interval.end;
4474 }
4475 else if (decimal)
4476 decimal->invalidate();
4477 this->interval.start = start;
4478 return true;
4479 }
4480 if (decimal) decimal->invalidate();
4481 this->interval.invalidate();
4482 return false;
4483 }
4484 };
4485
4486 using angle = basic_angle<char>;
4488#ifdef _UNICODE
4489 using RRegElKot = wangle;
4490#else
4491 using RRegElKot = angle;
4492#endif
4494
4498 template <class T>
4500 {
4501 public:
4503 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4504 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4505 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4506 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4507 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4508 _In_ const std::shared_ptr<basic_parser<T>>& space,
4509 _In_ const std::locale& locale = std::locale()) :
4511 m_digit(digit),
4512 m_plus_sign(plus_sign),
4513 m_lparenthesis(lparenthesis),
4514 m_rparenthesis(rparenthesis),
4515 m_separator(separator),
4516 m_space(space)
4517 {}
4518
4519 virtual void invalidate()
4520 {
4521 value.clear();
4523 }
4524
4525 std::basic_string<T> value;
4526
4527 protected:
4528 virtual bool do_match(
4529 _In_reads_or_z_opt_(end) const T* text,
4530 _In_ size_t start = 0,
4531 _In_ size_t end = SIZE_MAX,
4532 _In_ int flags = match_default)
4533 {
4534 _Assume_(text || start >= end);
4535
4536 size_t safe_digit_end = start, safe_value_size = 0;
4537 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4538 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4539
4540 this->interval.end = start;
4541 value.clear();
4542 m_lparenthesis->invalidate();
4543 m_rparenthesis->invalidate();
4544
4545 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4546 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4547 safe_value_size = value.size();
4548 this->interval.end = m_plus_sign->interval.end;
4549 }
4550
4551 for (;;) {
4552 _Assume_(text || this->interval.end >= end);
4553 if (this->interval.end >= end || !text[this->interval.end])
4554 break;
4555 if (m_digit->match(text, this->interval.end, end, flags)) {
4556 // Digit
4557 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4558 this->interval.end = m_digit->interval.end;
4559 if (!in_parentheses) {
4560 safe_digit_end = this->interval.end;
4561 safe_value_size = value.size();
4562 has_digits = true;
4563 }
4564 after_digit = true;
4565 after_parentheses = false;
4566 }
4567 else if (
4568 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4569 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4570 m_lparenthesis->match(text, this->interval.end, end, flags))
4571 {
4572 // Left parenthesis
4573 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4574 this->interval.end = m_lparenthesis->interval.end;
4575 in_parentheses = true;
4576 after_digit = false;
4577 after_parentheses = false;
4578 }
4579 else if (
4580 in_parentheses && // After left parenthesis
4581 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4582 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4583 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4584 {
4585 // Right parenthesis
4586 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4587 this->interval.end = m_rparenthesis->interval.end;
4588 safe_digit_end = this->interval.end;
4589 safe_value_size = value.size();
4590 in_parentheses = false;
4591 after_digit = false;
4592 after_parentheses = true;
4593 }
4594 else if (
4595 after_digit &&
4596 !in_parentheses && // No separators inside parentheses
4597 !after_parentheses && // No separators following right parenthesis
4598 m_separator && m_separator->match(text, this->interval.end, end, flags))
4599 {
4600 // Separator
4601 this->interval.end = m_separator->interval.end;
4602 after_digit = false;
4603 after_parentheses = false;
4604 }
4605 else if (
4607 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4608 {
4609 // Space
4610 this->interval.end = m_space->interval.end;
4611 after_digit = false;
4612 after_parentheses = false;
4613 }
4614 else
4615 break;
4616 }
4617 if (has_digits) {
4618 value.erase(safe_value_size);
4619 this->interval.start = start;
4620 this->interval.end = safe_digit_end;
4621 return true;
4622 }
4623 value.clear();
4624 this->interval.invalidate();
4625 return false;
4626 }
4627
4628 std::shared_ptr<basic_parser<T>> m_digit;
4629 std::shared_ptr<basic_parser<T>> m_plus_sign;
4630 std::shared_ptr<basic_set<T>> m_lparenthesis;
4631 std::shared_ptr<basic_set<T>> m_rparenthesis;
4632 std::shared_ptr<basic_parser<T>> m_separator;
4633 std::shared_ptr<basic_parser<T>> m_space;
4634 };
4635
4636 using phone_number = basic_phone_number<char>;
4637 using wphone_number = basic_phone_number<wchar_t>;
4638#ifdef _UNICODE
4639 using tphone_number = wphone_number;
4640#else
4641 using tphone_number = phone_number;
4642#endif
4643 using sgml_phone_number = basic_phone_number<char>;
4644
4650 template <class T>
4651 class basic_iban : public basic_parser<T>
4652 {
4653 public:
4654 basic_iban(
4655 _In_ const std::shared_ptr<basic_parser<T>>& space,
4656 _In_ const std::locale& locale = std::locale()) :
4658 m_space(space)
4659 {
4660 this->country[0] = 0;
4661 this->check_digits[0] = 0;
4662 this->bban[0] = 0;
4663 this->is_valid = false;
4664 }
4665
4666 virtual void invalidate()
4667 {
4668 this->country[0] = 0;
4669 this->check_digits[0] = 0;
4670 this->bban[0] = 0;
4671 this->is_valid = false;
4673 }
4674
4675 T country[3];
4677 T bban[31];
4679
4680 protected:
4681 virtual bool do_match(
4682 _In_reads_or_z_opt_(end) const T* text,
4683 _In_ size_t start = 0,
4684 _In_ size_t end = SIZE_MAX,
4685 _In_ int flags = match_default)
4686 {
4687 _Assume_(text || start >= end);
4688 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4689 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4690 struct country_t {
4691 T country[2];
4692 T check_digits[2];
4693 size_t length;
4694 };
4695 static const country_t s_countries[] = {
4696 { { 'A', 'D' }, {}, 24 }, // Andorra
4697 { { 'A', 'E' }, {}, 23 }, // United Arab Emirates
4698 { { 'A', 'L' }, {}, 28 }, // Albania
4699 { { 'A', 'O' }, {}, 25 }, // Angola
4700 { { 'A', 'T' }, {}, 20 }, // Austria
4701 { { 'A', 'Z' }, {}, 28 }, // Azerbaijan
4702 { { 'B', 'A' }, { '3', '9' }, 20}, // Bosnia and Herzegovina
4703 { { 'B', 'E' }, {}, 16 }, // Belgium
4704 { { 'B', 'F' }, {}, 28 }, // Burkina Faso
4705 { { 'B', 'G' }, {}, 22 }, // Bulgaria
4706 { { 'B', 'H' }, {}, 22 }, // Bahrain
4707 { { 'B', 'I' }, {}, 27 }, // Burundi
4708 { { 'B', 'J' }, {}, 28 }, // Benin
4709 { { 'B', 'R' }, {}, 29 }, // Brazil
4710 { { 'B', 'Y' }, {}, 28 }, // Belarus
4711 { { 'C', 'F' }, {}, 27 }, // Central African Republic
4712 { { 'C', 'G' }, {}, 27 }, // Congo, Republic of the
4713 { { 'C', 'H' }, {}, 21 }, // Switzerland
4714 { { 'C', 'I' }, {}, 28 }, // Côte d'Ivoire
4715 { { 'C', 'M' }, {}, 27 }, // Cameroon
4716 { { 'C', 'R' }, {}, 22 }, // Costa Rica
4717 { { 'C', 'V' }, {}, 25 }, // Cabo Verde
4718 { { 'C', 'Y' }, {}, 28 }, // Cyprus
4719 { { 'C', 'Z' }, {}, 24 }, // Czech Republic
4720 { { 'D', 'E' }, {}, 22 }, // Germany
4721 { { 'D', 'J' }, {}, 27 }, // Djibouti
4722 { { 'D', 'K' }, {}, 18 }, // Denmark
4723 { { 'D', 'O' }, {}, 28 }, // Dominican Republic
4724 { { 'D', 'Z' }, {}, 26 }, // Algeria
4725 { { 'E', 'E' }, {}, 20 }, // Estonia
4726 { { 'E', 'G' }, {}, 29 }, // Egypt
4727 { { 'E', 'S' }, {}, 24 }, // Spain
4728 { { 'F', 'I' }, {}, 18 }, // Finland
4729 { { 'F', 'O' }, {}, 18 }, // Faroe Islands
4730 { { 'F', 'R' }, {}, 27 }, // France
4731 { { 'G', 'A' }, {}, 27 }, // Gabon
4732 { { 'G', 'B' }, {}, 22 }, // United Kingdom
4733 { { 'G', 'E' }, {}, 22 }, // Georgia
4734 { { 'G', 'I' }, {}, 23 }, // Gibraltar
4735 { { 'G', 'L' }, {}, 18 }, // Greenland
4736 { { 'G', 'Q' }, {}, 27 }, // Equatorial Guinea
4737 { { 'G', 'R' }, {}, 27 }, // Greece
4738 { { 'G', 'T' }, {}, 28 }, // Guatemala
4739 { { 'G', 'W' }, {}, 25 }, // Guinea-Bissau
4740 { { 'H', 'N' }, {}, 28 }, // Honduras
4741 { { 'H', 'R' }, {}, 21 }, // Croatia
4742 { { 'H', 'U' }, {}, 28 }, // Hungary
4743 { { 'I', 'E' }, {}, 22 }, // Ireland
4744 { { 'I', 'L' }, {}, 23 }, // Israel
4745 { { 'I', 'Q' }, {}, 23 }, // Iraq
4746 { { 'I', 'R' }, {}, 26 }, // Iran
4747 { { 'I', 'S' }, {}, 26 }, // Iceland
4748 { { 'I', 'T' }, {}, 27 }, // Italy
4749 { { 'J', 'O' }, {}, 30 }, // Jordan
4750 { { 'K', 'M' }, {}, 27 }, // Comoros
4751 { { 'K', 'W' }, {}, 30 }, // Kuwait
4752 { { 'K', 'Z' }, {}, 20 }, // Kazakhstan
4753 { { 'L', 'B' }, {}, 28 }, // Lebanon
4754 { { 'L', 'C' }, {}, 32 }, // Saint Lucia
4755 { { 'L', 'I' }, {}, 21 }, // Liechtenstein
4756 { { 'L', 'T' }, {}, 20 }, // Lithuania
4757 { { 'L', 'U' }, {}, 20 }, // Luxembourg
4758 { { 'L', 'V' }, {}, 21 }, // Latvia
4759 { { 'L', 'Y' }, {}, 25 }, // Libya
4760 { { 'M', 'A' }, {}, 28 }, // Morocco
4761 { { 'M', 'C' }, {}, 27 }, // Monaco
4762 { { 'M', 'D' }, {}, 24 }, // Moldova
4763 { { 'M', 'E' }, { '2', '5' }, 22 }, // Montenegro
4764 { { 'M', 'G' }, {}, 27 }, // Madagascar
4765 { { 'M', 'K' }, { '0', '7' }, 19 }, // North Macedonia
4766 { { 'M', 'L' }, {}, 28 }, // Mali
4767 { { 'M', 'R' }, { '1', '3' }, 27}, // Mauritania
4768 { { 'M', 'T' }, {}, 31 }, // Malta
4769 { { 'M', 'U' }, {}, 30 }, // Mauritius
4770 { { 'M', 'Z' }, {}, 25 }, // Mozambique
4771 { { 'N', 'E' }, {}, 28 }, // Niger
4772 { { 'N', 'I' }, {}, 32 }, // Nicaragua
4773 { { 'N', 'L' }, {}, 18 }, // Netherlands
4774 { { 'N', 'O' }, {}, 15 }, // Norway
4775 { { 'P', 'K' }, {}, 24 }, // Pakistan
4776 { { 'P', 'L' }, {}, 28 }, // Poland
4777 { { 'P', 'S' }, {}, 29 }, // Palestinian territories
4778 { { 'P', 'T' }, { '5', '0' }, 25 }, // Portugal
4779 { { 'Q', 'A' }, {}, 29 }, // Qatar
4780 { { 'R', 'O' }, {}, 24 }, // Romania
4781 { { 'R', 'S' }, { '3', '5' }, 22 }, // Serbia
4782 { { 'R', 'U' }, {}, 33 }, // Russia
4783 { { 'S', 'A' }, {}, 24 }, // Saudi Arabia
4784 { { 'S', 'C' }, {}, 31 }, // Seychelles
4785 { { 'S', 'D' }, {}, 18 }, // Sudan
4786 { { 'S', 'E' }, {}, 24 }, // Sweden
4787 { { 'S', 'I' }, { '5', '6' }, 19 }, // Slovenia
4788 { { 'S', 'K' }, {}, 24 }, // Slovakia
4789 { { 'S', 'M' }, {}, 27 }, // San Marino
4790 { { 'S', 'N' }, {}, 28 }, // Senegal
4791 { { 'S', 'T' }, {}, 25 }, // São Tomé and Príncipe
4792 { { 'S', 'V' }, {}, 28 }, // El Salvador
4793 { { 'T', 'D' }, {}, 27 }, // Chad
4794 { { 'T', 'G' }, {}, 28 }, // Togo
4795 { { 'T', 'L' }, { '3', '8' }, 23}, // East Timor
4796 { { 'T', 'N' }, { '5', '9' }, 24 }, // Tunisia
4797 { { 'T', 'R' }, {}, 26 }, // Turkey
4798 { { 'U', 'A' }, {}, 29 }, // Ukraine
4799 { { 'V', 'A' }, {}, 22 }, // Vatican City
4800 { { 'V', 'G' }, {}, 24 }, // Virgin Islands, British
4801 { { 'X', 'K' }, {}, 20 }, // Kosovo
4802 };
4803 const country_t* country_desc = nullptr;
4804 size_t n, available, next, bban_length;
4806
4807 this->interval.end = start;
4808 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4809 if (this->interval.end >= end || !text[this->interval.end])
4810 goto error; // incomplete country code
4811 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4812 if (chr < 'A' || 'Z' < chr)
4813 goto error; // invalid country code
4814 this->country[i] = chr;
4815 }
4816 for (size_t l = 0, r = _countof(s_countries);;) {
4817 if (l >= r)
4818 goto error; // unknown country
4819 size_t m = (l + r) / 2;
4820 const country_t& c = s_countries[m];
4821 if (c.country[0] < this->country[0] || (c.country[0] == this->country[0] && c.country[1] < this->country[1]))
4822 l = m + 1;
4823 else if (this->country[0] < c.country[0] || (this->country[0] == c.country[0] && this->country[1] < c.country[1]))
4824 r = m;
4825 else {
4826 country_desc = &c;
4827 break;
4828 }
4829 }
4830 this->country[2] = 0;
4831
4832 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4833 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4834 goto error; // incomplete or invalid check digits
4835 this->check_digits[i] = text[this->interval.end];
4836 }
4837 this->check_digits[2] = 0;
4838
4839 if ((country_desc->check_digits[0] && this->check_digits[0] != country_desc->check_digits[0]) ||
4840 (country_desc->check_digits[1] && this->check_digits[1] != country_desc->check_digits[1]))
4841 goto error; // unexpected check digits
4842
4843 bban_length = country_desc->length - 4;
4844 for (n = 0; n < bban_length;) {
4845 if (this->interval.end >= end || !text[this->interval.end])
4846 goto error; // bban too short
4847 if (m_space && m_space->match(text, this->interval.end, end, flags)) {
4848 this->interval.end = m_space->interval.end;
4849 continue;
4850 }
4851 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4852 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4853 this->bban[n++] = chr;
4854 this->interval.end++;
4855 }
4856 else
4857 goto error; // invalid bban
4858 }
4859 this->bban[n] = 0;
4860
4861 // Normalize IBAN.
4862 T normalized[69];
4863 available = 0;
4864 for (size_t i = 0; ; ++i) {
4865 if (!this->bban[i]) {
4866 for (i = 0; i < 2; ++i) {
4867 if ('A' <= this->country[i] && this->country[i] <= 'J') {
4868 normalized[available++] = '1';
4869 normalized[available++] = '0' + this->country[i] - 'A';
4870 }
4871 else if ('K' <= this->country[i] && this->country[i] <= 'T') {
4872 normalized[available++] = '2';
4873 normalized[available++] = '0' + this->country[i] - 'K';
4874 }
4875 else if ('U' <= this->country[i] && this->country[i] <= 'Z') {
4876 normalized[available++] = '3';
4877 normalized[available++] = '0' + this->country[i] - 'U';
4878 }
4879 }
4880 normalized[available++] = this->check_digits[0];
4881 normalized[available++] = this->check_digits[1];
4882 normalized[available] = 0;
4883 break;
4884 }
4885 if ('0' <= this->bban[i] && this->bban[i] <= '9')
4886 normalized[available++] = this->bban[i];
4887 else if ('A' <= this->bban[i] && this->bban[i] <= 'J') {
4888 normalized[available++] = '1';
4889 normalized[available++] = '0' + this->bban[i] - 'A';
4890 }
4891 else if ('K' <= this->bban[i] && this->bban[i] <= 'T') {
4892 normalized[available++] = '2';
4893 normalized[available++] = '0' + this->bban[i] - 'K';
4894 }
4895 else if ('U' <= this->bban[i] && this->bban[i] <= 'Z') {
4896 normalized[available++] = '3';
4897 normalized[available++] = '0' + this->bban[i] - 'U';
4898 }
4899 }
4900
4901 // Calculate modulo 97.
4902 nominator = stdex::strtou32(normalized, 9, &next, 10);
4903 for (;;) {
4904 nominator %= 97;
4905 if (!normalized[next]) {
4906 this->is_valid = nominator == 1;
4907 break;
4908 }
4909 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
4910 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
4911 nominator = nominator * 10 + (normalized[next] - '0');
4912 }
4913
4914 this->interval.start = start;
4915 return true;
4916
4917 error:
4918 invalidate();
4919 return false;
4920 }
4921
4922 std::shared_ptr<basic_parser<T>> m_space;
4923 };
4924
4925 using iban = basic_iban<char>;
4926 using wiban = basic_iban<wchar_t>;
4927#ifdef _UNICODE
4928 using tiban = wiban;
4929#else
4930 using tiban = iban;
4931#endif
4932 using sgml_iban = basic_iban<char>;
4933
4939 template <class T>
4941 {
4942 public:
4944 _In_ const std::shared_ptr<basic_parser<T>>& space,
4945 _In_ const std::locale& locale = std::locale()) :
4947 m_space(space)
4948 {
4949 this->check_digits[0] = 0;
4950 this->reference[0] = 0;
4951 this->is_valid = false;
4952 }
4953
4954 virtual void invalidate()
4955 {
4956 this->check_digits[0] = 0;
4957 this->reference[0] = 0;
4958 this->is_valid = false;
4960 }
4961
4965
4966 protected:
4967 virtual bool do_match(
4968 _In_reads_or_z_opt_(end) const T* text,
4969 _In_ size_t start = 0,
4970 _In_ size_t end = SIZE_MAX,
4971 _In_ int flags = match_default)
4972 {
4973 _Assume_(text || start >= end);
4974 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4975 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4976 size_t n, available, next;
4978
4979 this->interval.end = start;
4980 if (this->interval.end + 1 >= end ||
4981 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'R' ||
4982 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'F')
4983 goto error; // incomplete or wrong reference ID
4984 this->interval.end += 2;
4985
4986 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4987 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4988 goto error; // incomplete or invalid check digits
4989 this->check_digits[i] = text[this->interval.end];
4990 }
4991 this->check_digits[2] = 0;
4992
4993 for (n = 0;;) {
4994 if (m_space && m_space->match(text, this->interval.end, end, flags))
4995 this->interval.end = m_space->interval.end;
4996 for (size_t j = 0; j < 4; ++j) {
4997 if (this->interval.end >= end || !text[this->interval.end])
4998 goto out;
4999 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
5000 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
5001 if (n >= _countof(reference) - 1)
5002 goto error; // reference overflow
5003 this->reference[n++] = chr;
5004 this->interval.end++;
5005 }
5006 else
5007 goto out;
5008 }
5009 }
5010 out:
5011 if (!n)
5012 goto error; // reference too short
5013 this->reference[_countof(this->reference) - 1] = 0;
5014 for (size_t i = n, j = _countof(this->reference) - 1; i;)
5015 this->reference[--j] = this->reference[--i];
5016 for (size_t j = _countof(this->reference) - 1 - n; j;)
5017 this->reference[--j] = '0';
5018
5019 // Normalize creditor reference.
5020 T normalized[47];
5021 available = 0;
5022 for (size_t i = 0; ; ++i) {
5023 if (!this->reference[i]) {
5024 normalized[available++] = '2'; // R
5025 normalized[available++] = '7';
5026 normalized[available++] = '1'; // F
5027 normalized[available++] = '5';
5028 normalized[available++] = this->check_digits[0];
5029 normalized[available++] = this->check_digits[1];
5030 normalized[available] = 0;
5031 break;
5032 }
5033 if ('0' <= this->reference[i] && this->reference[i] <= '9')
5034 normalized[available++] = this->reference[i];
5035 else if ('A' <= this->reference[i] && this->reference[i] <= 'J') {
5036 normalized[available++] = '1';
5037 normalized[available++] = '0' + this->reference[i] - 'A';
5038 }
5039 else if ('K' <= this->reference[i] && this->reference[i] <= 'T') {
5040 normalized[available++] = '2';
5041 normalized[available++] = '0' + this->reference[i] - 'K';
5042 }
5043 else if ('U' <= this->reference[i] && this->reference[i] <= 'Z') {
5044 normalized[available++] = '3';
5045 normalized[available++] = '0' + this->reference[i] - 'U';
5046 }
5047 }
5048
5049 // Calculate modulo 97.
5050 nominator = stdex::strtou32(normalized, 9, &next, 10);
5051 for (;;) {
5052 nominator %= 97;
5053 if (!normalized[next]) {
5054 this->is_valid = nominator == 1;
5055 break;
5056 }
5057 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
5058 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
5059 nominator = nominator * 10 + (normalized[next] - '0');
5060 }
5061
5062 this->interval.start = start;
5063 return true;
5064
5065 error:
5066 invalidate();
5067 return false;
5068 }
5069
5070 std::shared_ptr<basic_parser<T>> m_space;
5071 };
5072
5073 using creditor_reference = basic_creditor_reference<char>;
5074 using wcreditor_reference = basic_creditor_reference<wchar_t>;
5075#ifdef _UNICODE
5076 using tcreditor_reference = wcreditor_reference;
5077#else
5078 using tcreditor_reference = creditor_reference;
5079#endif
5080 using sgml_creditor_reference = basic_creditor_reference<char>;
5081
5087 template <class T>
5089 {
5090 public:
5091 basic_si_reference_part(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5092
5093 protected:
5094 virtual bool do_match(
5095 _In_reads_or_z_opt_(end) const T* text,
5096 _In_ size_t start = 0,
5097 _In_ size_t end = SIZE_MAX,
5098 _In_ int flags = match_default)
5099 {
5100 _Assume_(text || start >= end);
5101 this->interval.end = start;
5102 for (;;) {
5103 if (this->interval.end >= end || !text[this->interval.end])
5104 break;
5105 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9')
5106 this->interval.end++;
5107 else
5108 break;
5109 }
5111 this->interval.start = start;
5112 return true;
5113 }
5114 this->interval.invalidate();
5115 return false;
5116 }
5117 };
5118
5121#ifdef _UNICODE
5123#else
5125#endif
5127
5133 template <class T>
5135 {
5136 public:
5137 basic_si_reference_delimiter(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5138
5139 protected:
5140 virtual bool do_match(
5141 _In_reads_or_z_opt_(end) const T* text,
5142 _In_ size_t start = 0,
5143 _In_ size_t end = SIZE_MAX,
5144 _In_ int flags = match_default)
5145 {
5146 _Assume_(text || start >= end);
5147 if (start < end && text[start] == '-') {
5148 this->interval.end = (this->interval.start = start) + 1;
5149 return true;
5150 }
5151 this->interval.invalidate();
5152 return false;
5153 }
5154 };
5155
5158#ifdef _UNICODE
5160#else
5162#endif
5164
5172 template <class T>
5174 {
5175 public:
5177 _In_ const std::shared_ptr<basic_parser<T>>& space,
5178 _In_ const std::locale& locale = std::locale()) :
5180 part1(locale),
5181 part2(locale),
5182 part3(locale),
5183 is_valid(false),
5184 m_space(space),
5185 m_delimiter(locale)
5186 {
5187 this->model[0] = 0;
5188 }
5189
5190 virtual void invalidate()
5191 {
5192 this->model[0] = 0;
5193 this->part1.invalidate();
5194 this->part2.invalidate();
5195 this->part3.invalidate();
5196 this->is_valid = false;
5198 }
5199
5200 T model[3];
5205
5206 protected:
5207 virtual bool do_match(
5208 _In_reads_or_z_opt_(end) const T* text,
5209 _In_ size_t start = 0,
5210 _In_ size_t end = SIZE_MAX,
5211 _In_ int flags = match_default)
5212 {
5213 _Assume_(text || start >= end);
5214 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
5215 const bool case_insensitive = flags & match_case_insensitive ? true : false;
5216
5217 this->interval.end = start;
5218 if (this->interval.end + 1 >= end ||
5219 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'S' ||
5220 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'I')
5221 goto error; // incomplete or wrong reference ID
5222 this->interval.end += 2;
5223
5224 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5225 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5226 goto error; // incomplete or invalid model
5227 this->model[i] = text[this->interval.end];
5228 }
5229 this->model[2] = 0;
5230
5231 this->part1.invalidate();
5232 this->part2.invalidate();
5233 this->part3.invalidate();
5234 if (this->model[0] == '9' && this->model[1] == '9') {
5235 is_valid = true;
5236 this->interval.start = start;
5237 return true;
5238 }
5239
5240 if (m_space && m_space->match(text, this->interval.end, end, flags))
5241 this->interval.end = m_space->interval.end;
5242
5243 this->part1.match(text, this->interval.end, end, flags) &&
5244 this->m_delimiter.match(text, this->part1.interval.end, end, flags) &&
5245 this->part2.match(text, this->m_delimiter.interval.end, end, flags) &&
5246 this->m_delimiter.match(text, this->part2.interval.end, end, flags) &&
5247 this->part3.match(text, this->m_delimiter.interval.end, end, flags);
5248
5249 this->interval.start = start;
5250 if (this->part3.interval)
5251 this->interval.end = this->part3.interval.end;
5252 else if (this->part2.interval)
5253 this->interval.end = this->part2.interval.end;
5254 else if (this->part1.interval)
5255 this->interval.end = this->part1.interval.end;
5256 else
5257 this->interval.end = start + 4;
5258
5259 if (this->model[0] == '0' && this->model[1] == '0')
5260 is_valid =
5261 this->part3.interval ?
5262 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5263 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 :
5264 this->part2.interval ?
5265 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5266 this->part1.interval.size() + this->part2.interval.size() <= 20 :
5267 this->part1.interval ?
5268 this->part1.interval.size() <= 12 :
5269 false;
5270 else if (this->model[0] == '0' && this->model[1] == '1')
5271 is_valid =
5272 this->part3.interval ?
5273 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5274 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5275 check11(
5276 text + this->part1.interval.start, this->part1.interval.size(),
5277 text + this->part2.interval.start, this->part2.interval.size(),
5278 text + this->part3.interval.start, this->part3.interval.size()) :
5279 this->part2.interval ?
5280 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5281 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5282 check11(
5283 text + this->part1.interval.start, this->part1.interval.size(),
5284 text + this->part2.interval.start, this->part2.interval.size()) :
5285 this->part1.interval ?
5286 this->part1.interval.size() <= 12 &&
5287 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5288 false;
5289 else if (this->model[0] == '0' && this->model[1] == '2')
5290 is_valid =
5291 this->part3.interval ?
5292 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5293 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5294 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5295 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5296 false;
5297 else if (this->model[0] == '0' && this->model[1] == '3')
5298 is_valid =
5299 this->part3.interval ?
5300 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5301 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5302 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5303 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5304 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5305 false;
5306 else if (this->model[0] == '0' && this->model[1] == '4')
5307 is_valid =
5308 this->part3.interval ?
5309 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5310 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5311 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5312 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5313 false;
5314 else if ((this->model[0] == '0' || this->model[0] == '5') && this->model[1] == '5')
5315 is_valid =
5316 this->part3.interval ?
5317 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5318 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5319 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5320 this->part2.interval ?
5321 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5322 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5323 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5324 this->part1.interval ?
5325 this->part1.interval.size() <= 12 &&
5326 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5327 false;
5328 else if (this->model[0] == '0' && this->model[1] == '6')
5329 is_valid =
5330 this->part3.interval ?
5331 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5332 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5333 check11(
5334 text + this->part2.interval.start, this->part2.interval.size(),
5335 text + this->part3.interval.start, this->part3.interval.size()) :
5336 this->part2.interval ?
5337 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5338 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5339 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5340 false;
5341 else if (this->model[0] == '0' && this->model[1] == '7')
5342 is_valid =
5343 this->part3.interval ?
5344 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5345 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5346 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5347 this->part2.interval ?
5348 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5349 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5350 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5351 false;
5352 else if (this->model[0] == '0' && this->model[1] == '8')
5353 is_valid =
5354 this->part3.interval ?
5355 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5356 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5357 check11(
5358 text + this->part1.interval.start, this->part1.interval.size(),
5359 text + this->part2.interval.start, this->part2.interval.size()) &&
5360 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5361 false;
5362 else if (this->model[0] == '0' && this->model[1] == '9')
5363 is_valid =
5364 this->part3.interval ?
5365 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5366 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5367 check11(
5368 text + this->part1.interval.start, this->part1.interval.size(),
5369 text + this->part2.interval.start, this->part2.interval.size()) :
5370 this->part2.interval ?
5371 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5372 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5373 check11(
5374 text + this->part1.interval.start, this->part1.interval.size(),
5375 text + this->part2.interval.start, this->part2.interval.size()) :
5376 this->part1.interval ?
5377 this->part1.interval.size() <= 12 &&
5378 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5379 false;
5380 else if (this->model[0] == '1' && this->model[1] == '0')
5381 is_valid =
5382 this->part3.interval ?
5383 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5384 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5385 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5386 check11(
5387 text + this->part2.interval.start, this->part2.interval.size(),
5388 text + this->part3.interval.start, this->part3.interval.size()) :
5389 this->part2.interval ?
5390 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5391 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5392 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5393 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5394 false;
5395 else if (
5396 (this->model[0] == '1' && (this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5397 ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '8') ||
5398 (this->model[0] == '4' && (this->model[1] == '0' || this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5399 (this->model[0] == '5' && (this->model[1] == '1' || this->model[1] == '8')))
5400 is_valid =
5401 this->part3.interval ?
5402 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5403 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5404 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5405 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5406 this->part2.interval ?
5407 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5408 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5409 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5410 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5411 false;
5412 else if (this->model[0] == '1' && this->model[1] == '2')
5413 is_valid =
5414 this->part3.interval ? false :
5415 this->part2.interval ? false :
5416 this->part1.interval ?
5417 this->part1.interval.size() <= 13 &&
5418 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5419 false;
5420 else if ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '1')
5421 is_valid =
5422 this->part3.interval ? false :
5423 this->part2.interval ?
5424 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5425 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5426 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5427 false;
5428 else
5429 is_valid = true; // Assume models we don't handle as valid
5430 return true;
5431
5432 error:
5433 invalidate();
5434 return false;
5435 }
5436
5437 static bool check11(
5438 _In_count_(num_part1) const T* part1, _In_ size_t num_part1)
5439 {
5440 _Assume_(part1 && num_part1 >= 1);
5441 uint32_t nominator = 0, ponder = 2;
5442 for (size_t i = num_part1 - 1; i--; ++ponder)
5443 nominator += (part1[i] - '0') * ponder;
5444 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5445 if (control >= 10)
5446 control = 0;
5447 return control == part1[num_part1 - 1] - '0';
5448 }
5449
5450 static bool check11(
5451 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5452 _In_count_(num_part2) const T* part2, _In_ size_t num_part2)
5453 {
5454 _Assume_(part1 || !num_part1);
5455 _Assume_(part2 && num_part2 >= 1);
5456 uint32_t nominator = 0, ponder = 2;
5457 for (size_t i = num_part2 - 1; i--; ++ponder)
5458 nominator += (part2[i] - '0') * ponder;
5459 for (size_t i = num_part1; i--; ++ponder)
5460 nominator += (part1[i] - '0') * ponder;
5461 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5462 if (control == 10)
5463 control = 0;
5464 return control == part2[num_part2 - 1] - '0';
5465 }
5466
5467 static bool check11(
5468 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5469 _In_count_(num_part2) const T* part2, _In_ size_t num_part2,
5470 _In_count_(num_part3) const T* part3, _In_ size_t num_part3)
5471 {
5472 _Assume_(part1 || !num_part1);
5473 _Assume_(part2 || !num_part2);
5474 _Assume_(part3 && num_part3 >= 1);
5475 uint32_t nominator = 0, ponder = 2;
5476 for (size_t i = num_part3 - 1; i--; ++ponder)
5477 nominator += (part3[i] - '0') * ponder;
5478 for (size_t i = num_part2; i--; ++ponder)
5479 nominator += (part2[i] - '0') * ponder;
5480 for (size_t i = num_part1; i--; ++ponder)
5481 nominator += (part1[i] - '0') * ponder;
5482 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5483 if (control == 10)
5484 control = 0;
5485 return control == part2[num_part3 - 1] - '0';
5486 }
5487
5488 std::shared_ptr<basic_parser<T>> m_space;
5489 basic_si_reference_delimiter<T> m_delimiter;
5490 };
5491
5492 using si_reference = basic_si_reference<char>;
5493 using wsi_reference = basic_si_reference<wchar_t>;
5494#ifdef _UNICODE
5495 using tsi_reference = wsi_reference;
5496#else
5497 using tsi_reference = si_reference;
5498#endif
5499 using sgml_si_reference = basic_si_reference<char>;
5500
5504 template <class T>
5506 {
5507 public:
5509 _In_ const std::shared_ptr<basic_parser<T>>& element,
5510 _In_ const std::shared_ptr<basic_parser<T>>& digit,
5511 _In_ const std::shared_ptr<basic_parser<T>>& sign,
5512 _In_ const std::locale& locale = std::locale()) :
5514 m_element(element),
5515 m_digit(digit),
5516 m_sign(sign),
5517 has_digits(false),
5518 has_charge(false)
5519 {}
5520
5521 virtual void invalidate()
5522 {
5523 has_digits = false;
5524 has_charge = false;
5526 }
5527
5528 bool has_digits;
5529 bool has_charge;
5530
5531 protected:
5532 virtual bool do_match(
5533 _In_reads_or_z_opt_(end) const T* text,
5534 _In_ size_t start = 0,
5535 _In_ size_t end = SIZE_MAX,
5536 _In_ int flags = match_default)
5537 {
5538 _Assume_(text || start >= end);
5539
5540 has_digits = false;
5541 has_charge = false;
5542 this->interval.end = start;
5543
5544 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
5545 for (;;) {
5546 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
5547 this->interval.end = m_element->interval.end;
5548 while (m_digit->match(text, this->interval.end, end, flags)) {
5549 this->interval.end = m_digit->interval.end;
5550 has_digits = true;
5551 }
5552 }
5553 else if (start < this->interval.end) {
5554 if (m_sign->match(text, this->interval.end, end, flags)) {
5555 this->interval.end = m_sign->interval.end;
5556 has_charge = true;
5557 }
5558 this->interval.start = start;
5559 return true;
5560 }
5561 else {
5562 this->interval.invalidate();
5563 return false;
5564 }
5565 }
5566 }
5567
5568 std::shared_ptr<basic_parser<T>> m_element;
5569 std::shared_ptr<basic_parser<T>> m_digit;
5570 std::shared_ptr<basic_parser<T>> m_sign;
5571 };
5572
5575#ifdef _UNICODE
5577#else
5579#endif
5581
5586 {
5587 protected:
5588 virtual bool do_match(
5589 _In_reads_or_z_(end) const char* text,
5590 _In_ size_t start = 0,
5591 _In_ size_t end = SIZE_MAX,
5592 _In_ int flags = match_default)
5593 {
5594 _Assume_(text || start >= end);
5595 this->interval.end = start;
5596
5597 _Assume_(text || this->interval.end >= end);
5598 if (this->interval.end < end && text[this->interval.end]) {
5599 if (text[this->interval.end] == '\r') {
5600 this->interval.end++;
5601 if (this->interval.end < end && text[this->interval.end] == '\n') {
5602 this->interval.start = start;
5603 this->interval.end++;
5604 return true;
5605 }
5606 }
5607 else if (text[this->interval.end] == '\n') {
5608 this->interval.start = start;
5609 this->interval.end++;
5610 return true;
5611 }
5612 }
5613 this->interval.invalidate();
5614 return false;
5615 }
5616 };
5617
5621 class http_space : public parser
5622 {
5623 protected:
5624 virtual bool do_match(
5625 _In_reads_or_z_(end) const char* text,
5626 _In_ size_t start = 0,
5627 _In_ size_t end = SIZE_MAX,
5628 _In_ int flags = match_default)
5629 {
5630 _Assume_(text || start >= end);
5631 this->interval.end = start;
5632 if (m_line_break.match(text, this->interval.end, end, flags)) {
5633 this->interval.end = m_line_break.interval.end;
5634 if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5635 this->interval.start = start;
5636 this->interval.end++;
5637 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5638 return true;
5639 }
5640 }
5641 else if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5642 this->interval.start = start;
5643 this->interval.end++;
5644 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5645 return true;
5646 }
5647 this->interval.invalidate();
5648 return false;
5649 }
5650
5651 http_line_break m_line_break;
5652 };
5653
5657 class http_text_char : public parser
5658 {
5659 protected:
5660 virtual bool do_match(
5661 _In_reads_or_z_(end) const char* text,
5662 _In_ size_t start = 0,
5663 _In_ size_t end = SIZE_MAX,
5664 _In_ int flags = match_default)
5665 {
5666 _Assume_(text || start >= end);
5667 this->interval.end = start;
5668
5669 _Assume_(text || this->interval.end >= end);
5670 if (m_space.match(text, this->interval.end, end, flags)) {
5671 this->interval.start = start;
5672 this->interval.end = m_space.interval.end;
5673 return true;
5674 }
5675 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
5676 this->interval.start = start;
5677 this->interval.end++;
5678 return true;
5679 }
5680 this->interval.invalidate();
5681 return false;
5682 }
5683
5684 http_space m_space;
5685 };
5686
5690 class http_token : public parser
5691 {
5692 protected:
5693 virtual bool do_match(
5694 _In_reads_or_z_(end) const char* text,
5695 _In_ size_t start = 0,
5696 _In_ size_t end = SIZE_MAX,
5697 _In_ int flags = match_default)
5698 {
5699 _Assume_(text || start >= end);
5700 this->interval.end = start;
5701 for (;;) {
5702 if (this->interval.end < end && text[this->interval.end]) {
5703 if ((unsigned int)text[this->interval.end] < 0x20 ||
5704 (unsigned int)text[this->interval.end] == 0x7f ||
5705 text[this->interval.end] == '(' ||
5706 text[this->interval.end] == ')' ||
5707 text[this->interval.end] == '<' ||
5708 text[this->interval.end] == '>' ||
5709 text[this->interval.end] == '@' ||
5710 text[this->interval.end] == ',' ||
5711 text[this->interval.end] == ';' ||
5712 text[this->interval.end] == ':' ||
5713 text[this->interval.end] == '\\' ||
5714 text[this->interval.end] == '\"' ||
5715 text[this->interval.end] == '/' ||
5716 text[this->interval.end] == '[' ||
5717 text[this->interval.end] == ']' ||
5718 text[this->interval.end] == '?' ||
5719 text[this->interval.end] == '=' ||
5720 text[this->interval.end] == '{' ||
5721 text[this->interval.end] == '}' ||
5722 stdex::isspace(text[this->interval.end]))
5723 break;
5724 else
5725 this->interval.end++;
5726 }
5727 else
5728 break;
5729 }
5731 this->interval.start = start;
5732 return true;
5733 }
5734 else {
5735 this->interval.invalidate();
5736 return false;
5737 }
5738 }
5739 };
5740
5745 {
5746 public:
5747 virtual void invalidate()
5748 {
5749 content.start = 1;
5750 content.end = 0;
5751 parser::invalidate();
5752 }
5753
5755
5756 protected:
5757 virtual bool do_match(
5758 _In_reads_or_z_(end) const char* text,
5759 _In_ size_t start = 0,
5760 _In_ size_t end = SIZE_MAX,
5761 _In_ int flags = match_default)
5762 {
5763 _Assume_(text || start >= end);
5764 this->interval.end = start;
5765 if (this->interval.end < end && text[this->interval.end] != '"')
5766 goto error;
5767 this->interval.end++;
5768 content.start = this->interval.end;
5769 for (;;) {
5770 _Assume_(text || this->interval.end >= end);
5771 if (this->interval.end < end && text[this->interval.end]) {
5772 if (text[this->interval.end] == '"') {
5773 content.end = this->interval.end;
5774 this->interval.end++;
5775 break;
5776 }
5777 else if (text[this->interval.end] == '\\') {
5778 this->interval.end++;
5779 if (this->interval.end < end && text[this->interval.end]) {
5780 this->interval.end++;
5781 }
5782 else
5783 goto error;
5784 }
5785 else if (m_chr.match(text, this->interval.end, end, flags))
5786 this->interval.end++;
5787 else
5788 goto error;
5789 }
5790 else
5791 goto error;
5792 }
5793 this->interval.start = start;
5794 return true;
5795
5796 error:
5797 invalidate();
5798 return false;
5799 }
5800
5801 http_text_char m_chr;
5802 };
5803
5807 class http_value : public parser
5808 {
5809 public:
5810 virtual void invalidate()
5811 {
5812 string.invalidate();
5813 token.invalidate();
5814 parser::invalidate();
5815 }
5816
5819
5820 protected:
5821 virtual bool do_match(
5822 _In_reads_or_z_(end) const char* text,
5823 _In_ size_t start = 0,
5824 _In_ size_t end = SIZE_MAX,
5825 _In_ int flags = match_default)
5826 {
5827 _Assume_(text || start >= end);
5828 this->interval.end = start;
5829 if (string.match(text, this->interval.end, end, flags)) {
5830 token.invalidate();
5831 this->interval.end = string.interval.end;
5832 this->interval.start = start;
5833 return true;
5834 }
5835 else if (token.match(text, this->interval.end, end, flags)) {
5836 string.invalidate();
5837 this->interval.end = token.interval.end;
5838 this->interval.start = start;
5839 return true;
5840 }
5841 else {
5842 this->interval.invalidate();
5843 return false;
5844 }
5845 }
5846 };
5847
5851 class http_parameter : public parser
5852 {
5853 public:
5854 virtual void invalidate()
5855 {
5856 name.invalidate();
5857 value.invalidate();
5858 parser::invalidate();
5859 }
5860
5863
5864 protected:
5865 virtual bool do_match(
5866 _In_reads_or_z_(end) const char* text,
5867 _In_ size_t start = 0,
5868 _In_ size_t end = SIZE_MAX,
5869 _In_ int flags = match_default)
5870 {
5871 _Assume_(text || start >= end);
5872 this->interval.end = start;
5873 if (name.match(text, this->interval.end, end, flags))
5874 this->interval.end = name.interval.end;
5875 else
5876 goto error;
5877 while (m_space.match(text, this->interval.end, end, flags))
5878 this->interval.end = m_space.interval.end;
5879 _Assume_(text || this->interval.end >= end);
5880 if (this->interval.end < end && text[this->interval.end] == '=')
5881 this->interval.end++;
5882 else
5883 while (m_space.match(text, this->interval.end, end, flags))
5884 this->interval.end = m_space.interval.end;
5885 if (value.match(text, this->interval.end, end, flags))
5886 this->interval.end = value.interval.end;
5887 else
5888 goto error;
5889 this->interval.start = start;
5890 return true;
5891
5892 error:
5893 invalidate();
5894 return false;
5895 }
5896
5897 http_space m_space;
5898 };
5899
5903 class http_any_type : public parser
5904 {
5905 protected:
5906 virtual bool do_match(
5907 _In_reads_or_z_(end) const char* text,
5908 _In_ size_t start = 0,
5909 _In_ size_t end = SIZE_MAX,
5910 _In_ int flags = match_default)
5911 {
5912 _Assume_(text || start >= end);
5913 if (start + 2 < end &&
5914 text[start] == '*' &&
5915 text[start + 1] == '/' &&
5916 text[start + 2] == '*')
5917 {
5918 this->interval.end = (this->interval.start = start) + 3;
5919 return true;
5920 }
5921 else if (start < end && text[start] == '*') {
5922 this->interval.end = (this->interval.start = start) + 1;
5923 return true;
5924 }
5925 else {
5926 this->interval.invalidate();
5927 return false;
5928 }
5929 }
5930 };
5931
5936 {
5937 public:
5938 virtual void invalidate()
5939 {
5940 type.invalidate();
5941 subtype.invalidate();
5942 parser::invalidate();
5943 }
5944
5945 http_token type;
5946 http_token subtype;
5947
5948 protected:
5949 virtual bool do_match(
5950 _In_reads_or_z_(end) const char* text,
5951 _In_ size_t start = 0,
5952 _In_ size_t end = SIZE_MAX,
5953 _In_ int flags = match_default)
5954 {
5955 _Assume_(text || start >= end);
5956 this->interval.end = start;
5957 if (type.match(text, this->interval.end, end, flags))
5958 this->interval.end = type.interval.end;
5959 else
5960 goto error;
5961 while (m_space.match(text, this->interval.end, end, flags))
5962 this->interval.end = m_space.interval.end;
5963 if (this->interval.end < end && text[this->interval.end] == '/')
5964 this->interval.end++;
5965 else
5966 goto error;
5967 while (m_space.match(text, this->interval.end, end, flags))
5968 this->interval.end = m_space.interval.end;
5969 if (subtype.match(text, this->interval.end, end, flags))
5970 this->interval.end = subtype.interval.end;
5971 else
5972 goto error;
5973 this->interval.start = start;
5974 return true;
5975
5976 error:
5977 invalidate();
5978 return false;
5979 }
5980
5981 http_space m_space;
5982 };
5983
5988 {
5989 public:
5990 virtual void invalidate()
5991 {
5992 params.clear();
5993 http_media_range::invalidate();
5994 }
5995
5996 std::list<http_parameter> params;
5997
5998 protected:
5999 virtual bool do_match(
6000 _In_reads_or_z_(end) const char* text,
6001 _In_ size_t start = 0,
6002 _In_ size_t end = SIZE_MAX,
6003 _In_ int flags = match_default)
6004 {
6005 _Assume_(text || start >= end);
6006 if (!http_media_range::do_match(text, start, end, flags))
6007 goto error;
6008 params.clear();
6009 for (;;) {
6010 if (this->interval.end < end && text[this->interval.end]) {
6011 if (m_space.match(text, this->interval.end, end, flags))
6012 this->interval.end = m_space.interval.end;
6013 else if (text[this->interval.end] == ';') {
6014 this->interval.end++;
6015 while (m_space.match(text, this->interval.end, end, flags))
6016 this->interval.end = m_space.interval.end;
6017 http_parameter param;
6018 if (param.match(text, this->interval.end, end, flags)) {
6019 this->interval.end = param.interval.end;
6020 params.push_back(std::move(param));
6021 }
6022 else
6023 break;
6024 }
6025 else
6026 break;
6027 }
6028 else
6029 break;
6030 }
6031 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
6032 return true;
6033
6034 error:
6035 invalidate();
6036 return false;
6037 }
6038 };
6039
6044 {
6045 protected:
6046 virtual bool do_match(
6047 _In_reads_or_z_(end) const char* text,
6048 _In_ size_t start = 0,
6049 _In_ size_t end = SIZE_MAX,
6050 _In_ int flags = match_default)
6051 {
6052 _Assume_(text || start >= end);
6053 this->interval.end = start;
6054 for (;;) {
6055 if (this->interval.end < end && text[this->interval.end]) {
6056 if ((unsigned int)text[this->interval.end] < 0x20 ||
6057 (unsigned int)text[this->interval.end] == 0x7f ||
6058 text[this->interval.end] == ':' ||
6059 text[this->interval.end] == '/' ||
6060 stdex::isspace(text[this->interval.end]))
6061 break;
6062 else
6063 this->interval.end++;
6064 }
6065 else
6066 break;
6067 }
6069 this->interval.start = start;
6070 return true;
6071 }
6072 this->interval.invalidate();
6073 return false;
6074 }
6075 };
6076
6080 class http_url_port : public parser
6081 {
6082 public:
6083 http_url_port(_In_ const std::locale& locale = std::locale()) :
6084 parser(locale),
6085 value(0)
6086 {}
6087
6088 virtual void invalidate()
6089 {
6090 value = 0;
6091 parser::invalidate();
6092 }
6093
6094 uint16_t value;
6095
6096 protected:
6097 virtual bool do_match(
6098 _In_reads_or_z_(end) const char* text,
6099 _In_ size_t start = 0,
6100 _In_ size_t end = SIZE_MAX,
6101 _In_ int flags = match_default)
6102 {
6103 _Assume_(text || start >= end);
6104 value = 0;
6105 this->interval.end = start;
6106 for (;;) {
6107 if (this->interval.end < end && text[this->interval.end]) {
6108 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6109 size_t _value = (size_t)value * 10 + text[this->interval.end] - '0';
6110 if (_value > (uint16_t)-1) {
6111 value = 0;
6112 this->interval.invalidate();
6113 return false;
6114 }
6115 value = (uint16_t)_value;
6116 this->interval.end++;
6117 }
6118 else
6119 break;
6120 }
6121 else
6122 break;
6123 }
6125 this->interval.start = start;
6126 return true;
6127 }
6128 this->interval.invalidate();
6129 return false;
6130 }
6131 };
6132
6137 {
6138 protected:
6139 virtual bool do_match(
6140 _In_reads_or_z_(end) const char* text,
6141 _In_ size_t start = 0,
6142 _In_ size_t end = SIZE_MAX,
6143 _In_ int flags = match_default)
6144 {
6145 _Assume_(text || start >= end);
6146 this->interval.end = start;
6147 for (;;) {
6148 if (this->interval.end < end && text[this->interval.end]) {
6149 if ((unsigned int)text[this->interval.end] < 0x20 ||
6150 (unsigned int)text[this->interval.end] == 0x7f ||
6151 text[this->interval.end] == '?' ||
6152 text[this->interval.end] == '/' ||
6153 stdex::isspace(text[this->interval.end]))
6154 break;
6155 else
6156 this->interval.end++;
6157 }
6158 else
6159 break;
6160 }
6161 this->interval.start = start;
6162 return true;
6163 }
6164 };
6165
6169 class http_url_path : public parser
6170 {
6171 public:
6172 virtual void invalidate()
6173 {
6174 segments.clear();
6175 parser::invalidate();
6176 }
6177
6178 std::vector<http_url_path_segment> segments;
6179
6180 protected:
6181 virtual bool do_match(
6182 _In_reads_or_z_(end) const char* text,
6183 _In_ size_t start = 0,
6184 _In_ size_t end = SIZE_MAX,
6185 _In_ int flags = match_default)
6186 {
6187 _Assume_(text || start >= end);
6189 this->interval.end = start;
6190 segments.clear();
6191 _Assume_(text || this->interval.end >= end);
6192 if (this->interval.end < end && text[this->interval.end] != '/')
6193 goto error;
6194 this->interval.end++;
6195 s.match(text, this->interval.end, end, flags);
6196 segments.push_back(s);
6197 this->interval.end = s.interval.end;
6198 for (;;) {
6199 if (this->interval.end < end && text[this->interval.end]) {
6200 if (text[this->interval.end] == '/') {
6201 this->interval.end++;
6202 s.match(text, this->interval.end, end, flags);
6203 segments.push_back(s);
6204 this->interval.end = s.interval.end;
6205 }
6206 else
6207 break;
6208 }
6209 else
6210 break;
6211 }
6212 this->interval.start = start;
6213 return true;
6214
6215 error:
6216 invalidate();
6217 return false;
6218 }
6219 };
6220
6225 {
6226 public:
6227 virtual void invalidate()
6228 {
6229 name.start = 1;
6230 name.end = 0;
6231 value.start = 1;
6232 value.end = 0;
6233 parser::invalidate();
6234 }
6235
6238
6239 protected:
6240 virtual bool do_match(
6241 _In_reads_or_z_(end) const char* text,
6242 _In_ size_t start = 0,
6243 _In_ size_t end = SIZE_MAX,
6244 _In_ int flags = match_default)
6245 {
6246 _Assume_(text || start >= end);
6247 this->interval.end = start;
6248 name.start = this->interval.end;
6249 for (;;) {
6250 if (this->interval.end < end && text[this->interval.end]) {
6251 if ((unsigned int)text[this->interval.end] < 0x20 ||
6252 (unsigned int)text[this->interval.end] == 0x7f ||
6253 text[this->interval.end] == '&' ||
6254 text[this->interval.end] == '=' ||
6255 stdex::isspace(text[this->interval.end]))
6256 break;
6257 else
6258 this->interval.end++;
6259 }
6260 else
6261 break;
6262 }
6264 name.end = this->interval.end;
6265 else
6266 goto error;
6267 if (text[this->interval.end] == '=') {
6268 this->interval.end++;
6269 value.start = this->interval.end;
6270 for (;;) {
6271 if (this->interval.end < end && text[this->interval.end]) {
6272 if ((unsigned int)text[this->interval.end] < 0x20 ||
6273 (unsigned int)text[this->interval.end] == 0x7f ||
6274 text[this->interval.end] == '&' ||
6275 stdex::isspace(text[this->interval.end]))
6276 break;
6277 else
6278 this->interval.end++;
6279 }
6280 else
6281 break;
6282 }
6283 value.end = this->interval.end;
6284 }
6285 else {
6286 value.start = 1;
6287 value.end = 0;
6288 }
6289 this->interval.start = start;
6290 return true;
6291
6292 error:
6293 invalidate();
6294 return false;
6295 }
6296 };
6297
6301 class http_url : public parser
6302 {
6303 public:
6304 http_url(_In_ const std::locale& locale = std::locale()) :
6305 parser(locale),
6306 port(locale)
6307 {}
6308
6309 virtual void invalidate()
6310 {
6311 server.invalidate();
6312 port.invalidate();
6313 path.invalidate();
6314 params.clear();
6315 parser::invalidate();
6316 }
6317
6318 http_url_server server;
6319 http_url_port port;
6320 http_url_path path;
6321 std::list<http_url_parameter> params;
6322
6323 protected:
6324 virtual bool do_match(
6325 _In_reads_or_z_(end) const char* text,
6326 _In_ size_t start = 0,
6327 _In_ size_t end = SIZE_MAX,
6328 _In_ int flags = match_default)
6329 {
6330 _Assume_(text || start >= end);
6331 this->interval.end = start;
6332
6333 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", SIZE_MAX, m_locale) == 0) {
6334 this->interval.end += 7;
6335 if (server.match(text, this->interval.end, end, flags))
6336 this->interval.end = server.interval.end;
6337 else
6338 goto error;
6339 if (this->interval.end < end && text[this->interval.end] == ':') {
6340 this->interval.end++;
6341 if (port.match(text, this->interval.end, end, flags))
6342 this->interval.end = port.interval.end;
6343 }
6344 else {
6345 port.invalidate();
6346 port.value = 80;
6347 }
6348 }
6349 else {
6350 server.invalidate();
6351 port.invalidate();
6352 port.value = 80;
6353 }
6354
6355 if (path.match(text, this->interval.end, end, flags))
6356 this->interval.end = path.interval.end;
6357 else
6358 goto error;
6359
6360 params.clear();
6361
6362 if (this->interval.end < end && text[this->interval.end] == '?') {
6363 this->interval.end++;
6364 for (;;) {
6365 if (this->interval.end < end && text[this->interval.end]) {
6366 if ((unsigned int)text[this->interval.end] < 0x20 ||
6367 (unsigned int)text[this->interval.end] == 0x7f ||
6368 stdex::isspace(text[this->interval.end]))
6369 break;
6370 else if (text[this->interval.end] == '&')
6371 this->interval.end++;
6372 else {
6373 http_url_parameter param;
6374 if (param.match(text, this->interval.end, end, flags)) {
6375 this->interval.end = param.interval.end;
6376 params.push_back(std::move(param));
6377 }
6378 else
6379 break;
6380 }
6381 }
6382 else
6383 break;
6384 }
6385 }
6386
6387 this->interval.start = start;
6388 return true;
6389
6390 error:
6391 invalidate();
6392 return false;
6393 }
6394 };
6395
6399 class http_language : public parser
6400 {
6401 public:
6402 virtual void invalidate()
6403 {
6404 components.clear();
6405 parser::invalidate();
6406 }
6407
6408 std::vector<stdex::interval<size_t>> components;
6409
6410 protected:
6411 virtual bool do_match(
6412 _In_reads_or_z_(end) const char* text,
6413 _In_ size_t start = 0,
6414 _In_ size_t end = SIZE_MAX,
6415 _In_ int flags = match_default)
6416 {
6417 _Assume_(text || start >= end);
6418 this->interval.end = start;
6419 components.clear();
6420 for (;;) {
6421 if (this->interval.end < end && text[this->interval.end]) {
6423 k.end = this->interval.end;
6424 for (;;) {
6425 if (k.end < end && text[k.end]) {
6426 if (stdex::isalpha(text[k.end]))
6427 k.end++;
6428 else
6429 break;
6430 }
6431 else
6432 break;
6433 }
6434 if (this->interval.end < k.end) {
6435 k.start = this->interval.end;
6436 this->interval.end = k.end;
6437 components.push_back(k);
6438 }
6439 else
6440 break;
6441 if (this->interval.end < end && text[this->interval.end] == '-')
6442 this->interval.end++;
6443 else
6444 break;
6445 }
6446 else
6447 break;
6448 }
6449 if (!components.empty()) {
6450 this->interval.start = start;
6451 this->interval.end = components.back().end;
6452 return true;
6453 }
6454 this->interval.invalidate();
6455 return false;
6456 }
6457 };
6458
6462 class http_weight : public parser
6463 {
6464 public:
6465 http_weight(_In_ const std::locale& locale = std::locale()) :
6466 parser(locale),
6467 value(1.0f)
6468 {}
6469
6470 virtual void invalidate()
6471 {
6472 value = 1.0f;
6473 parser::invalidate();
6474 }
6475
6476 float value;
6477
6478 protected:
6479 virtual bool do_match(
6480 _In_reads_or_z_(end) const char* text,
6481 _In_ size_t start = 0,
6482 _In_ size_t end = SIZE_MAX,
6483 _In_ int flags = match_default)
6484 {
6485 _Assume_(text || start >= end);
6486 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
6487 this->interval.end = start;
6488 for (;;) {
6489 if (this->interval.end < end && text[this->interval.end]) {
6490 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6491 celi_del = celi_del * 10 + text[this->interval.end] - '0';
6492 this->interval.end++;
6493 }
6494 else if (text[this->interval.end] == '.') {
6495 this->interval.end++;
6496 for (;;) {
6497 if (this->interval.end < end && text[this->interval.end]) {
6498 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6499 decimalni_del = decimalni_del * 10 + text[this->interval.end] - '0';
6500 decimalni_del_n *= 10;
6501 this->interval.end++;
6502 }
6503 else
6504 break;
6505 }
6506 else
6507 break;
6508 }
6509 break;
6510 }
6511 else
6512 break;
6513 }
6514 else
6515 break;
6516 }
6519 this->interval.start = start;
6520 return true;
6521 }
6522 value = 1.0f;
6523 this->interval.invalidate();
6524 return false;
6525 }
6526 };
6527
6531 class http_asterisk : public parser
6532 {
6533 protected:
6534 virtual bool do_match(
6535 _In_reads_or_z_(end) const char* text,
6536 _In_ size_t start = 0,
6537 _In_ size_t end = SIZE_MAX,
6538 _In_ int flags = match_default)
6539 {
6540 _Assume_(text || end <= start);
6541 if (start < end && text[start] == '*') {
6542 this->interval.end = (this->interval.start = start) + 1;
6543 return true;
6544 }
6545 this->interval.invalidate();
6546 return false;
6547 }
6548 };
6549
6553 template <class T, class T_asterisk = http_asterisk>
6555 {
6556 public:
6557 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
6558 parser(locale),
6559 factor(locale)
6560 {}
6561
6562 virtual void invalidate()
6563 {
6564 asterisk.invalidate();
6565 value.invalidate();
6566 factor.invalidate();
6567 parser::invalidate();
6568 }
6569
6570 T_asterisk asterisk;
6571 T value;
6572 http_weight factor;
6573
6574 protected:
6575 virtual bool do_match(
6576 _In_reads_or_z_(end) const char* text,
6577 _In_ size_t start = 0,
6578 _In_ size_t end = SIZE_MAX,
6579 _In_ int flags = match_default)
6580 {
6581 _Assume_(text || start >= end);
6582 size_t konec_vrednosti;
6583 this->interval.end = start;
6584 if (asterisk.match(text, this->interval.end, end, flags)) {
6585 this->interval.end = konec_vrednosti = asterisk.interval.end;
6586 value.invalidate();
6587 }
6588 else if (value.match(text, this->interval.end, end, flags)) {
6589 this->interval.end = konec_vrednosti = value.interval.end;
6590 asterisk.invalidate();
6591 }
6592 else {
6593 asterisk.invalidate();
6594 value.invalidate();
6595 this->interval.invalidate();
6596 return false;
6597 }
6598
6599 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6600 if (this->interval.end < end && text[this->interval.end] == ';') {
6601 this->interval.end++;
6602 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6603 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
6604 this->interval.end++;
6605 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6606 if (this->interval.end < end && text[this->interval.end] == '=') {
6607 this->interval.end++;
6608 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6609 if (factor.match(text, this->interval.end, end, flags))
6610 this->interval.end = factor.interval.end;
6611 }
6612 }
6613 }
6614 if (!factor.interval) {
6615 factor.invalidate();
6616 this->interval.end = konec_vrednosti;
6617 }
6618 this->interval.start = start;
6619 return true;
6620 }
6621 };
6622
6627 {
6628 public:
6629 virtual void invalidate()
6630 {
6631 name.invalidate();
6632 value.invalidate();
6633 parser::invalidate();
6634 }
6635
6636 http_token name;
6637 http_value value;
6638
6639 protected:
6640 virtual bool do_match(
6641 _In_reads_or_z_(end) const char* text,
6642 _In_ size_t start = 0,
6643 _In_ size_t end = SIZE_MAX,
6644 _In_ int flags = match_default)
6645 {
6646 _Assume_(text || start >= end);
6647 this->interval.end = start;
6648 if (this->interval.end < end && text[this->interval.end] == '$')
6649 this->interval.end++;
6650 else
6651 goto error;
6652 if (name.match(text, this->interval.end, end, flags))
6653 this->interval.end = name.interval.end;
6654 else
6655 goto error;
6656 while (m_space.match(text, this->interval.end, end, flags))
6657 this->interval.end = m_space.interval.end;
6658 if (this->interval.end < end && text[this->interval.end] == '=')
6659 this->interval.end++;
6660 else
6661 goto error;
6662 while (m_space.match(text, this->interval.end, end, flags))
6663 this->interval.end = m_space.interval.end;
6664 if (value.match(text, this->interval.end, end, flags))
6665 this->interval.end = value.interval.end;
6666 else
6667 goto error;
6668 this->interval.start = start;
6669 return true;
6670
6671 error:
6672 invalidate();
6673 return false;
6674 }
6675
6676 http_space m_space;
6677 };
6678
6682 class http_cookie : public parser
6683 {
6684 public:
6685 virtual void invalidate()
6686 {
6687 name.invalidate();
6688 value.invalidate();
6689 params.clear();
6690 parser::invalidate();
6691 }
6692
6695 std::list<http_cookie_parameter> params;
6696
6697 protected:
6698 virtual bool do_match(
6699 _In_reads_or_z_(end) const char* text,
6700 _In_ size_t start = 0,
6701 _In_ size_t end = SIZE_MAX,
6702 _In_ int flags = match_default)
6703 {
6704 _Assume_(text || start >= end);
6705 this->interval.end = start;
6706 if (name.match(text, this->interval.end, end, flags))
6707 this->interval.end = name.interval.end;
6708 else
6709 goto error;
6710 while (m_space.match(text, this->interval.end, end, flags))
6711 this->interval.end = m_space.interval.end;
6712 if (this->interval.end < end && text[this->interval.end] == '=')
6713 this->interval.end++;
6714 else
6715 goto error;
6716 while (m_space.match(text, this->interval.end, end, flags))
6717 this->interval.end = m_space.interval.end;
6718 if (value.match(text, this->interval.end, end, flags))
6719 this->interval.end = value.interval.end;
6720 else
6721 goto error;
6722 params.clear();
6723 for (;;) {
6724 if (this->interval.end < end && text[this->interval.end]) {
6725 if (m_space.match(text, this->interval.end, end, flags))
6726 this->interval.end = m_space.interval.end;
6727 else if (text[this->interval.end] == ';') {
6728 this->interval.end++;
6729 while (m_space.match(text, this->interval.end, end, flags))
6730 this->interval.end = m_space.interval.end;
6732 if (param.match(text, this->interval.end, end, flags)) {
6733 this->interval.end = param.interval.end;
6734 params.push_back(std::move(param));
6735 }
6736 else
6737 break;
6738 }
6739 else
6740 break;
6741 }
6742 else
6743 break;
6744 }
6745 this->interval.start = start;
6746 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
6747 return true;
6748
6749 error:
6750 invalidate();
6751 return false;
6752 }
6753
6754 http_space m_space;
6755 };
6756
6760 class http_agent : public parser
6761 {
6762 public:
6763 virtual void invalidate()
6764 {
6765 type.start = 1;
6766 type.end = 0;
6767 version.start = 1;
6768 version.end = 0;
6769 parser::invalidate();
6770 }
6771
6774
6775 protected:
6776 virtual bool do_match(
6777 _In_reads_or_z_(end) const char* text,
6778 _In_ size_t start = 0,
6779 _In_ size_t end = SIZE_MAX,
6780 _In_ int flags = match_default)
6781 {
6782 _Assume_(text || start >= end);
6783 this->interval.end = start;
6784 type.start = this->interval.end;
6785 for (;;) {
6786 if (this->interval.end < end && text[this->interval.end]) {
6787 if (text[this->interval.end] == '/') {
6788 type.end = this->interval.end;
6789 this->interval.end++;
6790 version.start = this->interval.end;
6791 for (;;) {
6792 if (this->interval.end < end && text[this->interval.end]) {
6793 if (stdex::isspace(text[this->interval.end])) {
6794 version.end = this->interval.end;
6795 break;
6796 }
6797 else
6798 this->interval.end++;
6799 }
6800 else {
6801 version.end = this->interval.end;
6802 break;
6803 }
6804 }
6805 break;
6806 }
6807 else if (stdex::isspace(text[this->interval.end])) {
6808 type.end = this->interval.end;
6809 break;
6810 }
6811 else
6812 this->interval.end++;
6813 }
6814 else {
6815 type.end = this->interval.end;
6816 break;
6817 }
6818 }
6820 this->interval.start = start;
6821 return true;
6822 }
6823 type.start = 1;
6824 type.end = 0;
6825 version.start = 1;
6826 version.end = 0;
6827 this->interval.invalidate();
6828 return false;
6829 }
6830 };
6831
6835 class http_protocol : public parser
6836 {
6837 public:
6838 http_protocol(_In_ const std::locale& locale = std::locale()) :
6839 parser(locale),
6840 version(0x009)
6841 {}
6842
6843 virtual void invalidate()
6844 {
6845 type.start = 1;
6846 type.end = 0;
6847 version_maj.start = 1;
6848 version_maj.end = 0;
6849 version_min.start = 1;
6850 version_min.end = 0;
6851 version = 0x009;
6852 parser::invalidate();
6853 }
6854
6856 stdex::interval<size_t> version_maj;
6857 stdex::interval<size_t> version_min;
6859
6860 protected:
6861 virtual bool do_match(
6862 _In_reads_or_z_(end) const char* text,
6863 _In_ size_t start = 0,
6864 _In_ size_t end = SIZE_MAX,
6865 _In_ int flags = match_default)
6866 {
6867 _Assume_(text || start >= end);
6868 this->interval.end = start;
6869 type.start = this->interval.end;
6870 for (;;) {
6871 if (this->interval.end < end && text[this->interval.end]) {
6872 if (text[this->interval.end] == '/') {
6873 type.end = this->interval.end;
6874 this->interval.end++;
6875 break;
6876 }
6877 else if (stdex::isspace(text[this->interval.end]))
6878 goto error;
6879 else
6880 this->interval.end++;
6881 }
6882 else {
6883 type.end = this->interval.end;
6884 goto error;
6885 }
6886 }
6887 version_maj.start = this->interval.end;
6888 for (;;) {
6889 if (this->interval.end < end && text[this->interval.end]) {
6890 if (text[this->interval.end] == '.') {
6891 version_maj.end = this->interval.end;
6892 this->interval.end++;
6893 version_min.start = this->interval.end;
6894 for (;;) {
6895 if (this->interval.end < end && text[this->interval.end]) {
6896 if (stdex::isspace(text[this->interval.end])) {
6897 version_min.end = this->interval.end;
6898 version =
6899 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6900 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6901 break;
6902 }
6903 else
6904 this->interval.end++;
6905 }
6906 else
6907 goto error;
6908 }
6909 break;
6910 }
6911 else if (stdex::isspace(text[this->interval.end])) {
6912 version_maj.end = this->interval.end;
6913 version_min.start = 1;
6914 version_min.end = 0;
6915 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6916 break;
6917 }
6918 else
6919 this->interval.end++;
6920 }
6921 else
6922 goto error;
6923 }
6924 this->interval.start = start;
6925 return true;
6926
6927 error:
6928 invalidate();
6929 return false;
6930 }
6931 };
6932
6936 class http_request : public parser
6937 {
6938 public:
6939 http_request(_In_ const std::locale& locale = std::locale()) :
6940 parser(locale),
6941 url(locale),
6942 protocol(locale)
6943 {}
6944
6945 virtual void invalidate()
6946 {
6947 verb.start = 1;
6948 verb.end = 0;
6949 url.invalidate();
6950 protocol.invalidate();
6951 parser::invalidate();
6952 }
6953
6955 http_url url;
6956 http_protocol protocol;
6957
6958 protected:
6959 virtual bool do_match(
6960 _In_reads_or_z_(end) const char* text,
6961 _In_ size_t start = 0,
6962 _In_ size_t end = SIZE_MAX,
6963 _In_ int flags = match_default)
6964 {
6965 _Assume_(text || start >= end);
6966 this->interval.end = start;
6967
6968 for (;;) {
6969 if (m_line_break.match(text, this->interval.end, end, flags))
6970 goto error;
6971 else if (this->interval.end < end && text[this->interval.end]) {
6972 if (stdex::isspace(text[this->interval.end]))
6973 this->interval.end++;
6974 else
6975 break;
6976 }
6977 else
6978 goto error;
6979 }
6980 verb.start = this->interval.end;
6981 for (;;) {
6982 if (m_line_break.match(text, this->interval.end, end, flags))
6983 goto error;
6984 else if (this->interval.end < end && text[this->interval.end]) {
6985 if (stdex::isspace(text[this->interval.end])) {
6986 verb.end = this->interval.end;
6987 this->interval.end++;
6988 break;
6989 }
6990 else
6991 this->interval.end++;
6992 }
6993 else
6994 goto error;
6995 }
6996
6997 for (;;) {
6998 if (m_line_break.match(text, this->interval.end, end, flags))
6999 goto error;
7000 else if (this->interval.end < end && text[this->interval.end]) {
7001 if (stdex::isspace(text[this->interval.end]))
7002 this->interval.end++;
7003 else
7004 break;
7005 }
7006 else
7007 goto error;
7008 }
7009 if (url.match(text, this->interval.end, end, flags))
7010 this->interval.end = url.interval.end;
7011 else
7012 goto error;
7013
7014 protocol.invalidate();
7015 for (;;) {
7016 if (m_line_break.match(text, this->interval.end, end, flags)) {
7017 this->interval.end = m_line_break.interval.end;
7018 goto end;
7019 }
7020 else if (this->interval.end < end && text[this->interval.end]) {
7021 if (stdex::isspace(text[this->interval.end]))
7022 this->interval.end++;
7023 else
7024 break;
7025 }
7026 else
7027 goto end;
7028 }
7029 for (;;) {
7030 if (m_line_break.match(text, this->interval.end, end, flags)) {
7031 this->interval.end = m_line_break.interval.end;
7032 goto end;
7033 }
7034 else if (protocol.match(text, this->interval.end, end, flags)) {
7035 this->interval.end = protocol.interval.end;
7036 break;
7037 }
7038 else
7039 goto end;
7040 }
7041
7042 for (;;) {
7043 if (m_line_break.match(text, this->interval.end, end, flags)) {
7044 this->interval.end = m_line_break.interval.end;
7045 break;
7046 }
7047 else if (this->interval.end < end && text[this->interval.end])
7048 this->interval.end++;
7049 else
7050 goto end;
7051 }
7052
7053 end:
7054 this->interval.start = start;
7055 return true;
7056
7057 error:
7058 invalidate();
7059 return false;
7060 }
7061
7062 http_line_break m_line_break;
7063 };
7064
7068 class http_header : public parser
7069 {
7070 public:
7071 virtual void invalidate()
7072 {
7073 name.start = 1;
7074 name.end = 0;
7075 value.start = 1;
7076 value.end = 0;
7077 parser::invalidate();
7078 }
7079
7082
7083 protected:
7084 virtual bool do_match(
7085 _In_reads_or_z_(end) const char* text,
7086 _In_ size_t start = 0,
7087 _In_ size_t end = SIZE_MAX,
7088 _In_ int flags = match_default)
7089 {
7090 _Assume_(text || start >= end);
7091 this->interval.end = start;
7092
7093 if (m_line_break.match(text, this->interval.end, end, flags) ||
7094 (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])))
7095 goto error;
7096 name.start = this->interval.end;
7097 for (;;) {
7098 if (m_line_break.match(text, this->interval.end, end, flags))
7099 goto error;
7100 else if (this->interval.end < end && text[this->interval.end]) {
7101 if (stdex::isspace(text[this->interval.end])) {
7102 name.end = this->interval.end;
7103 this->interval.end++;
7104 for (;;) {
7105 if (m_line_break.match(text, this->interval.end, end, flags))
7106 goto error;
7107 else if (this->interval.end < end && text[this->interval.end]) {
7108 if (stdex::isspace(text[this->interval.end]))
7109 this->interval.end++;
7110 else
7111 break;
7112 }
7113 else
7114 goto error;
7115 }
7116 if (this->interval.end < end && text[this->interval.end] == ':') {
7117 this->interval.end++;
7118 break;
7119 }
7120 else
7121 goto error;
7122 break;
7123 }
7124 else if (text[this->interval.end] == ':') {
7125 name.end = this->interval.end;
7126 this->interval.end++;
7127 break;
7128 }
7129 else
7130 this->interval.end++;
7131 }
7132 else
7133 goto error;
7134 }
7135 value.start = SIZE_MAX;
7136 value.end = 0;
7137 for (;;) {
7138 if (m_line_break.match(text, this->interval.end, end, flags)) {
7139 this->interval.end = m_line_break.interval.end;
7140 if (!m_line_break.match(text, this->interval.end, end, flags) &&
7141 this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end]))
7142 this->interval.end++;
7143 else
7144 break;
7145 }
7146 else if (this->interval.end < end && text[this->interval.end]) {
7147 if (stdex::isspace(text[this->interval.end]))
7148 this->interval.end++;
7149 else {
7150 if (value.start == SIZE_MAX) value.start = this->interval.end;
7151 value.end = ++this->interval.end;
7152 }
7153 }
7154 else
7155 break;
7156 }
7157 this->interval.start = start;
7158 return true;
7159
7160 error:
7161 invalidate();
7162 return false;
7163 }
7164
7165 http_line_break m_line_break;
7166 };
7167
7171 template <class KEY, class T>
7172 class http_value_collection : public T
7173 {
7174 public:
7175 void insert(
7176 _In_reads_or_z_(end) const char* text,
7177 _In_ size_t start = 0,
7178 _In_ size_t end = SIZE_MAX,
7179 _In_ int flags = match_default)
7180 {
7181 while (start < end) {
7182 while (start < end && text[start] && stdex::isspace(text[start])) start++;
7183 if (start < end && text[start] == ',') {
7184 start++;
7185 while (start < end&& text[start] && stdex::isspace(text[start])) start++;
7186 }
7187 KEY el;
7188 if (el.match(text, start, end, flags)) {
7189 start = el.interval.end;
7190 T::insert(std::move(el));
7191 }
7192 else
7193 break;
7194 }
7195 }
7196 };
7197
7198 template <class T>
7200 constexpr bool operator()(const T& a, const T& b) const noexcept
7201 {
7202 return a.factor.value > b.factor.value;
7203 }
7204 };
7205
7209 template <class T, class AX = std::allocator<T>>
7211
7215 template <class T>
7217 {
7218 public:
7220 _In_ const std::shared_ptr<basic_parser<T>>& quote,
7221 _In_ const std::shared_ptr<basic_parser<T>>& chr,
7222 _In_ const std::shared_ptr<basic_parser<T>>& escape,
7223 _In_ const std::shared_ptr<basic_parser<T>>& sol,
7224 _In_ const std::shared_ptr<basic_parser<T>>& bs,
7225 _In_ const std::shared_ptr<basic_parser<T>>& ff,
7226 _In_ const std::shared_ptr<basic_parser<T>>& lf,
7227 _In_ const std::shared_ptr<basic_parser<T>>& cr,
7228 _In_ const std::shared_ptr<basic_parser<T>>& htab,
7229 _In_ const std::shared_ptr<basic_parser<T>>& uni,
7230 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
7231 _In_ const std::locale& locale = std::locale()) :
7233 m_quote(quote),
7234 m_chr(chr),
7235 m_escape(escape),
7236 m_sol(sol),
7237 m_bs(bs),
7238 m_ff(ff),
7239 m_lf(lf),
7240 m_cr(cr),
7241 m_htab(htab),
7242 m_uni(uni),
7243 m_hex(hex)
7244 {}
7245
7246 virtual void invalidate()
7247 {
7248 value.clear();
7250 }
7251
7252 std::basic_string<T> value;
7253
7254 protected:
7255 virtual bool do_match(
7256 _In_reads_or_z_opt_(end) const T* text,
7257 _In_ size_t start = 0,
7258 _In_ size_t end = SIZE_MAX,
7259 _In_ int flags = match_default)
7260 {
7261 _Assume_(text || start >= end);
7262 this->interval.end = start;
7263 if (m_quote->match(text, this->interval.end, end, flags)) {
7264 this->interval.end = m_quote->interval.end;
7265 value.clear();
7266 for (;;) {
7267 if (m_quote->match(text, this->interval.end, end, flags)) {
7268 this->interval.start = start;
7269 this->interval.end = m_quote->interval.end;
7270 return true;
7271 }
7272 if (m_escape->match(text, this->interval.end, end, flags)) {
7273 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
7274 value += '"'; this->interval.end = m_quote->interval.end;
7275 continue;
7276 }
7277 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
7278 value += '/'; this->interval.end = m_sol->interval.end;
7279 continue;
7280 }
7281 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
7282 value += '\b'; this->interval.end = m_bs->interval.end;
7283 continue;
7284 }
7285 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
7286 value += '\f'; this->interval.end = m_ff->interval.end;
7287 continue;
7288 }
7289 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
7290 value += '\n'; this->interval.end = m_lf->interval.end;
7291 continue;
7292 }
7293 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
7294 value += '\r'; this->interval.end = m_cr->interval.end;
7295 continue;
7296 }
7297 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
7298 value += '\t'; this->interval.end = m_htab->interval.end;
7299 continue;
7300 }
7301 if (
7302 m_uni->match(text, m_escape->interval.end, end, flags) &&
7303 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
7304 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
7305 {
7306 _Assume_(m_hex->value <= 0xffff);
7307 if (sizeof(T) == 1) {
7308 if (m_hex->value > 0x7ff) {
7309 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
7310 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
7311 value += (T)(0x80 | (m_hex->value & 0x3f));
7312 }
7313 else if (m_hex->value > 0x7f) {
7314 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
7315 value += (T)(0x80 | (m_hex->value & 0x3f));
7316 }
7317 else
7318 value += (T)(m_hex->value & 0x7f);
7319 }
7320 else
7321 value += (T)m_hex->value;
7322 this->interval.end = m_hex->interval.end;
7323 continue;
7324 }
7325 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
7326 value += '\\'; this->interval.end = m_escape->interval.end;
7327 continue;
7328 }
7329 }
7330 if (m_chr->match(text, this->interval.end, end, flags)) {
7331 value.append(text + m_chr->interval.start, m_chr->interval.size());
7332 this->interval.end = m_chr->interval.end;
7333 continue;
7334 }
7335 break;
7336 }
7337 }
7338 value.clear();
7339 this->interval.invalidate();
7340 return false;
7341 }
7342
7343 std::shared_ptr<basic_parser<T>> m_quote;
7344 std::shared_ptr<basic_parser<T>> m_chr;
7345 std::shared_ptr<basic_parser<T>> m_escape;
7346 std::shared_ptr<basic_parser<T>> m_sol;
7347 std::shared_ptr<basic_parser<T>> m_bs;
7348 std::shared_ptr<basic_parser<T>> m_ff;
7349 std::shared_ptr<basic_parser<T>> m_lf;
7350 std::shared_ptr<basic_parser<T>> m_cr;
7351 std::shared_ptr<basic_parser<T>> m_htab;
7352 std::shared_ptr<basic_parser<T>> m_uni;
7353 std::shared_ptr<basic_integer16<T>> m_hex;
7354 };
7355
7358#ifdef _UNICODE
7359 using tjson_string = wjson_string;
7360#else
7361 using tjson_string = json_string;
7362#endif
7363
7367 template <class T>
7369 {
7370 public:
7371 virtual void invalidate()
7372 {
7373 this->content.invalidate();
7374 basic_parser::invalidate();
7375 }
7376
7378
7379 protected:
7380 virtual bool do_match(
7381 _In_reads_or_z_opt_(end) const T* text,
7382 _In_ size_t start = 0,
7383 _In_ size_t end = SIZE_MAX,
7384 _In_ int flags = match_multiline)
7385 {
7386 _Unreferenced_(flags);
7387 _Assume_(text || start + 1 >= end);
7388 if (start + 1 < end &&
7389 text[start] == '/' &&
7390 text[start + 1] == '*')
7391 {
7392 // /*
7393 this->content.start = this->interval.end = start + 2;
7394 for (;;) {
7395 if (this->interval.end >= end || !text[this->interval.end])
7396 break;
7397 if (this->interval.end + 1 < end &&
7398 text[this->interval.end] == '*' &&
7399 text[this->interval.end + 1] == '/')
7400 {
7401 // /*...*/
7402 this->content.end = this->interval.end;
7403 this->interval.start = start;
7404 this->interval.end = this->interval.end + 2;
7405 return true;
7406 }
7407 this->interval.end++;
7408 }
7409 }
7410 this->content.invalidate();
7411 this->interval.invalidate();
7412 return false;
7413 }
7414 };
7415
7416 using css_comment = basic_css_comment<char>;
7417 using wcss_comment = basic_css_comment<wchar_t>;
7418#ifdef _UNICODE
7419 using tcss_comment = wcss_comment;
7420#else
7421 using tcss_comment = css_comment;
7422#endif
7423
7427 template <class T>
7428 class basic_css_cdo : public basic_parser<T>
7429 {
7430 protected:
7431 virtual bool do_match(
7432 _In_reads_or_z_opt_(end) const T* text,
7433 _In_ size_t start = 0,
7434 _In_ size_t end = SIZE_MAX,
7435 _In_ int flags = match_multiline)
7436 {
7437 _Unreferenced_(flags);
7438 _Assume_(text || start + 3 >= end);
7439 if (start + 3 < end &&
7440 text[start] == '<' &&
7441 text[start + 1] == '!' &&
7442 text[start + 2] == '-' &&
7443 text[start + 3] == '-')
7444 {
7445 this->interval.start = start;
7446 this->interval.end = start + 4;
7447 return true;
7448 }
7449 this->interval.invalidate();
7450 return false;
7451 }
7452 };
7453
7456#ifdef _UNICODE
7457 using tcss_cdo = wcss_cdo;
7458#else
7459 using tcss_cdo = css_cdo;
7460#endif
7461
7465 template <class T>
7466 class basic_css_cdc : public basic_parser<T>
7467 {
7468 protected:
7469 virtual bool do_match(
7470 _In_reads_or_z_opt_(end) const T* text,
7471 _In_ size_t start = 0,
7472 _In_ size_t end = SIZE_MAX,
7473 _In_ int flags = match_multiline)
7474 {
7475 _Unreferenced_(flags);
7476 _Assume_(text || start + 2 >= end);
7477 if (start + 2 < end &&
7478 text[start] == '-' &&
7479 text[start + 1] == '-' &&
7480 text[start + 2] == '>')
7481 {
7482 this->interval.start = start;
7483 this->interval.end = start + 3;
7484 return true;
7485 }
7486 this->interval.invalidate();
7487 return false;
7488 }
7489 };
7490
7493#ifdef _UNICODE
7494 using tcss_cdc = wcss_cdc;
7495#else
7496 using tcss_cdc = css_cdc;
7497#endif
7498
7502 template <class T>
7504 {
7505 public:
7506 virtual void invalidate()
7507 {
7508 this->content.invalidate();
7509 basic_parser::invalidate();
7510 }
7511
7513
7514 protected:
7515 virtual bool do_match(
7516 _In_reads_or_z_opt_(end) const T* text,
7517 _In_ size_t start = 0,
7518 _In_ size_t end = SIZE_MAX,
7519 _In_ int flags = match_multiline)
7520 {
7521 _Unreferenced_(flags);
7522 this->interval.end = start;
7523 _Assume_(text || this->interval.end >= end);
7524 if (this->interval.end < end &&
7525 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7526 {
7527 // "Quoted...
7528 T quote = text[this->interval.end];
7529 this->content.start = ++this->interval.end;
7530 for (;;) {
7531 if (this->interval.end >= end || !text[this->interval.end])
7532 break;
7533 if (text[this->interval.end] == quote) {
7534 // End quote"
7535 this->content.end = this->interval.end;
7536 this->interval.start = start;
7537 this->interval.end++;
7538 return true;
7539 }
7540 if (this->interval.end + 1 < end &&
7541 text[this->interval.end] == '\\' &&
7542 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7543 {
7544 // Escaped quote
7545 this->interval.end = this->interval.end + 2;
7546 }
7547 else
7548 this->interval.end++;
7549 }
7550 }
7551
7552 this->content.invalidate();
7553 this->interval.invalidate();
7554 return false;
7555 }
7556 };
7557
7558 using css_string = basic_css_string<char>;
7559 using wcss_string = basic_css_string<wchar_t>;
7560#ifdef _UNICODE
7561 using tcss_string = wcss_string;
7562#else
7563 using tcss_string = css_string;
7564#endif
7565
7569 template <class T>
7570 class basic_css_uri : public basic_parser<T>
7571 {
7572 public:
7573 virtual void invalidate()
7574 {
7575 this->content.invalidate();
7576 basic_parser::invalidate();
7577 }
7578
7580
7581 protected:
7582 virtual bool do_match(
7583 _In_reads_or_z_opt_(end) const T* text,
7584 _In_ size_t start = 0,
7585 _In_ size_t end = SIZE_MAX,
7586 _In_ int flags = match_multiline)
7587 {
7588 _Unreferenced_(flags);
7589 this->interval.end = start;
7590 _Assume_(text || this->interval.end + 3 >= end);
7591 if (this->interval.end + 3 < end &&
7592 (text[this->interval.end] == 'u' || text[this->interval.end] == 'U') &&
7593 (text[this->interval.end + 1] == 'r' || text[this->interval.end + 1] == 'R') &&
7594 (text[this->interval.end + 2] == 'l' || text[this->interval.end + 2] == 'L') &&
7595 text[this->interval.end + 3] == '(')
7596 {
7597 // url(
7598 this->interval.end = this->interval.end + 4;
7599
7600 // Skip whitespace.
7601 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7602 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7603
7604 if (this->interval.end < end &&
7605 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7606 {
7607 // url("Quoted...
7608 T quote = text[this->interval.end];
7609 this->content.start = ++this->interval.end;
7610 for (;;) {
7611 if (this->interval.end >= end || !text[this->interval.end])
7612 goto error;
7613 if (text[this->interval.end] == quote) {
7614 // End quote"
7615 this->content.end = this->interval.end;
7616 this->interval.end++;
7617 break;
7618 }
7619 if (this->interval.end + 1 < end &&
7620 text[this->interval.end] == '\\' &&
7621 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7622 {
7623 // Escaped quote
7624 this->interval.end = this->interval.end + 2;
7625 }
7626 else
7627 this->interval.end++;
7628 }
7629
7630 // Skip whitespace.
7631 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7632
7633 if (this->interval.end < end &&
7634 text[this->interval.end] == ')')
7635 {
7636 // url("...")
7637 this->interval.start = start;
7638 this->interval.end++;
7639 return true;
7640 }
7641 }
7642 else {
7643 // url(...
7644 this->content.start = content.end = this->interval.end;
7645 for (;;) {
7646 if (this->interval.end >= end || !text[this->interval.end])
7647 goto error;
7648 if (text[this->interval.end] == ')') {
7649 // url(...)
7650 this->interval.start = start;
7651 this->interval.end++;
7652 return true;
7653 }
7654 if (ctype.is(ctype.space, text[this->interval.end]))
7655 this->interval.end++;
7656 else
7657 this->content.end = ++this->interval.end;
7658 }
7659 }
7660 }
7661
7662 error:
7663 invalidate();
7664 return false;
7665 }
7666 };
7667
7668 using css_uri = basic_css_uri<char>;
7669 using wcss_uri = basic_css_uri<wchar_t>;
7670#ifdef _UNICODE
7671 using tcss_uri = wcss_uri;
7672#else
7673 using tcss_uri = css_uri;
7674#endif
7675
7679 template <class T>
7681 {
7682 public:
7683 virtual void invalidate()
7684 {
7685 this->content.invalidate();
7686 basic_parser::invalidate();
7687 }
7688
7690
7691 protected:
7692 virtual bool do_match(
7693 _In_reads_or_z_opt_(end) const T* text,
7694 _In_ size_t start = 0,
7695 _In_ size_t end = SIZE_MAX,
7696 _In_ int flags = match_multiline)
7697 {
7698 _Unreferenced_(flags);
7699 this->interval.end = start;
7700 _Assume_(text || this->interval.end + 6 >= end);
7701 if (this->interval.end + 6 < end &&
7702 text[this->interval.end] == '@' &&
7703 (text[this->interval.end + 1] == 'i' || text[this->interval.end + 1] == 'I') &&
7704 (text[this->interval.end + 2] == 'm' || text[this->interval.end + 2] == 'M') &&
7705 (text[this->interval.end + 3] == 'p' || text[this->interval.end + 3] == 'P') &&
7706 (text[this->interval.end + 4] == 'o' || text[this->interval.end + 4] == 'O') &&
7707 (text[this->interval.end + 5] == 'r' || text[this->interval.end + 5] == 'R') &&
7708 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T'))
7709 {
7710 // @import...
7711 this->interval.end = this->interval.end + 7;
7712
7713 // Skip whitespace.
7714 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7715 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7716
7717 if (this->interval.end < end &&
7718 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7719 {
7720 // @import "Quoted
7721 T quote = text[this->interval.end];
7722 this->content.start = ++this->interval.end;
7723 for (;;) {
7724 if (this->interval.end >= end || !text[this->interval.end])
7725 goto error;
7726 if (text[this->interval.end] == quote) {
7727 // End quote"
7728 this->content.end = this->interval.end;
7729 this->interval.start = start;
7730 this->interval.end++;
7731 return true;
7732 }
7733 if (this->interval.end + 1 < end &&
7734 text[this->interval.end] == '\\' &&
7735 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7736 {
7737 // Escaped quote
7738 this->interval.end = this->interval.end + 2;
7739 }
7740 else
7741 this->interval.end++;
7742 }
7743 }
7744 }
7745
7746 error:
7747 invalidate();
7748 return false;
7749 }
7750 };
7751
7752 using css_import = basic_css_import<char>;
7753 using wcss_import = basic_css_import<wchar_t>;
7754#ifdef _UNICODE
7755 using tcss_import = wcss_import;
7756#else
7757 using tcss_import = css_import;
7758#endif
7759
7763 template <class T>
7765 {
7766 public:
7767 virtual void invalidate()
7768 {
7769 this->base_type.invalidate();
7770 this->sub_type.invalidate();
7771 this->charset.invalidate();
7772 basic_parser::invalidate();
7773 }
7774
7778
7779 protected:
7780 virtual bool do_match(
7781 _In_reads_or_z_opt_(end) const T* text,
7782 _In_ size_t start = 0,
7783 _In_ size_t end = SIZE_MAX,
7784 _In_ int flags = match_multiline)
7785 {
7786 _Unreferenced_(flags);
7787 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7788
7789 this->interval.end = start;
7790 this->base_type.start = this->interval.end;
7791 for (;;) {
7792 _Assume_(text || this->interval.end >= end);
7793 if (this->interval.end >= end || !text[this->interval.end])
7794 break;
7795 if (text[this->interval.end] == '/' ||
7796 text[this->interval.end] == ';' ||
7797 ctype.is(ctype.space, text[this->interval.end]))
7798 break;
7799 this->interval.end++;
7800 }
7801 if (this->interval.end <= this->base_type.start)
7802 goto error;
7803 this->base_type.end = this->interval.end;
7804
7805 if (end <= this->interval.end || text[this->interval.end] != '/')
7806 goto error;
7807
7808 this->interval.end++;
7809 this->sub_type.start = this->interval.end;
7810 for (;;) {
7811 if (this->interval.end >= end || !text[this->interval.end])
7812 break;
7813 if (text[this->interval.end] == '/' ||
7814 text[this->interval.end] == ';' ||
7815 ctype.is(ctype.space, text[this->interval.end]))
7816 break;
7817 this->interval.end++;
7818 }
7819 if (this->interval.end <= this->sub_type.start)
7820 goto error;
7821
7822 this->sub_type.end = this->interval.end;
7823 this->charset.invalidate();
7824 if (this->interval.end < end && text[this->interval.end] == ';') {
7825 this->interval.end++;
7826
7827 // Skip whitespace.
7828 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7829
7830 if (this->interval.end + 7 < end &&
7831 (text[this->interval.end] == 'c' || text[this->interval.end] == 'C') &&
7832 (text[this->interval.end + 1] == 'h' || text[this->interval.end + 1] == 'H') &&
7833 (text[this->interval.end + 2] == 'a' || text[this->interval.end + 2] == 'A') &&
7834 (text[this->interval.end + 3] == 'r' || text[this->interval.end + 3] == 'R') &&
7835 (text[this->interval.end + 4] == 's' || text[this->interval.end + 4] == 'S') &&
7836 (text[this->interval.end + 5] == 'e' || text[this->interval.end + 5] == 'E') &&
7837 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T') &&
7838 text[this->interval.end + 7] == '=')
7839 {
7840 this->interval.end = this->interval.end + 8;
7841 if (this->interval.end < end &&
7842 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7843 {
7844 // "Quoted...
7845 T quote = text[this->interval.end];
7846 this->charset.start = ++this->interval.end;
7847 for (;;) {
7848 if (this->interval.end >= end || !text[this->interval.end]) {
7849 // No end quote!
7850 this->charset.invalidate();
7851 break;
7852 }
7853 if (text[this->interval.end] == quote) {
7854 // End quote"
7855 this->charset.end = this->interval.end;
7856 this->interval.end++;
7857 break;
7858 }
7859 this->interval.end++;
7860 }
7861 }
7862 else {
7863 // Nonquoted
7864 this->charset.start = this->interval.end;
7865 for (;;) {
7866 if (this->interval.end >= end || !text[this->interval.end] ||
7867 ctype.is(ctype.space, text[this->interval.end])) {
7868 this->charset.end = this->interval.end;
7869 break;
7870 }
7871 this->interval.end++;
7872 }
7873 }
7874 }
7875 }
7876 this->interval.start = start;
7877 return true;
7878
7879 error:
7880 invalidate();
7881 return false;
7882 }
7883 };
7884
// Convenience aliases for the MIME content type parser: narrow (char), wide (wchar_t),
// and a "t" variant that resolves to the wide form when _UNICODE is defined.
7885 using mime_type = basic_mime_type<char>;
7886 using wmime_type = basic_mime_type<wchar_t>;
7887#ifdef _UNICODE
7888 using tmime_type = wmime_type;
7889#else
7890 using tmime_type = mime_type;
7891#endif
7892
/// Contiguous sequence of characters representing the name of an element, attribute etc.
///
/// NOTE(review): the class declaration line (listing line 7897) is absent from this
/// listing. The tag parser further down holds a `basic_html_ident<T> m_ident`, which is
/// presumably this class - confirm against the original header.
7896 template <class T>
7898 {
7899 protected:
// Scans forward from `start` until a terminator is found. Terminators are '>', '=',
// a '/' that is immediately followed by '>', and any code unit the ctype facet of
// m_locale classifies as space. On a terminator the function returns true with
// interval = [start, terminator position); that span is empty when text[start] is
// itself a terminator. `flags` is not used.
7900 virtual bool do_match(
7901 _In_reads_or_z_opt_(end) const T* text,
7902 _In_ size_t start = 0,
7903 _In_ size_t end = SIZE_MAX,
7904 _In_ int flags = match_default)
7905 {
7906 _Unreferenced_(flags);
7907 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7908 this->interval.end = start;
7909 for (;;) {
7910 _Assume_(text || this->interval.end >= end);
7911 if (this->interval.end >= end || !text[this->interval.end]) { // end of range or NUL terminator
// NOTE(review): listing line 7912 is missing here. The surviving lines contain both a
// success exit and a failure exit plus one extra closing brace, so a nested condition
// presumably stood on the missing line - confirm against the original header.
7913 this->interval.start = start;
7914 return true;
7915 }
7916 this->interval.invalidate();
7917 return false;
7918 }
7919 if (text[this->interval.end] == '>' ||
7920 text[this->interval.end] == '=' ||
7921 text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>' || // && binds tighter than ||: a lone '/' is not a terminator
7922 ctype.is(ctype.space, text[this->interval.end]))
7923 {
7924 this->interval.start = start;
7925 return true;
7926 }
7927 this->interval.end++;
7928 }
7929 }
7930 };
7931
// Build-dependent alias: wide variant when _UNICODE is defined, narrow variant otherwise.
// NOTE(review): listing lines 7932-7933 are absent; the `html_ident` and `whtml_ident`
// aliases used below were presumably declared there - confirm against the original header.
7934#ifdef _UNICODE
7935 using thtml_ident = whtml_ident;
7936#else
7937 using thtml_ident = html_ident;
7938#endif
7939
/// Optionally-quoted string representing the value of an attribute.
///
/// NOTE(review): the class declaration line (listing line 7944) and the declaration of
/// the `content` member (listing line 7953, "content position in source") are absent
/// from this listing - confirm against the original header.
7943 template <class T>
7945 {
7946 public:
// Resets `content` in addition to the state reset by the base parser.
7947 virtual void invalidate()
7948 {
7949 this->content.invalidate();
7950 basic_parser::invalidate();
7951 }
7952
7954
7955 protected:
// Matches an attribute value starting at `start`. `flags` is not used.
//
// Quoted form: the value opens with '"' or '\'' and runs to the next occurrence of the
// same quote character. `content` covers the text between the quotes and `interval`
// covers the quotes as well. If the end of the range or a NUL is reached before the
// closing quote, both intervals are invalidated and false is returned.
//
// Unquoted form: the value runs until the end of the range, a NUL, '>', "/>" or a
// whitespace code unit. `content` and `interval` cover the same span, which may be
// empty. This form always returns true.
7956 virtual bool do_match(
7957 _In_reads_or_z_opt_(end) const T* text,
7958 _In_ size_t start = 0,
7959 _In_ size_t end = SIZE_MAX,
7960 _In_ int flags = match_default)
7961 {
7962 _Unreferenced_(flags);
7963 this->interval.end = start;
7964 _Assume_(text || this->interval.end >= end);
7965 if (this->interval.end < end &&
7966 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7967 {
7968 // "Quoted...
7969 T quote = text[this->interval.end]; // remember which quote opened the value
7970 this->content.start = ++this->interval.end;
7971 for (;;) {
7972 if (this->interval.end >= end || !text[this->interval.end]) {
7973 // No end quote!
7974 this->content.invalidate();
7975 this->interval.invalidate();
7976 return false;
7977 }
7978 if (text[this->interval.end] == quote) {
7979 // End quote"
7980 this->content.end = this->interval.end; // content excludes the closing quote
7981 this->interval.start = start;
7982 this->interval.end++; // interval includes the closing quote
7983 return true;
7984 }
7985 this->interval.end++;
7986 }
7987 }
7988
7989 // Nonquoted
7990 this->content.start = this->interval.end;
7991 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7992 for (;;) {
7993 _Assume_(text || this->interval.end >= end);
7994 if (this->interval.end >= end || !text[this->interval.end]) {
7995 this->content.end = this->interval.end;
7996 this->interval.start = start;
7997 return true;
7998 }
7999 if (text[this->interval.end] == '>' ||
8000 text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>' || // && binds tighter than ||: a lone '/' is part of the value
8001 ctype.is(ctype.space, text[this->interval.end]))
8002 {
8003 this->content.end = this->interval.end;
8004 this->interval.start = start;
8005 return true;
8006 }
8007 this->interval.end++;
8008 }
8009 }
8010 };
8011
// Convenience aliases for the attribute value parser: narrow (char), wide (wchar_t),
// and a "t" variant that resolves to the wide form when _UNICODE is defined.
8012 using html_value = basic_html_value<char>;
8013 using whtml_value = basic_html_value<wchar_t>;
8014#ifdef _UNICODE
8015 using thtml_value = whtml_value;
8016#else
8017 using thtml_value = html_value;
8018#endif
8019
// Kind of markup sequence. The tag parser in this listing assigns element,
// element_start, element_end, declaration, comment and instruction, and uses unknown
// as its initial/invalidated state. text, PCDATA and CDATA are not assigned anywhere
// in this listing; presumably other code uses them - confirm.
8023 enum class html_sequence_t {
8024 text = 0,
8025 element, // <tag .../>
8026 element_start, // <tag ...>
8027 element_end, // </tag ...>
8028 declaration, // <!...>
8029 comment, // <!--...-->
8030 instruction, // <?...> or <?...?>
8031 PCDATA,
8032 CDATA,
8033
8034 unknown = -1, // no tag matched (yet)
8035 };
8036
8044
/// Tag.
///
/// NOTE(review): several lines are absent from this listing - the class declaration
/// (listing line 8049), one constructor initializer (8053), the `name` member (8066,
/// "tag name position in source") and the `html_attribute` struct (8040-8042, holding
/// `name` and `value` intervals). Confirm against the original header.
8048 template <class T>
8050 {
8051 public:
// Starts with type == html_sequence_t::unknown.
// NOTE(review): listing line 8053 (an initializer preceding `type`) is missing here.
8052 basic_html_tag(_In_ const std::locale& locale = std::locale()) :
8054 type(html_sequence_t::unknown)
8055 {}
8056
// Resets type, name and attributes in addition to the state reset by the base parser.
8057 virtual void invalidate()
8058 {
8059 this->type = html_sequence_t::unknown;
8060 this->name.invalidate();
8061 this->attributes.clear();
8062 basic_parser::invalidate();
8063 }
8064
8065 html_sequence_t type; // tag type
8067 std::vector<html_attribute> attributes; // tag attributes
8068
8069 protected:
// Matches one tag that begins with '<' at `start`.
//
// The character after '<' selects the kind:
//   "</" + identifier   -> element_end, name = identifier
//   "<!--" ... "-->"    -> comment, name = text between the markers; returns at once
//   "<!" (otherwise)    -> declaration with an empty name
//   "<?" ... ">"/"?>"   -> instruction, name = text up to the terminator; returns at once
//   "<" + identifier    -> element_start, name = identifier
// For element_end, declaration and element_start the rest of the tag is then scanned
// for attributes until '>' closes it. "/>" closes an element_start and turns it into
// element; "!>" also closes a declaration. Inside a declaration, "--" ... "--" spans
// are skipped.
//
// On success `interval` covers the whole tag. Reaching the end of the range or a NUL
// before the tag is closed calls invalidate() and returns false.
8070 virtual bool do_match(
8071 _In_reads_or_z_opt_(end) const T* text,
8072 _In_ size_t start = 0,
8073 _In_ size_t end = SIZE_MAX,
8074 _In_ int flags = match_multiline)
8075 {
8076 _Assume_(text || start >= end);
8077 if (start >= end || text[start] != '<')
8078 goto error;
8079 this->interval.end = start + 1;
8080 if (this->interval.end >= end || !text[this->interval.end])
8081 goto error;
8082 if (text[this->interval.end] == '/' &&
8083 this->m_ident.match(text, this->interval.end + 1, end, flags))
8084 {
8085 // </...
8086 this->type = html_sequence_t::element_end;
8087 this->name = this->m_ident.interval;
8088 this->interval.end = this->m_ident.interval.end;
8089 }
8090 else if (text[this->interval.end] == '!') {
8091 // <!...
8092 this->interval.end++;
8093 if (this->interval.end + 1 < end &&
8094 text[this->interval.end] == '-' &&
8095 text[this->interval.end + 1] == '-')
8096 {
8097 // <!--...
8098 this->name.start = this->interval.end = this->interval.end + 2;
8099 for (;;) {
8100 if (this->interval.end >= end || !text[this->interval.end])
8101 goto error;
8102 if (this->interval.end + 2 < end &&
8103 text[this->interval.end] == '-' &&
8104 text[this->interval.end + 1] == '-' &&
8105 text[this->interval.end + 2] == '>')
8106 {
8107 // <!--...-->
8108 this->type = html_sequence_t::comment;
8109 this->name.end = this->interval.end;
8110 this->attributes.clear();
8111 this->interval.start = start;
8112 this->interval.end = this->interval.end + 3;
8113 return true;
8114 }
8115 this->interval.end++;
8116 }
8117 }
// Not a comment: treat as a declaration with an empty name; what follows is parsed by the attribute loop below.
8118 this->type = html_sequence_t::declaration;
8119 this->name.start = this->name.end = this->interval.end;
8120 }
8121 else if (text[this->interval.end] == '?') {
8122 // <?...
8123 this->name.start = ++this->interval.end;
8124 for (;;) {
8125 if (this->interval.end >= end || !text[this->interval.end])
8126 goto error;
8127 if (text[this->interval.end] == '>') {
8128 // <?...>
8129 this->type = html_sequence_t::instruction;
8130 this->name.end = this->interval.end;
8131 this->attributes.clear();
8132 this->interval.start = start;
8133 this->interval.end++;
8134 return true;
8135 }
8136 if (this->interval.end + 1 < end &&
8137 text[this->interval.end] == '?' &&
8138 text[this->interval.end + 1] == '>')
8139 {
8140 // <?...?>
8141 this->type = html_sequence_t::instruction;
8142 this->name.end = this->interval.end;
8143 this->attributes.clear();
8144 this->interval.start = start;
8145 this->interval.end = this->interval.end + 2;
8146 return true;
8147 }
8148 this->interval.end++;
8149 }
8150 }
8151 else if (this->m_ident.match(text, this->interval.end, end, flags)) {
8152 // <tag...
8153 this->type = html_sequence_t::element_start;
8154 this->name = this->m_ident.interval;
8155 this->interval.end = this->m_ident.interval.end;
8156 }
8157 else
8158 goto error;
8159
8160 // Skip whitespace.
8161 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8162 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8163
8164 this->attributes.clear();
// Each iteration either closes the tag (break), skips a "--...--" span, or consumes one attribute.
8165 for (;;) {
8166 if (this->type == html_sequence_t::element_start &&
8167 this->interval.end + 1 < end &&
8168 text[this->interval.end] == '/' &&
8169 text[this->interval.end + 1] == '>')
8170 {
8171 // <tag .../>
8172 this->type = html_sequence_t::element;
8173 this->interval.end = this->interval.end + 2;
8174 break;
8175 }
8176 if (this->interval.end < end &&
8177 text[this->interval.end] == '>')
8178 {
8179 // <tag ...>
8180 this->interval.end++;
8181 break;
8182 }
8183 if (this->type == html_sequence_t::declaration &&
8184 this->interval.end + 1 < end &&
8185 text[this->interval.end] == '!' &&
8186 text[this->interval.end + 1] == '>')
8187 {
8188 // "<!...!>".
8189 this->interval.end = this->interval.end + 2;
8190 break;
8191 }
8192 if (this->type == html_sequence_t::declaration &&
8193 this->interval.end + 1 < end &&
8194 text[this->interval.end] == '-' &&
8195 text[this->interval.end + 1] == '-')
8196 {
8197 // "<! ... --...".
8198 this->interval.end = this->interval.end + 2;
8199 for (;;) {
8200 if (this->interval.end >= end || !text[this->interval.end])
8201 goto error;
8202 if (this->interval.end + 1 < end &&
8203 text[this->interval.end] == '-' &&
8204 text[this->interval.end + 1] == '-')
8205 {
8206 // "<! ... --...--".
8207 this->interval.end = this->interval.end + 2;
8208 break;
8209 }
8210 this->interval.end++;
8211 }
8212
8213 // Skip whitespace.
8214 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8215 continue;
8216 }
8217
// Text ran out before the tag was closed.
8218 if (this->interval.end >= end || !text[this->interval.end])
8219 goto error;
8220
8221 // Attributes follow...
8222 html_attribute* a = nullptr;
8223 if (this->m_ident.match(text, this->interval.end, end, flags)) {
// NOTE(review): std::move on a freshly constructed temporary is redundant.
8224 this->attributes.push_back(std::move(html_attribute{ this->m_ident.interval }));
8225 a = &this->attributes.back(); // used immediately below, before any further push_back
8226 _Assume_(a);
8227 this->interval.end = this->m_ident.interval.end;
8228 }
8229 else {
8230 // What was that?! Skip.
8231 this->interval.end++;
8232 continue;
8233 }
8234
8235 // Skip whitespace.
8236 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8237
8238 if (this->interval.end < end && text[this->interval.end] == '=') {
8239 this->interval.end++;
8240
8241 // Skip whitespace.
8242 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8243
// If the value fails to match (e.g. an unterminated quote), a->value keeps whatever
// html_attribute's construction gave it and scanning resumes after the '='.
8244 if (this->m_value.match(text, this->interval.end, end, flags)) {
8245 // This attribute has value.
8246 a->value = this->m_value.content;
8247 this->interval.end = this->m_value.interval.end;
8248
8249 // Skip whitespace.
8250 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8251 }
8252 }
8253 else {
8254 // This attribute has no value.
8255 a->value.invalidate();
8256 }
8257 }
8258
8259 this->interval.start = start;
8260 return true;
8261
8262 error:
8263 invalidate();
8264 return false;
8265 }
8266
8267 basic_html_ident<T> m_ident; // sub-parser for tag and attribute names
8268 basic_html_value<T> m_value; // sub-parser for attribute values
8269 };
8270
// Convenience aliases for the tag parser: narrow (char), wide (wchar_t),
// and a "t" variant that resolves to the wide form when _UNICODE is defined.
8271 using html_tag = basic_html_tag<char>;
8272 using whtml_tag = basic_html_tag<wchar_t>;
8273#ifdef _UNICODE
8274 using thtml_tag = whtml_tag;
8275#else
8276 using thtml_tag = html_tag;
8277#endif
8278
/// Start of condition <![condition[...
///
/// NOTE(review): the class declaration line (listing line 8283) is absent from this
/// listing - confirm against the original header.
8282 template <class T>
8284 {
8285 public:
// Resets `condition` in addition to the state reset by the base parser.
8286 virtual void invalidate()
8287 {
8288 this->condition.invalidate();
8289 basic_parser::invalidate();
8290 }
8291
8292 stdex::interval<size_t> condition; // condition position in source
8293
8294 protected:
// Matches "<![", optional whitespace, the condition text, and a second '['.
// `condition` runs from the first code unit after the leading whitespace to just past
// the last non-whitespace code unit before the second '[' (trailing whitespace is
// excluded). `interval` covers everything through that '['. If "<![" is absent, or
// the end of the range or a NUL is reached before the second '[', both intervals are
// invalidated and false is returned. `flags` is not used.
8295 virtual bool do_match(
8296 _In_reads_or_z_opt_(end) const T* text,
8297 _In_ size_t start = 0,
8298 _In_ size_t end = SIZE_MAX,
8299 _In_ int flags = match_multiline)
8300 {
8301 _Unreferenced_(flags);
8302 _Assume_(text || start + 2 >= end);
8303 if (start + 2 < end &&
8304 text[start] == '<' &&
8305 text[start + 1] == '!' &&
8306 text[start + 2] == '[')
8307 {
8308 this->interval.end = start + 3;
8309
8310 // Skip whitespace.
8311 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8312 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8313
8314 this->condition.start = this->condition.end = this->interval.end;
8315
8316 for (;;) {
8317 if (this->interval.end >= end || !text[this->interval.end])
8318 break;
8319 if (text[this->interval.end] == '[') {
8320 this->interval.start = start;
8321 this->interval.end++;
8322 return true;
8323 }
8324 if (ctype.is(ctype.space, text[this->interval.end]))
8325 this->interval.end++; // whitespace advances the scan but not condition.end
8326 else
8327 this->condition.end = ++this->interval.end; // extend condition past this non-space code unit
8328 }
8329 }
8330
8331 this->condition.invalidate();
8332 this->interval.invalidate();
8333 return false;
8334 }
8335 };
8336
// Convenience aliases for the condition-start parser: narrow (char), wide (wchar_t),
// and a "t" variant that resolves to the wide form when _UNICODE is defined.
8337 using html_declaration_condition_start = basic_html_declaration_condition_start<char>;
8338 using whtml_declaration_condition_start = basic_html_declaration_condition_start<wchar_t>;
8339#ifdef _UNICODE
8340 using thtml_declaration_condition_start = whtml_declaration_condition_start;
8341#else
8342 using thtml_declaration_condition_start = html_declaration_condition_start;
8343#endif
8344
/// End of condition ...]]>
///
/// NOTE(review): the class declaration line (listing line 8349) is absent from this
/// listing, so the class name is not visible here - confirm against the original header.
8348 template <class T>
8350 {
8351 protected:
// Matches exactly the three code units "]]>" at `start`. On success `interval` is
// [start, start + 3); otherwise `interval` is invalidated and false is returned.
// `flags` is not used.
8352 virtual bool do_match(
8353 _In_reads_or_z_opt_(end) const T* text,
8354 _In_ size_t start = 0,
8355 _In_ size_t end = SIZE_MAX,
8356 _In_ int flags = match_multiline)
8357 {
8358 _Unreferenced_(flags);
8359 _Assume_(text || start + 2 >= end);
8360 if (start + 2 < end && // all three code units must lie inside the range
8361 text[start] == ']' &&
8362 text[start + 1] == ']' &&
8363 text[start + 2] == '>')
8364 {
8365 this->interval.start = start;
8366 this->interval.end = start + 3;
8367 return true;
8368 }
8369 this->interval.invalidate();
8370 return false;
8371 }
8372 };
8373
8376#ifdef _UNICODE
8378#else
8380#endif
8381 }
8382}
8383
8384#undef ENUM_FLAG_OPERATOR
8385#undef ENUM_FLAGS
8386
8387#ifdef _MSC_VER
8388#pragma warning(pop)
8389#endif
locale_t helper class to free_locale when going out of scope.
Definition locale.hpp:69
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4377
Test for any code unit.
Definition parser.hpp:232
Test for beginning of line.
Definition parser.hpp:631
Test for any.
Definition parser.hpp:1074
Test for chemical formula.
Definition parser.hpp:5506
Test for Creditor Reference.
Definition parser.hpp:4941
T reference[22]
Normalized national reference number.
Definition parser.hpp:4963
T check_digits[3]
Two check digits.
Definition parser.hpp:4962
bool is_valid
Is reference valid per ISO 7064.
Definition parser.hpp:4964
Legacy CSS comment end -->
Definition parser.hpp:7467
Legacy CSS comment start <!--
Definition parser.hpp:7429
CSS comment.
Definition parser.hpp:7369
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7377
CSS import directive.
Definition parser.hpp:7681
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7689
CSS string.
Definition parser.hpp:7504
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7512
URI in CSS.
Definition parser.hpp:7571
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7579
Test for any code unit from a given string of code units.
Definition parser.hpp:736
Test for specific code unit.
Definition parser.hpp:304
Test for date.
Definition parser.hpp:4010
Test for valid DNS domain character.
Definition parser.hpp:2792
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2802
Test for DNS domain/hostname.
Definition parser.hpp:2892
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2956
Test for e-mail address.
Definition parser.hpp:3784
Test for emoticon.
Definition parser.hpp:3887
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3915
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3916
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3918
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3917
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3914
Test for end of line.
Definition parser.hpp:670
Test for fraction.
Definition parser.hpp:1702
End of condition ...]]>
Definition parser.hpp:8350
Start of condition <![condition[...
Definition parser.hpp:8284
virtual bool do_match(_In_reads_or_z_opt_(end) const T *text, size_t start=0, size_t end=SIZE_MAX, int flags=match_multiline)
condition position in source
Definition parser.hpp:8295
Contiguous sequence of characters representing name of element, attribute etc.
Definition parser.hpp:7898
Tag.
Definition parser.hpp:8050
std::vector< html_attribute > attributes
tag attributes
Definition parser.hpp:8067
html_sequence_t type
tag type
Definition parser.hpp:8065
stdex::interval< size_t > name
tag name position in source
Definition parser.hpp:8066
Optionally-quoted string representing value of an attribute.
Definition parser.hpp:7945
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7953
Test for International Bank Account Number.
Definition parser.hpp:4652
T bban[31]
Normalized Basic Bank Account Number.
Definition parser.hpp:4677
T country[3]
ISO 3166-1 alpha-2 country code.
Definition parser.hpp:4675
T check_digits[3]
Two check digits.
Definition parser.hpp:4676
bool is_valid
Is IBAN valid per ISO 7064.
Definition parser.hpp:4678
Test for decimal integer.
Definition parser.hpp:1312
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1397
bool has_separators
Did integer have any separators?
Definition parser.hpp:1418
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1417
Test for hexadecimal integer.
Definition parser.hpp:1477
Base class for integer testing.
Definition parser.hpp:1290
size_t value
Calculated value of the numeral.
Definition parser.hpp:1304
Test for IPv4 address.
Definition parser.hpp:2360
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2405
struct in_addr value
IPv4 address value.
Definition parser.hpp:2406
Test for IPv6 address.
Definition parser.hpp:2572
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2644
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2642
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2643
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2498
Test for repeating.
Definition parser.hpp:926
bool m_greedy
try to match as long sequence as possible
Definition parser.hpp:965
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:962
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:963
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:964
Test for JSON string.
Definition parser.hpp:7217
MIME content type.
Definition parser.hpp:7765
stdex::interval< size_t > base_type
basic type position in source
Definition parser.hpp:7775
stdex::interval< size_t > sub_type
sub-type position in source
Definition parser.hpp:7776
stdex::interval< size_t > charset
charset position in source
Definition parser.hpp:7777
Test for mixed numeral.
Definition parser.hpp:1937
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:1970
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1968
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1967
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1966
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:1969
Test for monetary numeral.
Definition parser.hpp:2231
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2264
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2269
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2267
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2270
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2268
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2265
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2266
"No-op" match
Definition parser.hpp:200
Base template for all parsers.
Definition parser.hpp:76
stdex::interval< size_t > interval
Region of the last match.
Definition parser.hpp:116
Test for permutation.
Definition parser.hpp:1214
Test for phone number.
Definition parser.hpp:4500
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4525
Test for any punctuation code unit.
Definition parser.hpp:477
Test for Roman numeral.
Definition parser.hpp:1586
Test for scientific numeral.
Definition parser.hpp:2062
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2108
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2112
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2106
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2107
double value
Calculated value of the numeral.
Definition parser.hpp:2116
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2114
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2111
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2113
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2115
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2110
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2109
Test for match score.
Definition parser.hpp:1765
Test for sequence.
Definition parser.hpp:1022
Definition parser.hpp:705
Test for SI Reference delimiter.
Definition parser.hpp:5135
Test for SI Reference part.
Definition parser.hpp:5089
Test for SI Reference.
Definition parser.hpp:5174
basic_si_reference_part< T > part3
Reference data part 3 (P3)
Definition parser.hpp:5203
basic_si_reference_part< T > part1
Reference data part 1 (P1)
Definition parser.hpp:5201
bool is_valid
Is reference valid.
Definition parser.hpp:5204
T model[3]
Reference model.
Definition parser.hpp:5200
basic_si_reference_part< T > part2
Reference data part 2 (P2)
Definition parser.hpp:5202
Test for signed numeral.
Definition parser.hpp:1851
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1877
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1876
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1875
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1878
Test for any space code unit.
Definition parser.hpp:397
Test for any space or punctuation code unit.
Definition parser.hpp:552
Test for any string.
Definition parser.hpp:1142
Test for given string.
Definition parser.hpp:831
Test for time.
Definition parser.hpp:4275
Test for valid URL password character.
Definition parser.hpp:3076
Test for valid URL path character.
Definition parser.hpp:3178
Test for URL path.
Definition parser.hpp:3288
Test for valid URL username character.
Definition parser.hpp:2975
Test for URL.
Definition parser.hpp:3428
Test for HTTP agent.
Definition parser.hpp:6761
Test for HTTP any type.
Definition parser.hpp:5904
Test for HTTP asterisk.
Definition parser.hpp:6532
Test for HTTP header.
Definition parser.hpp:7069
Test for HTTP language (RFC1766)
Definition parser.hpp:6400
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:5586
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5936
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5988
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5852
http_token name
Parameter name.
Definition parser.hpp:5861
http_value value
Parameter value.
Definition parser.hpp:5862
Test for HTTP protocol.
Definition parser.hpp:6836
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6858
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:5745
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:5754
Test for HTTP request.
Definition parser.hpp:6937
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:5622
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:5658
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:5691
Test for HTTP URL parameter.
Definition parser.hpp:6225
Test for HTTP URL path segment.
Definition parser.hpp:6137
Test for HTTP URL path segment.
Definition parser.hpp:6170
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:6178
Test for HTTP URL port.
Definition parser.hpp:6081
Test for HTTP URL server.
Definition parser.hpp:6044
Test for HTTP URL.
Definition parser.hpp:6302
Collection of HTTP values.
Definition parser.hpp:7173
Test for HTTP value (RFC2616: value)
Definition parser.hpp:5808
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5817
http_token token
Value when matched as token.
Definition parser.hpp:5818
Test for HTTP weight factor.
Definition parser.hpp:6463
float value
Calculated value of the weight factor.
Definition parser.hpp:6476
Test for HTTP weighted value.
Definition parser.hpp:6555
Base template for collection-holding parsers.
Definition parser.hpp:982
Test for any SGML code point.
Definition parser.hpp:265
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:788
Test for specific SGML code point.
Definition parser.hpp:353
Test for valid DNS domain SGML character.
Definition parser.hpp:2847
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2537
Test for any SGML punctuation code point.
Definition parser.hpp:518
Test for any SGML space code point.
Definition parser.hpp:440
Test for any SGML space or punctuation code point.
Definition parser.hpp:595
Test for SGML given string.
Definition parser.hpp:878
Test for valid URL password SGML character.
Definition parser.hpp:3129
Test for valid URL path SGML character.
Definition parser.hpp:3235
Test for valid URL username SGML character.
Definition parser.hpp:3027
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
void invalidate()
Invalidates interval.
Definition interval.hpp:59
T start
interval start
Definition interval.hpp:19
Tag attribute.
Definition parser.hpp:8040
stdex::interval< size_t > name
attribute name position in source
Definition parser.hpp:8041
stdex::interval< size_t > value
attribute value position in source
Definition parser.hpp:8042
Definition parser.hpp:7199