stdex
Additional custom or not Standard C++ covered algorithms
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "interval.hpp"
10#include "memory.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include <stdarg.h>
14#include <stdint.h>
15#include <math.h>
16#if defined(_WIN32)
17#include <winsock2.h>
18#if _MSC_VER >= 1300
19#include <ws2ipdef.h>
20#endif
21#include <ws2tcpip.h>
22#else
23#include <netinet/in.h>
24#endif
25#include <limits>
26#include <list>
27#include <locale>
28#include <memory>
29#include <set>
30#include <string>
31
32#ifdef _MSC_VER
33#pragma warning(push)
34#pragma warning(disable: 4100)
35#endif
36
/// Defines binary operator `X` and its compound-assignment form for the scoped enum `T`.
///
/// Three binary overloads are generated (enum/enum, enum/underlying, underlying/enum) plus
/// two compound assignments (enum and underlying right-hand side). All of them compute on
/// the underlying integer type and cast the result back to `T`.
///
/// The compound operator is spelled `X##=` so that the operator token and `=` are pasted
/// into a single token (`|=`, `^=`, `&=`). A plain `X=` only works on preprocessors that
/// glue adjacent replacement text together.
#define ENUM_FLAG_OPERATOR(T,X) \
inline T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
inline T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T& operator X##= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
inline T& operator X##= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }

/// Declares the scoped enum `T` with underlying type `type` and gives it `~`, `|`, `^`, `&`
/// and the matching compound assignments.
///
/// The macro ends with an unterminated `enum class T : type`, so the caller continues with
/// the enumerator list: `ENUM_FLAGS(my_flags, int) { a = 1, b = 2 };`
#define ENUM_FLAGS(T, type) \
enum class T : type; \
inline T operator ~ (T t) { return static_cast<T>(~static_cast<std::underlying_type_t<T>>(t)); } \
ENUM_FLAG_OPERATOR(T,|) \
ENUM_FLAG_OPERATOR(T,^) \
ENUM_FLAG_OPERATOR(T,&) \
enum class T : type
50
51#if defined(_WIN32)
52#elif defined(__APPLE__)
53#define s6_words __u6_addr.__u6_addr16
54#else
55#define s6_words s6_addr16
56#endif
57
58namespace stdex
59{
60 namespace parser
61 {
/// No matching flags set.
constexpr int match_default = 0;

/// Flag bit tested by parsers in this file to compare text case-insensitively.
constexpr int match_case_insensitive = 1 << 0;

/// Flag bit tested by the whitespace parsers in this file to let line breaks count as whitespace.
constexpr int match_multiline = 1 << 1;
68
72 template <class T>
74 {
75 public:
76 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
77 virtual ~basic_parser() {}
78
79 bool search(
80 _In_reads_or_z_opt_(end) const T* text,
81 _In_ size_t start = 0,
82 _In_ size_t end = SIZE_MAX,
83 _In_ int flags = match_default)
84 {
85 for (size_t i = start; i < end && text[i]; i++)
86 if (match(text, i, end, flags))
87 return true;
88 return false;
89 }
90
91 virtual bool match(
92 _In_reads_or_z_opt_(end) const T* text,
93 _In_ size_t start = 0,
94 _In_ size_t end = SIZE_MAX,
95 _In_ int flags = match_default) = 0;
96
97 template<class _Traits, class _Ax>
98 inline bool match(
99 const std::basic_string<T, _Traits, _Ax>& text,
100 _In_ size_t start = 0,
101 _In_ size_t end = SIZE_MAX,
102 _In_ int flags = match_default)
103 {
104 return match(text.c_str(), start, std::min<size_t>(end, text.size()), flags);
105 }
106
107 virtual void invalidate()
108 {
109 this->interval.invalidate();
110 }
111
112 protected:
114 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
115 {
116 if (text[start] == '&') {
117 // Potential entity start
118 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
119 for (chr_end = start + 1;; chr_end++) {
120 if (chr_end >= end || text[chr_end] == 0) {
121 // Unterminated entity
122 break;
123 }
124 if (text[chr_end] == ';') {
125 // Entity end
126 size_t n = chr_end - start - 1;
127 if (n >= 2 && text[start + 1] == '#') {
128 // Numerical entity
129 char32_t unicode;
130 if (text[start + 2] == 'x' || text[start + 2] == 'X')
131 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
132 else
133 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
134#ifdef _WIN32
135 if (unicode < 0x10000) {
136 buf[0] = (wchar_t)unicode;
137 buf[1] = 0;
138 }
139 else {
140 ucs4_to_surrogate_pair(buf, unicode);
141 buf[2] = 0;
142 }
143#else
144 buf[0] = (wchar_t)unicode;
145 buf[1] = 0;
146#endif
147 chr_end++;
148 return buf;
149 }
150 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
151 if (entity_w) {
152 chr_end++;
153 return entity_w;
154 }
155 // Unknown entity.
156 break;
157 }
158 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
159 // This char cannot possibly be a part of entity.
160 break;
161 }
162 }
163 }
164 buf[0] = text[start];
165 buf[1] = 0;
166 chr_end = start + 1;
167 return buf;
168 }
170
171 public:
173
174 protected:
175 std::locale m_locale;
176 };
177
180#ifdef _UNICODE
181 using tparser = wparser;
182#else
183 using tparser = parser;
184#endif
186
190 template <class T>
191 class basic_noop : public basic_parser<T>
192 {
193 public:
194 virtual bool match(
195 _In_reads_or_z_opt_(end) const T* text,
196 _In_ size_t start = 0,
197 _In_ size_t end = SIZE_MAX,
198 _In_ int flags = match_default)
199 {
200 _Assume_(text || start >= end);
201 if (start < end && text[start]) {
202 this->interval.start = this->interval.end = start;
203 return true;
204 }
205 this->interval.invalidate();
206 return false;
207 }
208 };
209
210 using noop = basic_noop<char>;
212#ifdef _UNICODE
213 using tnoop = wnoop;
214#else
215 using tnoop = noop;
216#endif
218
222 template <class T>
223 class basic_any_cu : public basic_parser<T>
224 {
225 public:
226 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
227
228 virtual bool match(
229 _In_reads_or_z_opt_(end) const T* text,
230 _In_ size_t start = 0,
231 _In_ size_t end = SIZE_MAX,
232 _In_ int flags = match_default)
233 {
234 _Assume_(text || start >= end);
235 if (start < end && text[start]) {
236 this->interval.end = (this->interval.start = start) + 1;
237 return true;
238 }
239 this->interval.invalidate();
240 return false;
241 }
242 };
243
246#ifdef _UNICODE
247 using tany_cu = wany_cu;
248#else
249 using tany_cu = any_cu;
250#endif
251
255 class sgml_any_cp : public basic_any_cu<char>
256 {
257 public:
258 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
259
260 virtual bool match(
261 _In_reads_or_z_(end) const char* text,
262 _In_ size_t start = 0,
263 _In_ size_t end = SIZE_MAX,
264 _In_ int flags = match_default)
265 {
266 _Assume_(text || start >= end);
267 if (start < end && text[start]) {
268 if (text[start] == '&') {
269 // SGML entity
270 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
271 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
272 if (text[this->interval.end] == ';') {
273 this->interval.end++;
274 this->interval.start = start;
275 return true;
276 }
277 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
278 break;
279 // Unterminated entity
280 }
281 this->interval.end = (this->interval.start = start) + 1;
282 return true;
283 }
284 this->interval.invalidate();
285 return false;
286 }
287 };
288
292 template <class T>
293 class basic_cu : public basic_parser<T>
294 {
295 public:
296 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
297 basic_parser<T>(locale),
298 m_chr(chr),
299 m_invert(invert)
300 {}
301
302 virtual bool match(
303 _In_reads_or_z_opt_(end) const T* text,
304 _In_ size_t start = 0,
305 _In_ size_t end = SIZE_MAX,
306 _In_ int flags = match_default)
307 {
308 _Assume_(text || start >= end);
309 if (start < end && text[start]) {
310 bool r;
311 if (flags & match_case_insensitive) {
312 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
313 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
314 }
315 else
316 r = text[start] == m_chr;
317 if ((r && !m_invert) || (!r && m_invert)) {
318 this->interval.end = (this->interval.start = start) + 1;
319 return true;
320 }
321 }
322 this->interval.invalidate();
323 return false;
324 }
325
326 protected:
327 T m_chr;
328 bool m_invert;
329 };
330
331 using cu = basic_cu<char>;
332 using wcu = basic_cu<wchar_t>;
333#ifdef _UNICODE
334 using tcu = wcu;
335#else
336 using tcu = cu;
337#endif
338
342 class sgml_cp : public sgml_parser
343 {
344 public:
345 sgml_cp(const char* chr, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
346 sgml_parser(locale),
347 m_invert(invert)
348 {
349 _Assume_(chr || !count);
350 wchar_t buf[3];
351 size_t chr_end;
352 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
353 }
354
355 virtual bool match(
356 _In_reads_or_z_(end) const char* text,
357 _In_ size_t start = 0,
358 _In_ size_t end = SIZE_MAX,
359 _In_ int flags = match_default)
360 {
361 _Assume_(text || start >= end);
362 if (start < end && text[start]) {
363 wchar_t buf[3];
364 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
365 bool r = ((flags & match_case_insensitive) ?
366 stdex::strnicmp(chr, SIZE_MAX, m_chr.c_str(), m_chr.size(), m_locale) :
367 stdex::strncmp(chr, SIZE_MAX, m_chr.c_str(), m_chr.size())) == 0;
368 if ((r && !m_invert) || (!r && m_invert)) {
369 this->interval.start = start;
370 return true;
371 }
372 }
373 this->interval.invalidate();
374 return false;
375 }
376
377 protected:
378 std::wstring m_chr;
379 bool m_invert;
380 };
381
385 template <class T>
386 class basic_space_cu : public basic_parser<T>
387 {
388 public:
389 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
390 basic_parser<T>(locale),
391 m_invert(invert)
392 {}
393
394 virtual bool match(
395 _In_reads_or_z_opt_(end) const T* text,
396 _In_ size_t start = 0,
397 _In_ size_t end = SIZE_MAX,
398 _In_ int flags = match_default)
399 {
400 _Assume_(text || start >= end);
401 if (start < end && text[start]) {
402 bool r =
403 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
404 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
405 if ((r && !m_invert) || (!r && m_invert)) {
406 this->interval.end = (this->interval.start = start) + 1;
407 return true;
408 }
409 }
410 this->interval.invalidate();
411 return false;
412 }
413
414 protected:
415 bool m_invert;
416 };
417
420#ifdef _UNICODE
421 using tspace_cu = wspace_cu;
422#else
423 using tspace_cu = space_cu;
424#endif
425
429 class sgml_space_cp : public basic_space_cu<char>
430 {
431 public:
432 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
434 {}
435
436 virtual bool match(
437 _In_reads_or_z_(end) const char* text,
438 _In_ size_t start = 0,
439 _In_ size_t end = SIZE_MAX,
440 _In_ int flags = match_default)
441 {
442 _Assume_(text || start >= end);
443 if (start < end && text[start]) {
444 wchar_t buf[3];
445 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
446 const wchar_t* chr_end = chr + stdex::strlen(chr);
447 bool r =
448 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
449 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
450 if ((r && !m_invert) || (!r && m_invert)) {
451 this->interval.start = start;
452 return true;
453 }
454 }
455
456 this->interval.invalidate();
457 return false;
458 }
459 };
460
464 template <class T>
465 class basic_punct_cu : public basic_parser<T>
466 {
467 public:
468 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
469 basic_parser<T>(locale),
470 m_invert(invert)
471 {}
472
473 virtual bool match(
474 _In_reads_or_z_opt_(end) const T* text,
475 _In_ size_t start = 0,
476 _In_ size_t end = SIZE_MAX,
477 _In_ int flags = match_default)
478 {
479 _Assume_(text || start >= end);
480 if (start < end && text[start]) {
481 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
482 if ((r && !m_invert) || (!r && m_invert)) {
483 this->interval.end = (this->interval.start = start) + 1;
484 return true;
485 }
486 }
487 this->interval.invalidate();
488 return false;
489 }
490
491 protected:
492 bool m_invert;
493 };
494
497#ifdef _UNICODE
498 using tpunct_cu = wpunct_cu;
499#else
500 using tpunct_cu = punct_cu;
501#endif
502
506 class sgml_punct_cp : public basic_punct_cu<char>
507 {
508 public:
509 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
511 {}
512
513 virtual bool match(
514 _In_reads_or_z_(end) const char* text,
515 _In_ size_t start = 0,
516 _In_ size_t end = SIZE_MAX,
517 _In_ int flags = match_default)
518 {
519 _Assume_(text || start >= end);
520 if (start < end && text[start]) {
521 wchar_t buf[3];
522 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
523 const wchar_t* chr_end = chr + stdex::strlen(chr);
524 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
525 if ((r && !m_invert) || (!r && m_invert)) {
526 this->interval.start = start;
527 return true;
528 }
529 }
530 this->interval.invalidate();
531 return false;
532 }
533 };
534
538 template <class T>
540 {
541 public:
542 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
543 basic_parser<T>(locale),
544 m_invert(invert)
545 {}
546
547 virtual bool match(
548 _In_reads_or_z_opt_(end) const T* text,
549 _In_ size_t start = 0,
550 _In_ size_t end = SIZE_MAX,
551 _In_ int flags = match_default)
552 {
553 _Assume_(text || start >= end);
554 if (start < end && text[start]) {
555 bool r =
556 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
557 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
558 if ((r && !m_invert) || (!r && m_invert)) {
559 this->interval.end = (this->interval.start = start) + 1;
560 return true;
561 }
562 }
563 this->interval.invalidate();
564 return false;
565 }
566
567 protected:
568 bool m_invert;
569 };
570
573#ifdef _UNICODE
575#else
577#endif
578
583 {
584 public:
585 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
587 {}
588
589 virtual bool match(
590 _In_reads_or_z_(end) const char* text,
591 _In_ size_t start = 0,
592 _In_ size_t end = SIZE_MAX,
593 _In_ int flags = match_default)
594 {
595 _Assume_(text || start >= end);
596 if (start < end && text[start]) {
597 wchar_t buf[3];
598 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
599 const wchar_t* chr_end = chr + stdex::strlen(chr);
600 bool r =
601 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
602 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
603 if ((r && !m_invert) || (!r && m_invert)) {
604 this->interval.start = start;
605 return true;
606 }
607 }
608 this->interval.invalidate();
609 return false;
610 }
611 };
612
616 template <class T>
617 class basic_bol : public basic_parser<T>
618 {
619 public:
620 basic_bol(bool invert = false) : m_invert(invert) {}
621
622 virtual bool match(
623 _In_reads_or_z_opt_(end) const T* text,
624 _In_ size_t start = 0,
625 _In_ size_t end = SIZE_MAX,
626 _In_ int flags = match_default)
627 {
628 _Assume_(text || !end);
629 _Assume_(text || start >= end);
630 bool r = start == 0 || (start <= end && stdex::islbreak(text[start - 1]));
631 if ((r && !m_invert) || (!r && m_invert)) {
632 this->interval.end = this->interval.start = start;
633 return true;
634 }
635 this->interval.invalidate();
636 return false;
637 }
638
639 protected:
640 bool m_invert;
641 };
642
643 using bol = basic_bol<char>;
644 using wbol = basic_bol<wchar_t>;
645#ifdef _UNICODE
646 using tbol = wbol;
647#else
648 using tbol = bol;
649#endif
651
655 template <class T>
656 class basic_eol : public basic_parser<T>
657 {
658 public:
659 basic_eol(bool invert = false) : m_invert(invert) {}
660
661 virtual bool match(
662 _In_reads_or_z_opt_(end) const T* text,
663 _In_ size_t start = 0,
664 _In_ size_t end = SIZE_MAX,
665 _In_ int flags = match_default)
666 {
667 _Assume_(text || start >= end);
668 bool r = start >= end || !text[start] || stdex::islbreak(text[start]);
669 if ((r && !m_invert) || (!r && m_invert)) {
670 this->interval.end = this->interval.start = start;
671 return true;
672 }
673 this->interval.invalidate();
674 return false;
675 }
676
677 protected:
678 bool m_invert;
679 };
680
681 using eol = basic_eol<char>;
682 using weol = basic_eol<wchar_t>;
683#ifdef _UNICODE
684 using teol = weol;
685#else
686 using teol = eol;
687#endif
689
690 template <class T>
691 class basic_set : public basic_parser<T>
692 {
693 public:
694 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
695 basic_parser<T>(locale),
696 hit_offset(SIZE_MAX),
697 m_invert(invert)
698 {}
699
700 virtual bool match(
701 _In_reads_or_z_opt_(end) const T* text,
702 _In_ size_t start = 0,
703 _In_ size_t end = SIZE_MAX,
704 _In_ int flags = match_default) = 0;
705
706 virtual void invalidate()
707 {
708 hit_offset = SIZE_MAX;
710 }
711
712 public:
713 size_t hit_offset;
714
715 protected:
716 bool m_invert;
717 };
718
722 template <class T>
723 class basic_cu_set : public basic_set<T>
724 {
725 public:
727 _In_reads_or_z_(count) const T* set,
728 _In_ size_t count = SIZE_MAX,
729 _In_ bool invert = false,
730 _In_ const std::locale& locale = std::locale()) :
731 basic_set<T>(invert, locale)
732 {
733 if (set)
734 m_set.assign(set, set + stdex::strnlen(set, count));
735 }
736
737 virtual bool match(
738 _In_reads_or_z_opt_(end) const T* text,
739 _In_ size_t start = 0,
740 _In_ size_t end = SIZE_MAX,
741 _In_ int flags = match_default)
742 {
743 _Assume_(text || start >= end);
744 if (start < end && text[start]) {
745 const T* set = m_set.c_str();
746 size_t r = (flags & match_case_insensitive) ?
747 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
748 stdex::strnchr(set, m_set.size(), text[start]);
749 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
750 this->hit_offset = r;
751 this->interval.end = (this->interval.start = start) + 1;
752 return true;
753 }
754 }
755 this->hit_offset = SIZE_MAX;
756 this->interval.invalidate();
757 return false;
758 }
759
760 protected:
761 std::basic_string<T> m_set;
762 };
763
766#ifdef _UNICODE
767 using tcu_set = wcu_set;
768#else
769 using tcu_set = cu_set;
770#endif
771
775 class sgml_cp_set : public basic_set<char>
776 {
777 public:
778 sgml_cp_set(const char* set, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
779 basic_set<char>(invert, locale)
780 {
781 if (set)
782 m_set = sgml2str(set, count);
783 }
784
785 virtual bool match(
786 _In_reads_or_z_(end) const char* text,
787 _In_ size_t start = 0,
788 _In_ size_t end = SIZE_MAX,
789 _In_ int flags = match_default)
790 {
791 _Assume_(text || start >= end);
792 if (start < end && text[start]) {
793 wchar_t buf[3];
794 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
795 const wchar_t* set = m_set.c_str();
796 size_t r = (flags & match_case_insensitive) ?
797 stdex::strnistr(set, m_set.size(), chr, m_locale) :
798 stdex::strnstr(set, m_set.size(), chr);
799 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
800 hit_offset = r;
801 this->interval.start = start;
802 return true;
803 }
804 }
805 hit_offset = SIZE_MAX;
806 this->interval.invalidate();
807 return false;
808 }
809
810 protected:
811 std::wstring m_set;
812 };
813
817 template <class T>
818 class basic_string : public basic_parser<T>
819 {
820 public:
822 _In_reads_or_z_(count) const T* str,
823 _In_ size_t count = SIZE_MAX,
824 _In_ const std::locale& locale = std::locale()) :
825 basic_parser<T>(locale),
826 m_str(str, str + stdex::strnlen(str, count))
827 {}
828
829 virtual bool match(
830 _In_reads_or_z_opt_(end) const T* text,
831 _In_ size_t start = 0,
832 _In_ size_t end = SIZE_MAX,
833 _In_ int flags = match_default)
834 {
835 _Assume_(text || start >= end);
836 size_t
837 m = m_str.size(),
838 n = std::min<size_t>(end - start, m);
839 bool r = ((flags & match_case_insensitive) ?
840 stdex::strnicmp(text + start, n, m_str.c_str(), m, this->m_locale) :
841 stdex::strncmp(text + start, n, m_str.c_str(), m)) == 0;
842 if (r) {
843 this->interval.end = (this->interval.start = start) + n;
844 return true;
845 }
846 this->interval.invalidate();
847 return false;
848 }
849
850 protected:
851 std::basic_string<T> m_str;
852 };
853
856#ifdef _UNICODE
857 using tstring = wstring;
858#else
859 using tstring = string;
860#endif
861
866 {
867 public:
868 sgml_string(const char* str, size_t count = SIZE_MAX, _In_ const std::locale& locale = std::locale()) :
869 sgml_parser(locale),
870 m_str(sgml2str(str, count))
871 {}
872
873 virtual bool match(
874 _In_reads_or_z_(end) const char* text,
875 _In_ size_t start = 0,
876 _In_ size_t end = SIZE_MAX,
877 _In_ int flags = match_default)
878 {
879 _Assume_(text || start >= end);
880 const wchar_t* str = m_str.c_str();
881 const bool case_insensitive = flags & match_case_insensitive ? true : false;
882 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
883 for (this->interval.end = start;;) {
884 if (!*str) {
885 this->interval.start = start;
886 return true;
887 }
888 if (this->interval.end >= end || !text[this->interval.end]) {
889 this->interval.invalidate();
890 return false;
891 }
892 wchar_t buf[3];
893 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
894 for (; *chr; ++str, ++chr) {
895 if (!*str ||
896 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
897 {
898 this->interval.invalidate();
899 return false;
900 }
901 }
902 }
903 }
904
905 protected:
906 std::wstring m_str;
907 };
908
912 template <class T>
914 {
915 public:
916 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = SIZE_MAX, bool greedy = true) :
917 m_el(el),
921 {}
922
923 virtual bool match(
924 _In_reads_or_z_opt_(end) const T* text,
925 _In_ size_t start = 0,
926 _In_ size_t end = SIZE_MAX,
927 _In_ int flags = match_default)
928 {
929 _Assume_(text || start >= end);
930 this->interval.start = this->interval.end = start;
931 for (size_t i = 0; ; i++) {
932 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
933 return true;
934 if (!m_el->match(text, this->interval.end, end, flags)) {
935 if (i >= m_min_iterations)
936 return true;
937 break;
938 }
939 if (m_el->interval.end == this->interval.end) {
940 // Element did match, but the matching interval was empty. Quit instead of spinning.
941 return true;
942 }
943 this->interval.end = m_el->interval.end;
944 }
945 this->interval.invalidate();
946 return false;
947 }
948
949 protected:
950 std::shared_ptr<basic_parser<T>> m_el;
953 bool m_greedy;
954 };
955
958#ifdef _UNICODE
959 using titerations = witerations;
960#else
961 using titerations = iterations;
962#endif
964
968 template <class T>
970 {
971 protected:
972 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
973
974 public:
976 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
977 _In_ size_t count,
978 _In_ const std::locale& locale = std::locale()) :
979 basic_parser<T>(locale)
980 {
981 _Assume_(el || !count);
982 m_collection.reserve(count);
983 for (size_t i = 0; i < count; i++)
984 m_collection.push_back(el[i]);
985 }
986
988 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
989 _In_ const std::locale& locale = std::locale()) :
990 basic_parser<T>(locale),
991 m_collection(std::move(collection))
992 {}
993
994 virtual void invalidate()
995 {
996 for (auto& el : m_collection)
997 el->invalidate();
999 }
1000
1001 protected:
1002 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
1003 };
1004
1008 template <class T>
1010 {
1011 public:
1013 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1014 _In_ size_t count = 0,
1015 _In_ const std::locale& locale = std::locale()) :
1016 parser_collection<T>(el, count, locale)
1017 {}
1018
1020 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1021 _In_ const std::locale& locale = std::locale()) :
1022 parser_collection<T>(std::move(collection), locale)
1023 {}
1024
1025 virtual bool match(
1026 _In_reads_or_z_opt_(end) const T* text,
1027 _In_ size_t start = 0,
1028 _In_ size_t end = SIZE_MAX,
1029 _In_ int flags = match_default)
1030 {
1031 _Assume_(text || start >= end);
1032 this->interval.end = start;
1033 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1034 if (!(*i)->match(text, this->interval.end, end, flags)) {
1035 for (++i; i != this->m_collection.end(); ++i)
1036 (*i)->invalidate();
1037 this->interval.invalidate();
1038 return false;
1039 }
1040 this->interval.end = (*i)->interval.end;
1041 }
1042 this->interval.start = start;
1043 return true;
1044 }
1045 };
1046
1049#ifdef _UNICODE
1050 using tsequence = wsequence;
1051#else
1052 using tsequence = sequence;
1053#endif
1055
1059 template <class T>
1061 {
1062 protected:
1063 basic_branch(_In_ const std::locale& locale) :
1064 parser_collection<T>(locale),
1065 hit_offset(SIZE_MAX)
1066 {}
1067
1068 public:
1070 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1071 _In_ size_t count = 0,
1072 _In_ const std::locale& locale = std::locale()) :
1073 parser_collection<T>(el, count, locale),
1074 hit_offset(SIZE_MAX)
1075 {}
1076
1078 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1079 _In_ const std::locale& locale = std::locale()) :
1080 parser_collection<T>(std::move(collection), locale),
1081 hit_offset(SIZE_MAX)
1082 {}
1083
1084 virtual bool match(
1085 _In_reads_or_z_opt_(end) const T* text,
1086 _In_ size_t start = 0,
1087 _In_ size_t end = SIZE_MAX,
1088 _In_ int flags = match_default)
1089 {
1090 _Assume_(text || start >= end);
1091 hit_offset = 0;
1092 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1093 if ((*i)->match(text, start, end, flags)) {
1094 this->interval = (*i)->interval;
1095 for (++i; i != this->m_collection.end(); ++i)
1096 (*i)->invalidate();
1097 return true;
1098 }
1099 }
1100 hit_offset = SIZE_MAX;
1101 this->interval.invalidate();
1102 return false;
1103 }
1104
1105 virtual void invalidate()
1106 {
1107 hit_offset = SIZE_MAX;
1109 }
1110
1111 public:
1112 size_t hit_offset;
1113 };
1114
1115 using branch = basic_branch<char>;
1117#ifdef _UNICODE
1118 using tbranch = wbranch;
1119#else
1120 using tbranch = branch;
1121#endif
1123
1127 template <class T, class T_parser = basic_string<T>>
1129 {
1130 public:
1131 inline basic_string_branch(
1132 _In_reads_(count) const T* str_z = nullptr,
1133 _In_ size_t count = 0,
1134 _In_ const std::locale& locale = std::locale()) :
1135 basic_branch<T>(locale)
1136 {
1137 build(str_z, count);
1138 }
1139
1140 inline basic_string_branch(_In_z_ const T* str, ...) :
1141 basic_branch<T>(std::locale())
1142 {
1143 va_list params;
1144 va_start(params, str);
1145 build(str, params);
1146 va_end(params);
1147 }
1148
1149 inline basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1150 basic_branch<T>(locale)
1151 {
1152 va_list params;
1153 va_start(params, str);
1154 build(str, params);
1155 va_end(params);
1156 }
1157
1158 protected:
1159 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1160 {
1161 _Assume_(str_z || !count);
1162 if (count) {
1163 size_t offset, n;
1164 for (
1165 offset = n = 0;
1166 offset < count && str_z[offset];
1167 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1168 this->m_collection.reserve(n);
1169 for (
1170 offset = 0;
1171 offset < count && str_z[offset];
1172 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1173 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1174 }
1175 }
1176
1177 void build(_In_z_ const T* str, _In_ va_list params)
1178 {
1179 const T* p;
1180 for (
1181 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, SIZE_MAX, this->m_locale)));
1182 (p = va_arg(params, const T*)) != nullptr;
1183 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, SIZE_MAX, this->m_locale))));
1184 }
1185 };
1186
1189#ifdef _UNICODE
1191#else
1193#endif
1195
1199 template <class T>
1201 {
1202 public:
1204 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1205 _In_ size_t count = 0,
1206 _In_ const std::locale& locale = std::locale()) :
1207 parser_collection<T>(el, count, locale)
1208 {}
1209
1211 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1212 _In_ const std::locale& locale = std::locale()) :
1213 parser_collection<T>(std::move(collection), locale)
1214 {}
1215
1216 virtual bool match(
1217 _In_reads_or_z_opt_(end) const T* text,
1218 _In_ size_t start = 0,
1219 _In_ size_t end = SIZE_MAX,
1220 _In_ int flags = match_default)
1221 {
1222 _Assume_(text || start >= end);
1223 for (auto& el : this->m_collection)
1224 el->invalidate();
1225 if (match_recursively(text, start, end, flags)) {
1226 this->interval.start = start;
1227 return true;
1228 }
1229 this->interval.invalidate();
1230 return false;
1231 }
1232
1233 protected:
1234 bool match_recursively(
1235 _In_reads_or_z_opt_(end) const T* text,
1236 _In_ size_t start = 0,
1237 _In_ size_t end = SIZE_MAX,
1238 _In_ int flags = match_default)
1239 {
1240 bool all_matched = true;
1241 for (auto& el : this->m_collection) {
1242 if (!el->interval) {
1243 // Element was not matched in permutatuion yet.
1244 all_matched = false;
1245 if (el->match(text, start, end, flags)) {
1246 // Element matched for the first time.
1247 if (match_recursively(text, el->interval.end, end, flags)) {
1248 // Rest of the elements matched too.
1249 return true;
1250 }
1251 el->invalidate();
1252 }
1253 }
1254 }
1255 if (all_matched) {
1256 this->interval.end = start;
1257 return true;
1258 }
1259 return false;
1260 }
1261 };
1262
1265#ifdef _UNICODE
1266 using tpermutation = wpermutation;
1267#else
1268 using tpermutation = permutation;
1269#endif
1271
1275 template <class T>
1276 class basic_integer : public basic_parser<T>
1277 {
1278 public:
1279 basic_integer(_In_ const std::locale& locale = std::locale()) :
1280 basic_parser<T>(locale),
1281 value(0)
1282 {}
1283
1284 virtual void invalidate()
1285 {
1286 value = 0;
1288 }
1289
1290 public:
1291 size_t value;
1292 };
1293
1297 template <class T>
1299 {
1300 public:
1302 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1303 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1304 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1305 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1306 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1307 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1308 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1309 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1310 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1311 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1312 _In_ const std::locale& locale = std::locale()) :
1313 basic_integer<T>(locale),
1314 m_digit_0(digit_0),
1315 m_digit_1(digit_1),
1316 m_digit_2(digit_2),
1317 m_digit_3(digit_3),
1318 m_digit_4(digit_4),
1319 m_digit_5(digit_5),
1320 m_digit_6(digit_6),
1321 m_digit_7(digit_7),
1322 m_digit_8(digit_8),
1323 m_digit_9(digit_9)
1324 {}
1325
1326 virtual bool match(
1327 _In_reads_or_z_opt_(end) const T* text,
1328 _In_ size_t start = 0,
1329 _In_ size_t end = SIZE_MAX,
1330 _In_ int flags = match_default)
1331 {
1332 _Assume_(text || start >= end);
1333 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1334 size_t dig;
1335 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1336 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1337 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1338 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1339 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1340 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1341 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1342 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1343 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1344 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1345 else break;
1346 this->value = this->value * 10 + dig;
1347 }
1349 this->interval.start = start;
1350 return true;
1351 }
1352 this->interval.invalidate();
1353 return false;
1354 }
1355
1356 protected:
1357 std::shared_ptr<basic_parser<T>>
1358 m_digit_0,
1359 m_digit_1,
1360 m_digit_2,
1361 m_digit_3,
1362 m_digit_4,
1363 m_digit_5,
1364 m_digit_6,
1365 m_digit_7,
1366 m_digit_8,
1367 m_digit_9;
1368 };
1369
1372#ifdef _UNICODE
1373 using tinteger10 = winteger10;
1374#else
1375 using tinteger10 = integer10;
1376#endif
1378
1382 template <class T>
1384 {
1385 public:
1387 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1388 _In_ const std::shared_ptr<basic_set<T>>& separator,
1389 _In_ const std::locale& locale = std::locale()) :
1390 basic_integer<T>(locale),
1391 digit_count(0),
1392 has_separators(false),
1393 m_digits(digits),
1394 m_separator(separator)
1395 {}
1396
1397 virtual bool match(
1398 _In_reads_or_z_opt_(end) const T* text,
1399 _In_ size_t start = 0,
1400 _In_ size_t end = SIZE_MAX,
1401 _In_ int flags = match_default)
1402 {
1403 _Assume_(text || start >= end);
1404 if (m_digits->match(text, start, end, flags)) {
1405 // Leading part match.
1406 this->value = m_digits->value;
1407 digit_count = m_digits->interval.size();
1408 has_separators = false;
1409 this->interval.start = start;
1410 this->interval.end = m_digits->interval.end;
1411 if (m_digits->interval.size() <= 3) {
1412 // Maybe separated with thousand separators?
1413 size_t hit_offset = SIZE_MAX;
1414 while (m_separator->match(text, this->interval.end, end, flags) &&
1415 (hit_offset == SIZE_MAX || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1416 m_digits->match(text, m_separator->interval.end, end, flags) &&
1417 m_digits->interval.size() == 3)
1418 {
1419 // Thousand separator and three-digit integer followed.
1420 this->value = this->value * 1000 + m_digits->value;
1421 digit_count += 3;
1422 has_separators = true;
1423 this->interval.end = m_digits->interval.end;
1424 hit_offset = m_separator->hit_offset;
1425 }
1426 }
1427
1428 return true;
1429 }
1430 this->value = 0;
1431 this->interval.invalidate();
1432 return false;
1433 }
1434
1435 virtual void invalidate()
1436 {
1437 digit_count = 0;
1438 has_separators = false;
1440 }
1441
1442 public:
1445
1446 protected:
1447 std::shared_ptr<basic_integer10<T>> m_digits;
1448 std::shared_ptr<basic_set<T>> m_separator;
1449 };
1450
1453#ifdef _UNICODE
1454 using tinteger10ts = winteger10ts;
1455#else
1456 using tinteger10ts = integer10ts;
1457#endif
1459
1463 template <class T>
1465 {
1466 public:
1468 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1469 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1470 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1471 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1472 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1473 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1474 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1475 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1476 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1477 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1478 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1479 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1480 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1481 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1482 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1483 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1484 _In_ const std::locale& locale = std::locale()) :
1485 basic_integer<T>(locale),
1486 m_digit_0(digit_0),
1487 m_digit_1(digit_1),
1488 m_digit_2(digit_2),
1489 m_digit_3(digit_3),
1490 m_digit_4(digit_4),
1491 m_digit_5(digit_5),
1492 m_digit_6(digit_6),
1493 m_digit_7(digit_7),
1494 m_digit_8(digit_8),
1495 m_digit_9(digit_9),
1496 m_digit_10(digit_10),
1497 m_digit_11(digit_11),
1498 m_digit_12(digit_12),
1499 m_digit_13(digit_13),
1500 m_digit_14(digit_14),
1501 m_digit_15(digit_15)
1502 {}
1503
1504 virtual bool match(
1505 _In_reads_or_z_opt_(end) const T* text,
1506 _In_ size_t start = 0,
1507 _In_ size_t end = SIZE_MAX,
1508 _In_ int flags = match_default)
1509 {
1510 _Assume_(text || start >= end);
1511 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1512 size_t dig;
1513 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1514 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1515 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1516 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1517 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1518 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1519 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1520 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1521 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1522 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1523 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1524 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1525 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1526 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1527 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1528 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
1529 else break;
1530 this->value = this->value * 16 + dig;
1531 }
1533 this->interval.start = start;
1534 return true;
1535 }
1536 this->interval.invalidate();
1537 return false;
1538 }
1539
1540 protected:
1541 std::shared_ptr<basic_parser<T>>
1542 m_digit_0,
1543 m_digit_1,
1544 m_digit_2,
1545 m_digit_3,
1546 m_digit_4,
1547 m_digit_5,
1548 m_digit_6,
1549 m_digit_7,
1550 m_digit_8,
1551 m_digit_9,
1552 m_digit_10,
1553 m_digit_11,
1554 m_digit_12,
1555 m_digit_13,
1556 m_digit_14,
1557 m_digit_15;
1558 };
1559
1562#ifdef _UNICODE
1563 using tinteger16 = winteger16;
1564#else
1565 using tinteger16 = integer16;
1566#endif
1568
1572 template <class T>
1574 {
1575 public:
1577 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1578 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1579 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1580 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1581 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1582 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1583 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1584 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1585 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1586 _In_ const std::locale& locale = std::locale()) :
1587 basic_integer<T>(locale),
1588 m_digit_1(digit_1),
1589 m_digit_5(digit_5),
1590 m_digit_10(digit_10),
1591 m_digit_50(digit_50),
1592 m_digit_100(digit_100),
1593 m_digit_500(digit_500),
1594 m_digit_1000(digit_1000),
1595 m_digit_5000(digit_5000),
1596 m_digit_10000(digit_10000)
1597 {}
1598
1599 virtual bool match(
1600 _In_reads_or_z_opt_(end) const T* text,
1601 _In_ size_t start = 0,
1602 _In_ size_t end = SIZE_MAX,
1603 _In_ int flags = match_default)
1604 {
1605 _Assume_(text || start >= end);
1606 size_t
1608 end2;
1609
1610 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
1611 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1612 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1613 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1614 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1615 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1616 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1617 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1618 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1619 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1620 else break;
1621
1622 // Store first digit.
1623 if (dig[4] == SIZE_MAX) dig[4] = dig[0];
1624
1625 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1626 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1627 break;
1628 }
1629 if (dig[0] <= dig[1]) {
1630 // Digit is less or equal previous one: add.
1631 this->value += dig[0];
1632 }
1633 else if (
1634 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1635 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1636 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1637 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1638 {
1639 // Digit is up to two orders bigger than previous one: subtract. But...
1640 if (dig[2] < dig[0]) {
1641 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1642 break;
1643 }
1644 this->value -= dig[1]; // Cancel addition in the previous step.
1645 dig[0] -= dig[1]; // Combine last two digits.
1646 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1647 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1648 this->value += dig[0]; // Add combined value.
1649 }
1650 else {
1651 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1652 break;
1653 }
1654 }
1655 if (this->value) {
1656 this->interval.start = start;
1657 return true;
1658 }
1659 this->interval.invalidate();
1660 return false;
1661 }
1662
1663 protected:
1664 std::shared_ptr<basic_parser<T>>
1665 m_digit_1,
1666 m_digit_5,
1667 m_digit_10,
1668 m_digit_50,
1669 m_digit_100,
1670 m_digit_500,
1671 m_digit_1000,
1672 m_digit_5000,
1673 m_digit_10000;
1674 };
1675
1678#ifdef _UNICODE
1680#else
1682#endif
1684
1688 template <class T>
1690 {
1691 public:
1693 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1694 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1695 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1696 _In_ const std::locale& locale = std::locale()) :
1697 basic_parser<T>(locale),
1698 numerator(_numerator),
1699 fraction_line(_fraction_line),
1700 denominator(_denominator)
1701 {}
1702
1703 virtual bool match(
1704 _In_reads_or_z_opt_(end) const T* text,
1705 _In_ size_t start = 0,
1706 _In_ size_t end = SIZE_MAX,
1707 _In_ int flags = match_default)
1708 {
1709 _Assume_(text || start >= end);
1710 if (numerator->match(text, start, end, flags) &&
1711 fraction_line->match(text, numerator->interval.end, end, flags) &&
1712 denominator->match(text, fraction_line->interval.end, end, flags))
1713 {
1714 this->interval.start = start;
1715 this->interval.end = denominator->interval.end;
1716 return true;
1717 }
1718 numerator->invalidate();
1719 fraction_line->invalidate();
1720 denominator->invalidate();
1721 this->interval.invalidate();
1722 return false;
1723 }
1724
1725 virtual void invalidate()
1726 {
1727 numerator->invalidate();
1728 fraction_line->invalidate();
1729 denominator->invalidate();
1731 }
1732
1733 public:
1734 std::shared_ptr<basic_parser<T>> numerator;
1735 std::shared_ptr<basic_parser<T>> fraction_line;
1736 std::shared_ptr<basic_parser<T>> denominator;
1737 };
1738
1741#ifdef _UNICODE
1742 using tfraction = wfraction;
1743#else
1744 using tfraction = fraction;
1745#endif
1747
1751 template <class T>
1752 class basic_score : public basic_parser<T>
1753 {
1754 public:
1756 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1757 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1758 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1759 _In_ const std::shared_ptr<basic_parser<T>>& space,
1760 _In_ const std::locale& locale = std::locale()) :
1761 basic_parser<T>(locale),
1762 home(_home),
1763 separator(_separator),
1764 guest(_guest),
1765 m_space(space)
1766 {}
1767
1768 virtual bool match(
1769 _In_reads_or_z_opt_(end) const T* text,
1770 _In_ size_t start = 0,
1771 _In_ size_t end = SIZE_MAX,
1772 _In_ int flags = match_default)
1773 {
1774 _Assume_(text || start >= end);
1775 this->interval.end = start;
1776
1777 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1778
1779 if (home->match(text, this->interval.end, end, flags))
1780 this->interval.end = home->interval.end;
1781 else
1782 goto end;
1783
1784 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1785
1786 if (separator->match(text, this->interval.end, end, flags))
1787 this->interval.end = separator->interval.end;
1788 else
1789 goto end;
1790
1791 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1792
1793 if (guest->match(text, this->interval.end, end, flags))
1794 this->interval.end = guest->interval.end;
1795 else
1796 goto end;
1797
1798 this->interval.start = start;
1799 return true;
1800
1801 end:
1802 home->invalidate();
1803 separator->invalidate();
1804 guest->invalidate();
1805 this->interval.invalidate();
1806 return false;
1807 }
1808
1809 virtual void invalidate()
1810 {
1811 home->invalidate();
1812 separator->invalidate();
1813 guest->invalidate();
1815 }
1816
1817 public:
1818 std::shared_ptr<basic_parser<T>> home;
1819 std::shared_ptr<basic_parser<T>> separator;
1820 std::shared_ptr<basic_parser<T>> guest;
1821
1822 protected:
1823 std::shared_ptr<basic_parser<T>> m_space;
1824 };
1825
1826 using score = basic_score<char>;
1828#ifdef _UNICODE
1829 using tscore = wscore;
1830#else
1831 using tscore = score;
1832#endif
1834
1838 template <class T>
1840 {
1841 public:
1843 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1844 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1845 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1846 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1847 _In_ const std::locale& locale = std::locale()) :
1848 basic_parser<T>(locale),
1853 {}
1854
1855 virtual bool match(
1856 _In_reads_or_z_opt_(end) const T* text,
1857 _In_ size_t start = 0,
1858 _In_ size_t end = SIZE_MAX,
1859 _In_ int flags = match_default)
1860 {
1861 _Assume_(text || start >= end);
1862 this->interval.end = start;
1863 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1864 this->interval.end = positive_sign->interval.end;
1865 if (negative_sign) negative_sign->invalidate();
1866 if (special_sign) special_sign->invalidate();
1867 }
1868 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1869 this->interval.end = negative_sign->interval.end;
1870 if (positive_sign) positive_sign->invalidate();
1871 if (special_sign) special_sign->invalidate();
1872 }
1873 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1874 this->interval.end = special_sign->interval.end;
1875 if (positive_sign) positive_sign->invalidate();
1876 if (negative_sign) negative_sign->invalidate();
1877 }
1878 else {
1879 if (positive_sign) positive_sign->invalidate();
1880 if (negative_sign) negative_sign->invalidate();
1881 if (special_sign) special_sign->invalidate();
1882 }
1883 if (number->match(text, this->interval.end, end, flags)) {
1884 this->interval.start = start;
1885 this->interval.end = number->interval.end;
1886 return true;
1887 }
1888 if (positive_sign) positive_sign->invalidate();
1889 if (negative_sign) negative_sign->invalidate();
1890 if (special_sign) special_sign->invalidate();
1891 number->invalidate();
1892 this->interval.invalidate();
1893 return false;
1894 }
1895
1896 virtual void invalidate()
1897 {
1898 if (positive_sign) positive_sign->invalidate();
1899 if (negative_sign) negative_sign->invalidate();
1900 if (special_sign) special_sign->invalidate();
1901 number->invalidate();
1903 }
1904
1905 public:
1906 std::shared_ptr<basic_parser<T>> positive_sign;
1907 std::shared_ptr<basic_parser<T>> negative_sign;
1908 std::shared_ptr<basic_parser<T>> special_sign;
1909 std::shared_ptr<basic_parser<T>> number;
1910 };
1911
1914#ifdef _UNICODE
1916#else
1918#endif
1920
1924 template <class T>
1926 {
1927 public:
1929 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1930 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1931 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1932 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1933 _In_ const std::shared_ptr<basic_parser<T>>& space,
1934 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1935 _In_ const std::locale& locale = std::locale()) :
1936 basic_parser<T>(locale),
1942 m_space(space)
1943 {}
1944
1945 virtual bool match(
1946 _In_reads_or_z_opt_(end) const T* text,
1947 _In_ size_t start = 0,
1948 _In_ size_t end = SIZE_MAX,
1949 _In_ int flags = match_default)
1950 {
1951 _Assume_(text || start >= end);
1952 this->interval.end = start;
1953
1954 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1955 this->interval.end = positive_sign->interval.end;
1956 if (negative_sign) negative_sign->invalidate();
1957 if (special_sign) special_sign->invalidate();
1958 }
1959 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1960 this->interval.end = negative_sign->interval.end;
1961 if (positive_sign) positive_sign->invalidate();
1962 if (special_sign) special_sign->invalidate();
1963 }
1964 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1965 this->interval.end = special_sign->interval.end;
1966 if (positive_sign) positive_sign->invalidate();
1967 if (negative_sign) negative_sign->invalidate();
1968 }
1969 else {
1970 if (positive_sign) positive_sign->invalidate();
1971 if (negative_sign) negative_sign->invalidate();
1972 if (special_sign) special_sign->invalidate();
1973 }
1974
1975 // Check for <integer> <fraction>
1976 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1977 if (integer->match(text, this->interval.end, end, flags) &&
1978 m_space->match(text, integer->interval.end, end, space_match_flags))
1979 {
1980 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1981 if (fraction->match(text, this->interval.end, end, flags)) {
1982 this->interval.start = start;
1983 this->interval.end = fraction->interval.end;
1984 return true;
1985 }
1986 fraction->invalidate();
1987 this->interval.start = start;
1988 this->interval.end = integer->interval.end;
1989 return true;
1990 }
1991
1992 // Check for <fraction>
1993 if (fraction->match(text, this->interval.end, end, flags)) {
1994 integer->invalidate();
1995 this->interval.start = start;
1996 this->interval.end = fraction->interval.end;
1997 return true;
1998 }
1999
2000 // Check for <integer>
2001 if (integer->match(text, this->interval.end, end, flags)) {
2002 fraction->invalidate();
2003 this->interval.start = start;
2004 this->interval.end = integer->interval.end;
2005 return true;
2006 }
2007
2008 if (positive_sign) positive_sign->invalidate();
2009 if (negative_sign) negative_sign->invalidate();
2010 if (special_sign) special_sign->invalidate();
2011 integer->invalidate();
2012 fraction->invalidate();
2013 this->interval.invalidate();
2014 return false;
2015 }
2016
2017 virtual void invalidate()
2018 {
2019 if (positive_sign) positive_sign->invalidate();
2020 if (negative_sign) negative_sign->invalidate();
2021 if (special_sign) special_sign->invalidate();
2022 integer->invalidate();
2023 fraction->invalidate();
2025 }
2026
2027 public:
2028 std::shared_ptr<basic_parser<T>> positive_sign;
2029 std::shared_ptr<basic_parser<T>> negative_sign;
2030 std::shared_ptr<basic_parser<T>> special_sign;
2031 std::shared_ptr<basic_parser<T>> integer;
2032 std::shared_ptr<basic_parser<T>> fraction;
2033
2034 protected:
2035 std::shared_ptr<basic_parser<T>> m_space;
2036 };
2037
2040#ifdef _UNICODE
2042#else
2044#endif
2046
2050 template <class T>
2052 {
2053 public:
2055 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2056 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2057 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2058 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2059 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2060 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2061 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2062 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2063 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2064 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2065 _In_ const std::locale& locale = std::locale()) :
2066 basic_parser<T>(locale),
2077 value(std::numeric_limits<double>::quiet_NaN())
2078 {}
2079
2080 virtual bool match(
2081 _In_reads_or_z_opt_(end) const T* text,
2082 _In_ size_t start = 0,
2083 _In_ size_t end = SIZE_MAX,
2084 _In_ int flags = match_default)
2085 {
2086 _Assume_(text || start >= end);
2087 this->interval.end = start;
2088
2089 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2090 this->interval.end = positive_sign->interval.end;
2091 if (negative_sign) negative_sign->invalidate();
2092 if (special_sign) special_sign->invalidate();
2093 }
2094 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2095 this->interval.end = negative_sign->interval.end;
2096 if (positive_sign) positive_sign->invalidate();
2097 if (special_sign) special_sign->invalidate();
2098 }
2099 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2100 this->interval.end = special_sign->interval.end;
2101 if (positive_sign) positive_sign->invalidate();
2102 if (negative_sign) negative_sign->invalidate();
2103 }
2104 else {
2105 if (positive_sign) positive_sign->invalidate();
2106 if (negative_sign) negative_sign->invalidate();
2107 if (special_sign) special_sign->invalidate();
2108 }
2109
2110 if (integer->match(text, this->interval.end, end, flags))
2111 this->interval.end = integer->interval.end;
2112
2113 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2114 decimal->match(text, decimal_separator->interval.end, end, flags))
2115 this->interval.end = decimal->interval.end;
2116 else {
2117 decimal_separator->invalidate();
2118 decimal->invalidate();
2119 }
2120
2121 if (integer->interval.empty() &&
2122 decimal->interval.empty())
2123 {
2124 // No integer part, no decimal part.
2125 if (positive_sign) positive_sign->invalidate();
2126 if (negative_sign) negative_sign->invalidate();
2127 if (special_sign) special_sign->invalidate();
2128 integer->invalidate();
2129 decimal_separator->invalidate();
2130 decimal->invalidate();
2131 if (exponent_symbol) exponent_symbol->invalidate();
2132 if (positive_exp_sign) positive_exp_sign->invalidate();
2133 if (negative_exp_sign) negative_exp_sign->invalidate();
2134 if (exponent) exponent->invalidate();
2135 this->interval.invalidate();
2136 return false;
2137 }
2138
2139 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2140 ((positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2141 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags)) ||
2142 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2143 {
2144 this->interval.end = exponent->interval.end;
2145 if (negative_exp_sign) negative_exp_sign->invalidate();
2146 }
2147 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2148 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2149 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2150 {
2151 this->interval.end = exponent->interval.end;
2152 if (positive_exp_sign) positive_exp_sign->invalidate();
2153 }
2154 else {
2155 if (exponent_symbol) exponent_symbol->invalidate();
2156 if (positive_exp_sign) positive_exp_sign->invalidate();
2157 if (negative_exp_sign) negative_exp_sign->invalidate();
2158 if (exponent) exponent->invalidate();
2159 }
2160
2161 value = (double)integer->value;
2162 if (decimal->interval)
2163 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2164 if (negative_sign && negative_sign->interval)
2165 value = -value;
2166 if (exponent && exponent->interval) {
2167 double e = (double)exponent->value;
2168 if (negative_exp_sign && negative_exp_sign->interval)
2169 e = -e;
2170 value *= pow(10.0, e);
2171 }
2172
2173 this->interval.start = start;
2174 return true;
2175 }
2176
2177 virtual void invalidate()
2178 {
2179 if (positive_sign) positive_sign->invalidate();
2180 if (negative_sign) negative_sign->invalidate();
2181 if (special_sign) special_sign->invalidate();
2182 integer->invalidate();
2183 decimal_separator->invalidate();
2184 decimal->invalidate();
2185 if (exponent_symbol) exponent_symbol->invalidate();
2186 if (positive_exp_sign) positive_exp_sign->invalidate();
2187 if (negative_exp_sign) negative_exp_sign->invalidate();
2188 if (exponent) exponent->invalidate();
2189 value = std::numeric_limits<double>::quiet_NaN();
2191 }
2192
2193 public:
2194 std::shared_ptr<basic_parser<T>> positive_sign;
2195 std::shared_ptr<basic_parser<T>> negative_sign;
2196 std::shared_ptr<basic_parser<T>> special_sign;
2197 std::shared_ptr<basic_integer<T>> integer;
2198 std::shared_ptr<basic_parser<T>> decimal_separator;
2199 std::shared_ptr<basic_integer<T>> decimal;
2200 std::shared_ptr<basic_parser<T>> exponent_symbol;
2201 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2202 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2203 std::shared_ptr<basic_integer<T>> exponent;
2204 double value;
2205 };
2206
2209#ifdef _UNICODE
2211#else
2213#endif
2215
2219 template <class T>
2221 {
2222 public:
2224 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2225 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2226 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2227 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2228 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2229 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2230 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2231 _In_ const std::locale& locale = std::locale()) :
2232 basic_parser<T>(locale),
2240 {}
2241
2242 virtual bool match(
2243 _In_reads_or_z_opt_(end) const T* text,
2244 _In_ size_t start = 0,
2245 _In_ size_t end = SIZE_MAX,
2246 _In_ int flags = match_default)
2247 {
2248 _Assume_(text || start >= end);
2249 this->interval.end = start;
2250
2251 if (positive_sign->match(text, this->interval.end, end, flags)) {
2252 this->interval.end = positive_sign->interval.end;
2253 if (negative_sign) negative_sign->invalidate();
2254 if (special_sign) special_sign->invalidate();
2255 }
2256 else if (negative_sign->match(text, this->interval.end, end, flags)) {
2257 this->interval.end = negative_sign->interval.end;
2258 if (positive_sign) positive_sign->invalidate();
2259 if (special_sign) special_sign->invalidate();
2260 }
2261 else if (special_sign->match(text, this->interval.end, end, flags)) {
2262 this->interval.end = special_sign->interval.end;
2263 if (positive_sign) positive_sign->invalidate();
2264 if (negative_sign) negative_sign->invalidate();
2265 }
2266 else {
2267 if (positive_sign) positive_sign->invalidate();
2268 if (negative_sign) negative_sign->invalidate();
2269 if (special_sign) special_sign->invalidate();
2270 }
2271
2272 if (currency->match(text, this->interval.end, end, flags))
2273 this->interval.end = currency->interval.end;
2274 else {
2275 if (positive_sign) positive_sign->invalidate();
2276 if (negative_sign) negative_sign->invalidate();
2277 if (special_sign) special_sign->invalidate();
2278 integer->invalidate();
2279 decimal_separator->invalidate();
2280 decimal->invalidate();
2281 this->interval.invalidate();
2282 return false;
2283 }
2284
2285 if (integer->match(text, this->interval.end, end, flags))
2286 this->interval.end = integer->interval.end;
2287 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2288 decimal->match(text, decimal_separator->interval.end, end, flags))
2289 this->interval.end = decimal->interval.end;
2290 else {
2291 decimal_separator->invalidate();
2292 decimal->invalidate();
2293 }
2294
2295 if (integer->interval.empty() &&
2296 decimal->interval.empty())
2297 {
2298 // No integer part, no decimal part.
2299 if (positive_sign) positive_sign->invalidate();
2300 if (negative_sign) negative_sign->invalidate();
2301 if (special_sign) special_sign->invalidate();
2302 currency->invalidate();
2303 integer->invalidate();
2304 decimal_separator->invalidate();
2305 decimal->invalidate();
2306 this->interval.invalidate();
2307 return false;
2308 }
2309
2310 this->interval.start = start;
2311 return true;
2312 }
2313
2314 virtual void invalidate()
2315 {
2316 if (positive_sign) positive_sign->invalidate();
2317 if (negative_sign) negative_sign->invalidate();
2318 if (special_sign) special_sign->invalidate();
2319 currency->invalidate();
2320 integer->invalidate();
2321 decimal_separator->invalidate();
2322 decimal->invalidate();
2324 }
2325
2326 public:
2327 std::shared_ptr<basic_parser<T>> positive_sign;
2328 std::shared_ptr<basic_parser<T>> negative_sign;
2329 std::shared_ptr<basic_parser<T>> special_sign;
2330 std::shared_ptr<basic_parser<T>> currency;
2331 std::shared_ptr<basic_parser<T>> integer;
2332 std::shared_ptr<basic_parser<T>> decimal_separator;
2333 std::shared_ptr<basic_parser<T>> decimal;
2334 };
2335
2338#ifdef _UNICODE
2340#else
2342#endif
2344
2348 template <class T>
2350 {
2351 public:
2353 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2354 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2355 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2356 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2357 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2358 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2359 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2360 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2361 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2362 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2363 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2364 _In_ const std::locale& locale = std::locale()) :
2365 basic_parser<T>(locale),
2366 m_digit_0(digit_0),
2367 m_digit_1(digit_1),
2368 m_digit_2(digit_2),
2369 m_digit_3(digit_3),
2370 m_digit_4(digit_4),
2371 m_digit_5(digit_5),
2372 m_digit_6(digit_6),
2373 m_digit_7(digit_7),
2374 m_digit_8(digit_8),
2375 m_digit_9(digit_9),
2376 m_separator(separator)
2377 {
2378 value.s_addr = 0;
2379 }
2380
2381 virtual bool match(
2382 _In_reads_or_z_opt_(end) const T* text,
2383 _In_ size_t start = 0,
2384 _In_ size_t end = SIZE_MAX,
2385 _In_ int flags = match_default)
2386 {
2387 _Assume_(text || start >= end);
2388 this->interval.end = start;
2389 value.s_addr = 0;
2390
2391 size_t i;
2392 for (i = 0; i < 4; i++) {
2393 if (i) {
2394 if (m_separator->match(text, this->interval.end, end, flags))
2395 this->interval.end = m_separator->interval.end;
2396 else
2397 goto error;
2398 }
2399
2400 components[i].start = this->interval.end;
2401 bool is_empty = true;
2402 size_t x;
2403 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2404 size_t dig, digit_end;
2405 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2406 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2407 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2408 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2409 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2410 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2411 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2412 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2413 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2414 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2415 else break;
2416 size_t x_n = x * 10 + dig;
2417 if (x_n <= 255) {
2418 x = x_n;
2419 this->interval.end = digit_end;
2420 is_empty = false;
2421 }
2422 else
2423 break;
2424 }
2425 if (is_empty)
2426 goto error;
2427 components[i].end = this->interval.end;
2428 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2429 }
2430 if (i < 4)
2431 goto error;
2432
2433 this->interval.start = start;
2434 return true;
2435
2436 error:
2437 components[0].start = 1;
2438 components[0].end = 0;
2439 components[1].start = 1;
2440 components[1].end = 0;
2441 components[2].start = 1;
2442 components[2].end = 0;
2443 components[3].start = 1;
2444 components[3].end = 0;
2445 value.s_addr = 0;
2446 this->interval.invalidate();
2447 return false;
2448 }
2449
2450 virtual void invalidate()
2451 {
2452 components[0].start = 1;
2453 components[0].end = 0;
2454 components[1].start = 1;
2455 components[1].end = 0;
2456 components[2].start = 1;
2457 components[2].end = 0;
2458 components[3].start = 1;
2459 components[3].end = 0;
2460 value.s_addr = 0;
2462 }
2463
2464 public:
2467
2468 protected:
2469 std::shared_ptr<basic_parser<T>>
2470 m_digit_0,
2471 m_digit_1,
2472 m_digit_2,
2473 m_digit_3,
2474 m_digit_4,
2475 m_digit_5,
2476 m_digit_6,
2477 m_digit_7,
2478 m_digit_8,
2479 m_digit_9;
2480 std::shared_ptr<basic_parser<T>> m_separator;
2481 };
2482
2485#ifdef _UNICODE
2487#else
2489#endif
2491
2495 template <class T>
2497 {
2498 public:
2499 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2500
2501 virtual bool match(
2502 _In_reads_or_z_opt_(end) const T* text,
2503 _In_ size_t start = 0,
2504 _In_ size_t end = SIZE_MAX,
2505 _In_ int flags = match_default)
2506 {
2507 _Assume_(text || start >= end);
2508 if (start < end && text[start]) {
2509 if (text[start] == '-' ||
2510 text[start] == '_' ||
2511 text[start] == ':' ||
2512 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2513 {
2514 this->interval.end = (this->interval.start = start) + 1;
2515 return true;
2516 }
2517 }
2518 this->interval.invalidate();
2519 return false;
2520 }
2521 };
2522
2525#ifdef _UNICODE
2527#else
2529#endif
2530
2535 {
2536 public:
2537 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2538
2539 virtual bool match(
2540 _In_reads_or_z_(end) const char* text,
2541 _In_ size_t start = 0,
2542 _In_ size_t end = SIZE_MAX,
2543 _In_ int flags = match_default)
2544 {
2545 _Assume_(text || start >= end);
2546 if (start < end && text[start]) {
2547 wchar_t buf[3];
2548 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2549 const wchar_t* chr_end = chr + stdex::strlen(chr);
2550 if (((chr[0] == L'-' ||
2551 chr[0] == L'_' ||
2552 chr[0] == L':') && chr[1] == 0) ||
2553 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2554 {
2555 this->interval.start = start;
2556 return true;
2557 }
2558 }
2559 this->interval.invalidate();
2560 return false;
2561 }
2562 };
2563
2567 template <class T>
2569 {
2570 public:
2572 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2573 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2574 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2575 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2576 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2577 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2578 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2579 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2580 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2581 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2582 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2583 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2584 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2585 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2586 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2587 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2588 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2589 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2590 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2591 _In_ const std::locale& locale = std::locale()) :
2592 basic_parser<T>(locale),
2593 m_digit_0(digit_0),
2594 m_digit_1(digit_1),
2595 m_digit_2(digit_2),
2596 m_digit_3(digit_3),
2597 m_digit_4(digit_4),
2598 m_digit_5(digit_5),
2599 m_digit_6(digit_6),
2600 m_digit_7(digit_7),
2601 m_digit_8(digit_8),
2602 m_digit_9(digit_9),
2603 m_digit_10(digit_10),
2604 m_digit_11(digit_11),
2605 m_digit_12(digit_12),
2606 m_digit_13(digit_13),
2607 m_digit_14(digit_14),
2608 m_digit_15(digit_15),
2609 m_separator(separator),
2610 m_scope_id_separator(scope_id_separator),
2612 {
2613 memset(&value, 0, sizeof(value));
2614 }
2615
2616 virtual bool match(
2617 _In_reads_or_z_opt_(end) const T* text,
2618 _In_ size_t start = 0,
2619 _In_ size_t end = SIZE_MAX,
2620 _In_ int flags = match_default)
2621 {
2622 _Assume_(text || start >= end);
2623 this->interval.end = start;
2624 memset(&value, 0, sizeof(value));
2625
2626 size_t i, compaction_i = SIZE_MAX, compaction_start = start;
2627 for (i = 0; i < 8; i++) {
2628 bool is_empty = true;
2629
2630 if (m_separator->match(text, this->interval.end, end, flags)) {
2631 // : found
2632 this->interval.end = m_separator->interval.end;
2633 if (m_separator->match(text, this->interval.end, end, flags)) {
2634 // :: found
2635 if (compaction_i == SIZE_MAX) {
2636 // Zero compaction start
2637 compaction_i = i;
2638 compaction_start = m_separator->interval.start;
2639 this->interval.end = m_separator->interval.end;
2640 }
2641 else {
2642 // More than one zero compaction
2643 break;
2644 }
2645 }
2646 else if (!i) {
2647 // Leading : found
2648 goto error;
2649 }
2650 }
2651 else if (i) {
2652 // : missing
2653 break;
2654 }
2655
2656 components[i].start = this->interval.end;
2657 size_t x;
2658 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2659 size_t dig, digit_end;
2660 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2661 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2662 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2663 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2664 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2665 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2666 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2667 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2668 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2669 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2670 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2671 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2672 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2673 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2674 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2675 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2676 else break;
2677 size_t x_n = x * 16 + dig;
2678 if (x_n <= 0xffff) {
2679 x = x_n;
2680 this->interval.end = digit_end;
2681 is_empty = false;
2682 }
2683 else
2684 break;
2685 }
2686 if (is_empty) {
2687 if (compaction_i != SIZE_MAX) {
2688 // Zero compaction active: no sweat.
2689 break;
2690 }
2691 goto error;
2692 }
2693 components[i].end = this->interval.end;
2694 this->value.s6_words[i] = (uint16_t)x;
2695 }
2696
2697 if (compaction_i != SIZE_MAX) {
2698 // Align components right due to zero compaction.
2699 size_t j, k;
2700 for (j = 8, k = i; k > compaction_i;) {
2701 this->value.s6_words[--j] = this->value.s6_words[--k];
2703 }
2704 for (; j > compaction_i;) {
2705 this->value.s6_words[--j] = 0;
2706 components[j].start =
2708 }
2709 }
2710 else if (i < 8)
2711 goto error;
2712
2713 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2714 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2715 this->interval.end = scope_id->interval.end;
2716 else if (scope_id)
2717 scope_id->invalidate();
2718
2719 this->interval.start = start;
2720 return true;
2721
2722 error:
2723 components[0].start = 1;
2724 components[0].end = 0;
2725 components[1].start = 1;
2726 components[1].end = 0;
2727 components[2].start = 1;
2728 components[2].end = 0;
2729 components[3].start = 1;
2730 components[3].end = 0;
2731 components[4].start = 1;
2732 components[4].end = 0;
2733 components[5].start = 1;
2734 components[5].end = 0;
2735 components[6].start = 1;
2736 components[6].end = 0;
2737 components[7].start = 1;
2738 components[7].end = 0;
2739 memset(&value, 0, sizeof(value));
2740 if (scope_id) scope_id->invalidate();
2741 this->interval.invalidate();
2742 return false;
2743 }
2744
2745 virtual void invalidate()
2746 {
2747 components[0].start = 1;
2748 components[0].end = 0;
2749 components[1].start = 1;
2750 components[1].end = 0;
2751 components[2].start = 1;
2752 components[2].end = 0;
2753 components[3].start = 1;
2754 components[3].end = 0;
2755 components[4].start = 1;
2756 components[4].end = 0;
2757 components[5].start = 1;
2758 components[5].end = 0;
2759 components[6].start = 1;
2760 components[6].end = 0;
2761 components[7].start = 1;
2762 components[7].end = 0;
2763 memset(&value, 0, sizeof(value));
2764 if (scope_id) scope_id->invalidate();
2766 }
2767
2768 public:
2771 std::shared_ptr<basic_parser<T>> scope_id;
2772
2773 protected:
2774 std::shared_ptr<basic_parser<T>>
2775 m_digit_0,
2776 m_digit_1,
2777 m_digit_2,
2778 m_digit_3,
2779 m_digit_4,
2780 m_digit_5,
2781 m_digit_6,
2782 m_digit_7,
2783 m_digit_8,
2784 m_digit_9,
2785 m_digit_10,
2786 m_digit_11,
2787 m_digit_12,
2788 m_digit_13,
2789 m_digit_14,
2790 m_digit_15;
2791 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2792 };
2793
2796#ifdef _UNICODE
2798#else
2800#endif
2802
2806 template <class T>
2808 {
2809 public:
2811 _In_ bool allow_idn,
2812 _In_ const std::locale& locale = std::locale()) :
2813 basic_parser<T>(locale),
2814 m_allow_idn(allow_idn),
2815 allow_on_edge(true)
2816 {}
2817
2818 virtual bool match(
2819 _In_reads_or_z_opt_(end) const T* text,
2820 _In_ size_t start = 0,
2821 _In_ size_t end = SIZE_MAX,
2822 _In_ int flags = match_default)
2823 {
2824 _Assume_(text || start >= end);
2825 if (start < end && text[start]) {
2826 if (('A' <= text[start] && text[start] <= 'Z') ||
2827 ('a' <= text[start] && text[start] <= 'z') ||
2828 ('0' <= text[start] && text[start] <= '9'))
2829 allow_on_edge = true;
2830 else if (text[start] == '-')
2831 allow_on_edge = false;
2832 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2833 allow_on_edge = true;
2834 else {
2835 this->interval.invalidate();
2836 return false;
2837 }
2838 this->interval.end = (this->interval.start = start) + 1;
2839 return true;
2840 }
2841 this->interval.invalidate();
2842 return false;
2843 }
2844
2845 public:
2847
2848 protected:
2849 bool m_allow_idn;
2850 };
2851
2854#ifdef _UNICODE
2856#else
2858#endif
2859
2864 {
2865 public:
2867 _In_ bool allow_idn,
2868 _In_ const std::locale& locale = std::locale()) :
2870 {}
2871
2872 virtual bool match(
2873 _In_reads_or_z_(end) const char* text,
2874 _In_ size_t start = 0,
2875 _In_ size_t end = SIZE_MAX,
2876 _In_ int flags = match_default)
2877 {
2878 _Assume_(text || start >= end);
2879 if (start < end && text[start]) {
2880 wchar_t buf[3];
2881 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2882 const wchar_t* chr_end = chr + stdex::strlen(chr);
2883 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2884 ('a' <= chr[0] && chr[0] <= 'z') ||
2885 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2886 allow_on_edge = true;
2887 else if (chr[0] == '-' && chr[1] == 0)
2888 allow_on_edge = false;
2889 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2890 allow_on_edge = true;
2891 else {
2892 this->interval.invalidate();
2893 return false;
2894 }
2895 this->interval.start = start;
2896 return true;
2897 }
2898 this->interval.invalidate();
2899 return false;
2900 }
2901 };
2902
2906 template <class T>
2908 {
2909 public:
2911 _In_ bool allow_absolute,
2912 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2913 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2914 _In_ const std::locale& locale = std::locale()) :
2915 basic_parser<T>(locale),
2917 m_domain_char(domain_char),
2918 m_separator(separator)
2919 {}
2920
2921 virtual bool match(
2922 _In_reads_or_z_opt_(end) const T* text,
2923 _In_ size_t start = 0,
2924 _In_ size_t end = SIZE_MAX,
2925 _In_ int flags = match_default)
2926 {
2927 _Assume_(text || start >= end);
2928 size_t i = start, count;
2929 for (count = 0; i < end && text[i] && count < 127; count++) {
2930 if (m_domain_char->match(text, i, end, flags) &&
2931 m_domain_char->allow_on_edge)
2932 {
2933 // Domain start
2934 this->interval.end = i = m_domain_char->interval.end;
2935 while (i < end && text[i]) {
2936 if (m_domain_char->allow_on_edge &&
2937 m_separator->match(text, i, end, flags))
2938 {
2939 // Domain end
2940 if (m_allow_absolute)
2941 this->interval.end = i = m_separator->interval.end;
2942 else {
2943 this->interval.end = i;
2944 i = m_separator->interval.end;
2945 }
2946 break;
2947 }
2948 if (m_domain_char->match(text, i, end, flags)) {
2949 if (m_domain_char->allow_on_edge)
2950 this->interval.end = i = m_domain_char->interval.end;
2951 else
2952 i = m_domain_char->interval.end;
2953 }
2954 else {
2955 this->interval.start = start;
2956 return true;
2957 }
2958 }
2959 }
2960 else
2961 break;
2962 }
2963 if (count) {
2964 this->interval.start = start;
2965 return true;
2966 }
2967 this->interval.invalidate();
2968 return false;
2969 }
2970
2971 protected:
2973 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2974 std::shared_ptr<basic_parser<T>> m_separator;
2975 };
2976
2979#ifdef _UNICODE
2980 using tdns_name = wdns_name;
2981#else
2982 using tdns_name = dns_name;
2983#endif
2985
2989 template <class T>
2991 {
2992 public:
2993 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2994
2995 virtual bool match(
2996 _In_reads_or_z_opt_(end) const T* text,
2997 _In_ size_t start = 0,
2998 _In_ size_t end = SIZE_MAX,
2999 _In_ int flags = match_default)
3000 {
3001 _Assume_(text || start >= end);
3002 if (start < end && text[start]) {
3003 if (text[start] == '-' ||
3004 text[start] == '.' ||
3005 text[start] == '_' ||
3006 text[start] == '~' ||
3007 text[start] == '%' ||
3008 text[start] == '!' ||
3009 text[start] == '$' ||
3010 text[start] == '&' ||
3011 text[start] == '\'' ||
3012 //text[start] == '(' ||
3013 //text[start] == ')' ||
3014 text[start] == '*' ||
3015 text[start] == '+' ||
3016 text[start] == ',' ||
3017 text[start] == ';' ||
3018 text[start] == '=' ||
3019 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3020 {
3021 this->interval.end = (this->interval.start = start) + 1;
3022 return true;
3023 }
3024 }
3025 this->interval.invalidate();
3026 return false;
3027 }
3028 };
3029
3032#ifdef _UNICODE
3034#else
3036#endif
3037
3042 {
3043 public:
3044 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3045
3046 virtual bool match(
3047 _In_reads_or_z_(end) const char* text,
3048 _In_ size_t start = 0,
3049 _In_ size_t end = SIZE_MAX,
3050 _In_ int flags = match_default)
3051 {
3052 _Assume_(text || start >= end);
3053 if (start < end && text[start]) {
3054 wchar_t buf[3];
3055 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3056 const wchar_t* chr_end = chr + stdex::strlen(chr);
3057 if (((chr[0] == L'-' ||
3058 chr[0] == L'.' ||
3059 chr[0] == L'_' ||
3060 chr[0] == L'~' ||
3061 chr[0] == L'%' ||
3062 chr[0] == L'!' ||
3063 chr[0] == L'$' ||
3064 chr[0] == L'&' ||
3065 chr[0] == L'\'' ||
3066 //chr[0] == L'(' ||
3067 //chr[0] == L')' ||
3068 chr[0] == L'*' ||
3069 chr[0] == L'+' ||
3070 chr[0] == L',' ||
3071 chr[0] == L';' ||
3072 chr[0] == L'=') && chr[1] == 0) ||
3073 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3074 {
3075 this->interval.start = start;
3076 return true;
3077 }
3078 }
3079
3080 this->interval.invalidate();
3081 return false;
3082 }
3083 };
3084
3088 template <class T>
3090 {
3091 public:
3092 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3093
3094 virtual bool match(
3095 _In_reads_or_z_opt_(end) const T* text,
3096 _In_ size_t start = 0,
3097 _In_ size_t end = SIZE_MAX,
3098 _In_ int flags = match_default)
3099 {
3100 _Assume_(text || start >= end);
3101 if (start < end && text[start]) {
3102 if (text[start] == '-' ||
3103 text[start] == '.' ||
3104 text[start] == '_' ||
3105 text[start] == '~' ||
3106 text[start] == '%' ||
3107 text[start] == '!' ||
3108 text[start] == '$' ||
3109 text[start] == '&' ||
3110 text[start] == '\'' ||
3111 text[start] == '(' ||
3112 text[start] == ')' ||
3113 text[start] == '*' ||
3114 text[start] == '+' ||
3115 text[start] == ',' ||
3116 text[start] == ';' ||
3117 text[start] == '=' ||
3118 text[start] == ':' ||
3119 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3120 {
3121 this->interval.end = (this->interval.start = start) + 1;
3122 return true;
3123 }
3124 }
3125 this->interval.invalidate();
3126 return false;
3127 }
3128 };
3129
3132#ifdef _UNICODE
3134#else
3136#endif
3137
3142 {
3143 public:
3144 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3145
3146 virtual bool match(
3147 _In_reads_or_z_(end) const char* text,
3148 _In_ size_t start = 0,
3149 _In_ size_t end = SIZE_MAX,
3150 _In_ int flags = match_default)
3151 {
3152 _Assume_(text || start >= end);
3153 if (start < end && text[start]) {
3154 wchar_t buf[3];
3155 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3156 const wchar_t* chr_end = chr + stdex::strlen(chr);
3157 if (((chr[0] == L'-' ||
3158 chr[0] == L'.' ||
3159 chr[0] == L'_' ||
3160 chr[0] == L'~' ||
3161 chr[0] == L'%' ||
3162 chr[0] == L'!' ||
3163 chr[0] == L'$' ||
3164 chr[0] == L'&' ||
3165 chr[0] == L'\'' ||
3166 chr[0] == L'(' ||
3167 chr[0] == L')' ||
3168 chr[0] == L'*' ||
3169 chr[0] == L'+' ||
3170 chr[0] == L',' ||
3171 chr[0] == L';' ||
3172 chr[0] == L'=' ||
3173 chr[0] == L':') && chr[1] == 0) ||
3174 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3175 {
3176 this->interval.start = start;
3177 return true;
3178 }
3179 }
3180 this->interval.invalidate();
3181 return false;
3182 }
3183 };
3184
3188 template <class T>
3190 {
3191 public:
3192 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3193
3194 virtual bool match(
3195 _In_reads_or_z_opt_(end) const T* text,
3196 _In_ size_t start = 0,
3197 _In_ size_t end = SIZE_MAX,
3198 _In_ int flags = match_default)
3199 {
3200 _Assume_(text || start >= end);
3201 if (start < end && text[start]) {
3202 if (text[start] == '/' ||
3203 text[start] == '-' ||
3204 text[start] == '.' ||
3205 text[start] == '_' ||
3206 text[start] == '~' ||
3207 text[start] == '%' ||
3208 text[start] == '!' ||
3209 text[start] == '$' ||
3210 text[start] == '&' ||
3211 text[start] == '\'' ||
3212 text[start] == '(' ||
3213 text[start] == ')' ||
3214 text[start] == '*' ||
3215 text[start] == '+' ||
3216 text[start] == ',' ||
3217 text[start] == ';' ||
3218 text[start] == '=' ||
3219 text[start] == ':' ||
3220 text[start] == '@' ||
3221 text[start] == '?' ||
3222 text[start] == '#' ||
3223 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3224 {
3225 this->interval.end = (this->interval.start = start) + 1;
3226 return true;
3227 }
3228 }
3229 this->interval.invalidate();
3230 return false;
3231 }
3232 };
3233
3236#ifdef _UNICODE
3238#else
3240#endif
3241
3246 {
3247 public:
3248 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3249
3250 virtual bool match(
3251 _In_reads_or_z_(end) const char* text,
3252 _In_ size_t start = 0,
3253 _In_ size_t end = SIZE_MAX,
3254 _In_ int flags = match_default)
3255 {
3256 _Assume_(text || start >= end);
3257 if (start < end && text[start]) {
3258 wchar_t buf[3];
3259 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3260 const wchar_t* chr_end = chr + stdex::strlen(chr);
3261 if (((chr[0] == L'/' ||
3262 chr[0] == L'-' ||
3263 chr[0] == L'.' ||
3264 chr[0] == L'_' ||
3265 chr[0] == L'~' ||
3266 chr[0] == L'%' ||
3267 chr[0] == L'!' ||
3268 chr[0] == L'$' ||
3269 chr[0] == L'&' ||
3270 chr[0] == L'\'' ||
3271 chr[0] == L'(' ||
3272 chr[0] == L')' ||
3273 chr[0] == L'*' ||
3274 chr[0] == L'+' ||
3275 chr[0] == L',' ||
3276 chr[0] == L';' ||
3277 chr[0] == L'=' ||
3278 chr[0] == L':' ||
3279 chr[0] == L'@' ||
3280 chr[0] == L'?' ||
3281 chr[0] == L'#') && chr[1] == 0) ||
3282 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3283 {
3284 this->interval.start = start;
3285 return true;
3286 }
3287 }
3288 this->interval.invalidate();
3289 return false;
3290 }
3291 };
3292
3296 template <class T>
3298 {
3299 public:
3301 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3302 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3303 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3304 _In_ const std::locale& locale = std::locale()) :
3305 basic_parser<T>(locale),
3306 m_path_char(path_char),
3307 m_query_start(query_start),
3308 m_bookmark_start(bookmark_start)
3309 {}
3310
3311 virtual bool match(
3312 _In_reads_or_z_opt_(end) const T* text,
3313 _In_ size_t start = 0,
3314 _In_ size_t end = SIZE_MAX,
3315 _In_ int flags = match_default)
3316 {
3317 _Assume_(text || start >= end);
3318
3319 this->interval.end = start;
3320 path.start = start;
3321 query.start = 1;
3322 query.end = 0;
3323 bookmark.start = 1;
3324 bookmark.end = 0;
3325
3326 for (;;) {
3327 if (this->interval.end >= end || !text[this->interval.end])
3328 break;
3329 if (m_query_start->match(text, this->interval.end, end, flags)) {
3330 path.end = this->interval.end;
3331 query.start = this->interval.end = m_query_start->interval.end;
3332 for (;;) {
3333 if (this->interval.end >= end || !text[this->interval.end]) {
3334 query.end = this->interval.end;
3335 break;
3336 }
3337 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3338 query.end = this->interval.end;
3339 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3340 for (;;) {
3341 if (this->interval.end >= end || !text[this->interval.end]) {
3342 bookmark.end = this->interval.end;
3343 break;
3344 }
3345 if (m_path_char->match(text, this->interval.end, end, flags))
3346 this->interval.end = m_path_char->interval.end;
3347 else {
3348 bookmark.end = this->interval.end;
3349 break;
3350 }
3351 }
3352 this->interval.start = start;
3353 return true;
3354 }
3355 if (m_path_char->match(text, this->interval.end, end, flags))
3356 this->interval.end = m_path_char->interval.end;
3357 else {
3358 query.end = this->interval.end;
3359 break;
3360 }
3361 }
3362 this->interval.start = start;
3363 return true;
3364 }
3365 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3366 path.end = this->interval.end;
3367 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3368 for (;;) {
3369 if (this->interval.end >= end || !text[this->interval.end]) {
3370 bookmark.end = this->interval.end;
3371 break;
3372 }
3373 if (m_path_char->match(text, this->interval.end, end, flags))
3374 this->interval.end = m_path_char->interval.end;
3375 else {
3376 bookmark.end = this->interval.end;
3377 break;
3378 }
3379 }
3380 this->interval.start = start;
3381 return true;
3382 }
3383 if (m_path_char->match(text, this->interval.end, end, flags))
3384 this->interval.end = m_path_char->interval.end;
3385 else
3386 break;
3387 }
3388
3390 path.end = this->interval.end;
3391 this->interval.start = start;
3392 return true;
3393 }
3394
3395 path.start = 1;
3396 path.end = 0;
3397 bookmark.start = 1;
3398 bookmark.end = 0;
3399 this->interval.invalidate();
3400 return false;
3401 }
3402
3403 virtual void invalidate()
3404 {
3405 path.start = 1;
3406 path.end = 0;
3407 query.start = 1;
3408 query.end = 0;
3409 bookmark.start = 1;
3410 bookmark.end = 0;
3412 }
3413
3414 public:
3417 stdex::interval<size_t> bookmark;
3418
3419 protected:
3420 std::shared_ptr<basic_parser<T>> m_path_char;
3421 std::shared_ptr<basic_parser<T>> m_query_start;
3422 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3423 };
3424
3427#ifdef _UNICODE
3428 using turl_path = wurl_path;
3429#else
3430 using turl_path = url_path;
3431#endif
3433
3437 template <class T>
3438 class basic_url : public basic_parser<T>
3439 {
3440 public:
3441 basic_url(
3442 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3443 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3444 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3445 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3446 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3447 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3448 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3449 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3450 _In_ const std::shared_ptr<basic_parser<T>>& at,
3451 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3452 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3453 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3454 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3455 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3456 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3457 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3458 _In_ const std::locale& locale = std::locale()) :
3459 basic_parser<T>(locale),
3460 http_scheme(_http_scheme),
3461 ftp_scheme(_ftp_scheme),
3462 mailto_scheme(_mailto_scheme),
3463 file_scheme(_file_scheme),
3464 m_colon(colon),
3465 m_slash(slash),
3466 username(_username),
3467 password(_password),
3468 m_at(at),
3469 m_ip_lbracket(ip_lbracket),
3470 m_ip_rbracket(ip_rbracket),
3471 ipv4_host(_ipv4_host),
3472 ipv6_host(_ipv6_host),
3473 dns_host(_dns_host),
3474 port(_port),
3475 path(_path)
3476 {}
3477
3478 virtual bool match(
3479 _In_reads_or_z_opt_(end) const T* text,
3480 _In_ size_t start = 0,
3481 _In_ size_t end = SIZE_MAX,
3482 _In_ int flags = match_default)
3483 {
3484 _Assume_(text || start >= end);
3485
3486 this->interval.end = start;
3487
3488 if (http_scheme->match(text, this->interval.end, end, flags) &&
3489 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3490 m_slash->match(text, m_colon->interval.end, end, flags) &&
3491 m_slash->match(text, m_slash->interval.end, end, flags))
3492 {
3493 // http://
3494 this->interval.end = m_slash->interval.end;
3495 ftp_scheme->invalidate();
3496 mailto_scheme->invalidate();
3497 file_scheme->invalidate();
3498 }
3499 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3500 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3501 m_slash->match(text, m_colon->interval.end, end, flags) &&
3502 m_slash->match(text, m_slash->interval.end, end, flags))
3503 {
3504 // ftp://
3505 this->interval.end = m_slash->interval.end;
3506 http_scheme->invalidate();
3507 mailto_scheme->invalidate();
3508 file_scheme->invalidate();
3509 }
3510 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3511 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3512 {
3513 // mailto:
3514 this->interval.end = m_colon->interval.end;
3515 http_scheme->invalidate();
3516 ftp_scheme->invalidate();
3517 file_scheme->invalidate();
3518 }
3519 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3520 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3521 m_slash->match(text, m_colon->interval.end, end, flags) &&
3522 m_slash->match(text, m_slash->interval.end, end, flags))
3523 {
3524 // file://
3525 this->interval.end = m_slash->interval.end;
3526 http_scheme->invalidate();
3527 ftp_scheme->invalidate();
3528 mailto_scheme->invalidate();
3529 }
3530 else {
3531 // Default to http:
3532 http_scheme->invalidate();
3533 ftp_scheme->invalidate();
3534 mailto_scheme->invalidate();
3535 file_scheme->invalidate();
3536 }
3537
3538 if (ftp_scheme->interval) {
3539 if (username->match(text, this->interval.end, end, flags)) {
3540 if (m_colon->match(text, username->interval.end, end, flags) &&
3541 password->match(text, m_colon->interval.end, end, flags) &&
3542 m_at->match(text, password->interval.end, end, flags))
3543 {
3544 // Username and password
3545 this->interval.end = m_at->interval.end;
3546 }
3547 else if (m_at->match(text, this->interval.end, end, flags)) {
3548 // Username only
3549 this->interval.end = m_at->interval.end;
3550 password->invalidate();
3551 }
3552 else {
3553 username->invalidate();
3554 password->invalidate();
3555 }
3556 }
3557 else {
3558 username->invalidate();
3559 password->invalidate();
3560 }
3561
3562 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3563 // Host is IPv4
3564 this->interval.end = ipv4_host->interval.end;
3565 ipv6_host->invalidate();
3566 dns_host->invalidate();
3567 }
3568 else if (
3569 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3570 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3571 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3572 {
3573 // Host is IPv6
3574 this->interval.end = m_ip_rbracket->interval.end;
3575 ipv4_host->invalidate();
3576 dns_host->invalidate();
3577 }
3578 else if (dns_host->match(text, this->interval.end, end, flags)) {
3579 // Host is hostname
3580 this->interval.end = dns_host->interval.end;
3581 ipv4_host->invalidate();
3582 ipv6_host->invalidate();
3583 }
3584 else {
3585 invalidate();
3586 return false;
3587 }
3588
3589 if (m_colon->match(text, this->interval.end, end, flags) &&
3590 port->match(text, m_colon->interval.end, end, flags))
3591 {
3592 // Port
3593 this->interval.end = port->interval.end;
3594 }
3595 else
3596 port->invalidate();
3597
3598 if (path->match(text, this->interval.end, end, flags)) {
3599 // Path
3600 this->interval.end = path->interval.end;
3601 }
3602
3603 this->interval.start = start;
3604 return true;
3605 }
3606
3607 if (mailto_scheme->interval) {
3608 if (username->match(text, this->interval.end, end, flags) &&
3609 m_at->match(text, username->interval.end, end, flags))
3610 {
3611 // Username
3612 this->interval.end = m_at->interval.end;
3613 }
3614 else {
3615 invalidate();
3616 return false;
3617 }
3618
3619 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3620 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3621 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3622 {
3623 // Host is IPv4
3624 this->interval.end = m_ip_rbracket->interval.end;
3625 ipv6_host->invalidate();
3626 dns_host->invalidate();
3627 }
3628 else if (
3629 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3630 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3631 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3632 {
3633 // Host is IPv6
3634 this->interval.end = m_ip_rbracket->interval.end;
3635 ipv4_host->invalidate();
3636 dns_host->invalidate();
3637 }
3638 else if (dns_host->match(text, this->interval.end, end, flags)) {
3639 // Host is hostname
3640 this->interval.end = dns_host->interval.end;
3641 ipv4_host->invalidate();
3642 ipv6_host->invalidate();
3643 }
3644 else {
3645 invalidate();
3646 return false;
3647 }
3648
3649 password->invalidate();
3650 port->invalidate();
3651 path->invalidate();
3652 this->interval.start = start;
3653 return true;
3654 }
3655
3656 if (file_scheme->interval) {
3657 if (path->match(text, this->interval.end, end, flags)) {
3658 // Path
3659 this->interval.end = path->interval.end;
3660 }
3661
3662 username->invalidate();
3663 password->invalidate();
3664 ipv4_host->invalidate();
3665 ipv6_host->invalidate();
3666 dns_host->invalidate();
3667 port->invalidate();
3668 this->interval.start = start;
3669 return true;
3670 }
3671
3672 // "http://" found or defaulted to
3673
3674 // If "http://" explicit, test for username&password.
3675 if (http_scheme->interval &&
3676 username->match(text, this->interval.end, end, flags))
3677 {
3678 if (m_colon->match(text, username->interval.end, end, flags) &&
3679 password->match(text, m_colon->interval.end, end, flags) &&
3680 m_at->match(text, password->interval.end, end, flags))
3681 {
3682 // Username and password
3683 this->interval.end = m_at->interval.end;
3684 }
3685 else if (m_at->match(text, username->interval.end, end, flags)) {
3686 // Username only
3687 this->interval.end = m_at->interval.end;
3688 password->invalidate();
3689 }
3690 else {
3691 username->invalidate();
3692 password->invalidate();
3693 }
3694 }
3695 else {
3696 username->invalidate();
3697 password->invalidate();
3698 }
3699
3700 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3701 // Host is IPv4
3702 this->interval.end = ipv4_host->interval.end;
3703 ipv6_host->invalidate();
3704 dns_host->invalidate();
3705 }
3706 else if (
3707 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3708 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3709 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3710 {
3711 // Host is IPv6
3712 this->interval.end = m_ip_rbracket->interval.end;
3713 ipv4_host->invalidate();
3714 dns_host->invalidate();
3715 }
3716 else if (dns_host->match(text, this->interval.end, end, flags)) {
3717 // Host is hostname
3718 this->interval.end = dns_host->interval.end;
3719 ipv4_host->invalidate();
3720 ipv6_host->invalidate();
3721 }
3722 else {
3723 invalidate();
3724 return false;
3725 }
3726
3727 if (m_colon->match(text, this->interval.end, end, flags) &&
3728 port->match(text, m_colon->interval.end, end, flags))
3729 {
3730 // Port
3731 this->interval.end = port->interval.end;
3732 }
3733 else
3734 port->invalidate();
3735
3736 if (path->match(text, this->interval.end, end, flags)) {
3737 // Path
3738 this->interval.end = path->interval.end;
3739 }
3740
3741 this->interval.start = start;
3742 return true;
3743 }
3744
/// Invalidates every publicly exposed component parser of the URL
/// (schemes, credentials, hosts, port and path).
3745 virtual void invalidate()
3746 {
3747 http_scheme->invalidate();
3748 ftp_scheme->invalidate();
3749 mailto_scheme->invalidate();
3750 file_scheme->invalidate();
3751 username->invalidate();
3752 password->invalidate();
3753 ipv4_host->invalidate();
3754 ipv6_host->invalidate();
3755 dns_host->invalidate();
3756 port->invalidate();
3757 path->invalidate();
// NOTE(review): the listing's own numbering jumps from 3757 to 3759, so one source line
// is missing here - presumably the statement invalidating this parser's own interval.
// Confirm against the original header.
3759 }
3760
// Component parsers whose intervals callers may inspect after match().
3761 public:
// Scheme keyword parsers; match() tries them in this order and invalidates the ones not used.
3762 std::shared_ptr<basic_parser<T>> http_scheme;
3763 std::shared_ptr<basic_parser<T>> ftp_scheme;
3764 std::shared_ptr<basic_parser<T>> mailto_scheme;
3765 std::shared_ptr<basic_parser<T>> file_scheme;
// Credentials preceding the "at" separator.
3766 std::shared_ptr<basic_parser<T>> username;
3767 std::shared_ptr<basic_parser<T>> password;
// Host alternatives; after a successful match() at most one of them is left valid.
3768 std::shared_ptr<basic_parser<T>> ipv4_host;
3769 std::shared_ptr<basic_parser<T>> ipv6_host;
3770 std::shared_ptr<basic_parser<T>> dns_host;
// Optional trailing parts.
3771 std::shared_ptr<basic_parser<T>> port;
3772 std::shared_ptr<basic_parser<T>> path;
3773
// Punctuation parsers used only internally by match().
3774 protected:
3775 std::shared_ptr<basic_parser<T>> m_colon;
3776 std::shared_ptr<basic_parser<T>> m_slash;
3777 std::shared_ptr<basic_parser<T>> m_at;
3778 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3779 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3780 };
3781
// Convenience aliases of basic_url for the supported character types.
3782 using url = basic_url<char>;
3783 using wurl = basic_url<wchar_t>;
// turl follows the build's _UNICODE setting.
3784#ifdef _UNICODE
3785 using turl = wurl;
3786#else
3787 using turl = url;
3788#endif
// Same instantiation as url (char based).
3789 using sgml_url = basic_url<char>;
3790
3794 template <class T>
3796 {
3797 public:
// Constructor parameter list and member initializers of the e-mail address parser.
// NOTE(review): the line carrying the constructor name (listing line 3798) is missing
// from this listing; confirm against the original header.
// Each argument is stored in the member of the matching name; `locale` goes to basic_parser<T>.
3799 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3800 _In_ const std::shared_ptr<basic_parser<T>>& at,
3801 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3802 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3803 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3804 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3805 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3806 _In_ const std::locale& locale = std::locale()) :
3807 basic_parser<T>(locale),
// NOTE(review): the initializers below are not listed in member declaration order
// (username, ipv4_host, ipv6_host, dns_host, m_at, m_ip_lbracket, m_ip_rbracket).
3808 username(_username),
3809 m_at(at),
3810 m_ip_lbracket(ip_lbracket),
3811 m_ip_rbracket(ip_rbracket),
3812 ipv4_host(_ipv4_host),
3813 ipv6_host(_ipv6_host),
3814 dns_host(_dns_host)
3815 {}
3816
3817 virtual bool match(
3818 _In_reads_or_z_opt_(end) const T* text,
3819 _In_ size_t start = 0,
3820 _In_ size_t end = SIZE_MAX,
3821 _In_ int flags = match_default)
3822 {
3823 _Assume_(text || start >= end);
3824
3825 if (username->match(text, start, end, flags) &&
3826 m_at->match(text, username->interval.end, end, flags))
3827 {
3828 // Username@
3829 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3830 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3831 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3832 {
3833 // Host is IPv4
3834 this->interval.end = m_ip_rbracket->interval.end;
3835 ipv6_host->invalidate();
3836 dns_host->invalidate();
3837 }
3838 else if (
3839 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3840 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3841 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3842 {
3843 // Host is IPv6
3844 this->interval.end = m_ip_rbracket->interval.end;
3845 ipv4_host->invalidate();
3846 dns_host->invalidate();
3847 }
3848 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3849 // Host is hostname
3850 this->interval.end = dns_host->interval.end;
3851 ipv4_host->invalidate();
3852 ipv6_host->invalidate();
3853 }
3854 else
3855 goto error;
3856 this->interval.start = start;
3857 return true;
3858 }
3859
3860 error:
3861 username->invalidate();
3862 ipv4_host->invalidate();
3863 ipv6_host->invalidate();
3864 dns_host->invalidate();
3865 this->interval.invalidate();
3866 return false;
3867 }
3868
/// Invalidates the user name and all three host parsers.
3869 virtual void invalidate()
3870 {
3871 username->invalidate();
3872 ipv4_host->invalidate();
3873 ipv6_host->invalidate();
3874 dns_host->invalidate();
// NOTE(review): the listing's own numbering jumps from 3874 to 3876, so one source line
// is missing here - presumably the statement invalidating this parser's own interval.
// Confirm against the original header.
3876 }
3877
// Component parsers whose intervals callers may inspect after match().
3878 public:
3879 std::shared_ptr<basic_parser<T>> username;
// Host alternatives; after a successful match() the two that were not used are invalidated.
3880 std::shared_ptr<basic_parser<T>> ipv4_host;
3881 std::shared_ptr<basic_parser<T>> ipv6_host;
3882 std::shared_ptr<basic_parser<T>> dns_host;
3883
// Punctuation parsers used only internally by match().
3884 protected:
3885 std::shared_ptr<basic_parser<T>> m_at;
3886 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3887 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3888 };
3889
3892#ifdef _UNICODE
3894#else
3896#endif
3898
3902 template <class T>
3904 {
3905 public:
// Constructor parameter list and member initializers of the emoticon parser.
// NOTE(review): the line carrying the constructor name (listing line 3906) is missing
// from this listing; confirm against the original header.
// match() null-checks emoticon, apex and nose before use, so those three may be null;
// eyes and mouth are dereferenced unconditionally there.
3907 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3908 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3909 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3910 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3911 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3912 _In_ const std::locale& locale = std::locale()) :
3913 basic_parser<T>(locale),
// NOTE(review): listing line 3914 is missing here - presumably the initializer that
// stores `_emoticon`, which is otherwise unused in the visible text. Confirm.
3915 apex(_apex),
3916 eyes(_eyes),
3917 nose(_nose),
3918 mouth(_mouth)
3919 {}
3920
///
/// Tries to match an emoticon at the given position
///
/// When the `emoticon` parser is set and matches, its match is taken as a whole.
/// Otherwise a composed form is tested: optional apex, eyes, optional nose, and a mouth
/// that may repeat as long as the same element of the mouth set keeps matching.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` at which matching starts
/// \param[in] end    Index one past the last character that may be consumed
/// \param[in] flags  `match_*` flags; passed unchanged to every sub-parser
///
/// \returns `true` and a valid `this->interval` on success; `false` with all component
///          parsers and `this->interval` invalidated otherwise
///
3921 virtual bool match(
3922 _In_reads_or_z_opt_(end) const T* text,
3923 _In_ size_t start = 0,
3924 _In_ size_t end = SIZE_MAX,
3925 _In_ int flags = match_default)
3926 {
3927 _Assume_(text || start >= end);
3928
// Whole-emoticon form: the component parsers do not apply.
3929 if (emoticon && emoticon->match(text, start, end, flags)) {
3930 if (apex) apex->invalidate();
3931 eyes->invalidate();
3932 if (nose) nose->invalidate();
3933 mouth->invalidate();
3934 this->interval.start = start;
3935 this->interval.end = emoticon->interval.end;
3936 return true;
3937 }
3938
3939 this->interval.end = start;
3940
// Apex is optional; when it does not match, parsing continues at `start`.
3941 if (apex && apex->match(text, this->interval.end, end, flags))
3942 this->interval.end = apex->interval.end;
3943
3944 if (eyes->match(text, this->interval.end, end, flags)) {
// Eyes + nose + mouth.
3945 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3946 mouth->match(text, nose->interval.end, end, flags))
3947 {
// NOTE(review): listing line 3949 is missing between the next two lines - presumably
// the declaration of `start_mouth` used further down. Confirm against the original header.
3948 size_t
3950 hit_offset = mouth->hit_offset;
3951 // Mouth may repeat :-)))))))
3952 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
// Make the mouth interval cover all repetitions.
3953 mouth->interval.start = start_mouth;
3954 mouth->interval.end = this->interval.end;
3955 this->interval.start = start;
3956 return true;
3957 }
// Eyes + mouth, no nose.
3958 if (mouth->match(text, eyes->interval.end, end, flags)) {
// NOTE(review): listing line 3960 is missing between the next two lines - presumably
// the declaration of `start_mouth`. Confirm against the original header.
3959 size_t
3961 hit_offset = mouth->hit_offset;
3962 // Mouth may repeat :-)))))))
3963 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3964 if (nose) nose->invalidate();
3965 mouth->interval.start = start_mouth;
3966 mouth->interval.end = this->interval.end;
3967 this->interval.start = start;
3968 return true;
3969 }
3970 }
3971
// No form matched: drop every partial result.
3972 if (emoticon) emoticon->invalidate();
3973 if (apex) apex->invalidate();
3974 eyes->invalidate();
3975 if (nose) nose->invalidate();
3976 mouth->invalidate();
3977 this->interval.invalidate();
3978 return false;
3979 }
3980
/// Invalidates all component parsers; emoticon, apex and nose only when set.
3981 virtual void invalidate()
3982 {
3983 if (emoticon) emoticon->invalidate();
3984 if (apex) apex->invalidate();
3985 eyes->invalidate();
3986 if (nose) nose->invalidate();
3987 mouth->invalidate();
// NOTE(review): the listing's own numbering jumps from 3987 to 3989, so one source line
// is missing here - presumably the statement invalidating this parser's own interval.
// Confirm against the original header.
3989 }
3990
// Component parsers whose intervals callers may inspect after match().
3991 public:
// Whole-emoticon parser; tested first by match(), may be null.
3992 std::shared_ptr<basic_parser<T>> emoticon;
// Optional leading part of the composed form; may be null.
3993 std::shared_ptr<basic_parser<T>> apex;
// Mandatory part of the composed form.
3994 std::shared_ptr<basic_parser<T>> eyes;
// Optional part between eyes and mouth; may be null.
3995 std::shared_ptr<basic_parser<T>> nose;
// Set parser; match() uses its hit_offset to accept repetitions of the same mouth only.
3996 std::shared_ptr<basic_set<T>> mouth;
3997 };
3998
// temoticon follows the build's _UNICODE setting.
// NOTE(review): the declarations of `emoticon` and `wemoticon` are not present in this
// listing (its numbering skips lines 3999-4000); confirm against the original header.
4001#ifdef _UNICODE
4002 using temoticon = wemoticon;
4003#else
4004 using temoticon = emoticon;
4005#endif
4007
///
/// Date layouts
///
/// Each layout is a distinct bit, so several layouts can be combined into the format
/// mask given to basic_date. The letters name the field order: d = day, m = month, y = year.
///
enum date_format_t {
    date_format_none = 0,       ///< No layout
    date_format_dmy  = 1 << 0,  ///< Day, month, year
    date_format_mdy  = 1 << 1,  ///< Month, day, year
    date_format_ymd  = 1 << 2,  ///< Year, month, day
    date_format_ym   = 1 << 3,  ///< Year, month
    date_format_my   = 1 << 4,  ///< Month, year
    date_format_dm   = 1 << 5,  ///< Day, month
    date_format_md   = 1 << 6,  ///< Month, day
};
4021
4025 template <class T>
4026 class basic_date : public basic_parser<T>
4027 {
4028 public:
4029 basic_date(
4030 _In_ int format_mask,
4031 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4032 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4033 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4034 _In_ const std::shared_ptr<basic_set<T>>& separator,
4035 _In_ const std::shared_ptr<basic_parser<T>>& space,
4036 _In_ const std::locale& locale = std::locale()) :
4037 basic_parser<T>(locale),
4038 format(date_format_none),
4039 m_format_mask(format_mask),
4040 day(_day),
4041 month(_month),
4042 year(_year),
4043 m_separator(separator),
4044 m_space(space)
4045 {}
4046
4047 virtual bool match(
4048 _In_reads_or_z_opt_(end) const T* text,
4049 _In_ size_t start = 0,
4050 _In_ size_t end = SIZE_MAX,
4051 _In_ int flags = match_default)
4052 {
4053 _Assume_(text || start >= end);
4054
4055 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4056 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4057 if (day->match(text, start, end, flags)) {
4058 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4059 if (m_separator->match(text, this->interval.end, end, flags)) {
4060 size_t hit_offset = m_separator->hit_offset;
4061 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4062 if (month->match(text, this->interval.end, end, flags)) {
4063 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4064 if (m_separator->match(text, this->interval.end, end, flags) &&
4065 m_separator->hit_offset == hit_offset) // Both separators must match.
4066 {
4067 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4068 if (year->match(text, this->interval.end, end, flags) &&
4069 is_valid(day->value, month->value))
4070 {
4071 this->interval.start = start;
4072 this->interval.end = year->interval.end;
4073 format = date_format_dmy;
4074 return true;
4075 }
4076 }
4077 }
4078 }
4079 }
4080 }
4081
4082 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4083 if (month->match(text, start, end, flags)) {
4084 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4085 if (m_separator->match(text, this->interval.end, end, flags)) {
4086 size_t hit_offset = m_separator->hit_offset;
4087 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4088 if (day->match(text, this->interval.end, end, flags)) {
4089 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4090 if (m_separator->match(text, this->interval.end, end, flags) &&
4091 m_separator->hit_offset == hit_offset) // Both separators must match.
4092 {
4093 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4094 if (year->match(text, this->interval.end, end, flags) &&
4095 is_valid(day->value, month->value))
4096 {
4097 this->interval.start = start;
4098 this->interval.end = year->interval.end;
4099 format = date_format_mdy;
4100 return true;
4101 }
4102 }
4103 }
4104 }
4105 }
4106 }
4107
4108 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4109 if (year->match(text, start, end, flags)) {
4110 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4111 if (m_separator->match(text, this->interval.end, end, flags)) {
4112 size_t hit_offset = m_separator->hit_offset;
4113 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4114 if (month->match(text, this->interval.end, end, flags)) {
4115 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4116 if (m_separator->match(text, this->interval.end, end, flags) &&
4117 m_separator->hit_offset == hit_offset) // Both separators must match.
4118 {
4119 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4120 if (day->match(text, this->interval.end, end, flags) &&
4121 is_valid(day->value, month->value))
4122 {
4123 this->interval.start = start;
4124 this->interval.end = day->interval.end;
4125 format = date_format_ymd;
4126 return true;
4127 }
4128 }
4129 }
4130 }
4131 }
4132 }
4133
4134 if ((m_format_mask & date_format_ym) == date_format_ym) {
4135 if (year->match(text, start, end, flags)) {
4136 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4137 if (m_separator->match(text, this->interval.end, end, flags)) {
4138 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4139 if (month->match(text, this->interval.end, end, flags) &&
4140 is_valid(SIZE_MAX, month->value))
4141 {
4142 if (day) day->invalidate();
4143 this->interval.start = start;
4144 this->interval.end = month->interval.end;
4145 format = date_format_ym;
4146 return true;
4147 }
4148 }
4149 }
4150 }
4151
4152 if ((m_format_mask & date_format_my) == date_format_my) {
4153 if (month->match(text, start, end, flags)) {
4154 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4155 if (m_separator->match(text, this->interval.end, end, flags)) {
4156 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4157 if (year->match(text, this->interval.end, end, flags) &&
4158 is_valid(SIZE_MAX, month->value))
4159 {
4160 if (day) day->invalidate();
4161 this->interval.start = start;
4162 this->interval.end = year->interval.end;
4163 format = date_format_my;
4164 return true;
4165 }
4166 }
4167 }
4168 }
4169
4170 if ((m_format_mask & date_format_dm) == date_format_dm) {
4171 if (day->match(text, start, end, flags)) {
4172 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4173 if (m_separator->match(text, this->interval.end, end, flags)) {
4174 size_t hit_offset = m_separator->hit_offset;
4175 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4176 if (month->match(text, this->interval.end, end, flags) &&
4177 is_valid(day->value, month->value))
4178 {
4179 if (year) year->invalidate();
4180 this->interval.start = start;
4181 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4182 if (m_separator->match(text, this->interval.end, end, flags) &&
4183 m_separator->hit_offset == hit_offset) // Both separators must match.
4184 this->interval.end = m_separator->interval.end;
4185 else
4186 this->interval.end = month->interval.end;
4187 format = date_format_dm;
4188 return true;
4189 }
4190 }
4191 }
4192 }
4193
4194 if ((m_format_mask & date_format_md) == date_format_md) {
4195 if (month->match(text, start, end, flags)) {
4196 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4197 if (m_separator->match(text, this->interval.end, end, flags)) {
4198 size_t hit_offset = m_separator->hit_offset;
4199 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4200 if (day->match(text, this->interval.end, end, flags) &&
4201 is_valid(day->value, month->value))
4202 {
4203 if (year) year->invalidate();
4204 this->interval.start = start;
4205 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4206 if (m_separator->match(text, this->interval.end, end, flags) &&
4207 m_separator->hit_offset == hit_offset) // Both separators must match.
4208 this->interval.end = m_separator->interval.end;
4209 else
4210 this->interval.end = day->interval.end;
4211 format = date_format_md;
4212 return true;
4213 }
4214 }
4215 }
4216 }
4217
4218 if (day) day->invalidate();
4219 if (month) month->invalidate();
4220 if (year) year->invalidate();
4221 format = date_format_none;
4222 this->interval.invalidate();
4223 return false;
4224 }
4225
/// Invalidates the day, month and year parsers that are set and resets `format`.
4226 virtual void invalidate()
4227 {
4228 if (day) day->invalidate();
4229 if (month) month->invalidate();
4230 if (year) year->invalidate();
4231 format = date_format_none;
// NOTE(review): the listing's own numbering jumps from 4231 to 4233, so one source line
// is missing here - presumably the statement invalidating this parser's own interval.
// Confirm against the original header.
4233 }
4234
4235 protected:
///
/// Checks whether a day/month pair can denote a calendar date
///
/// The year is not an input, so leap years are not distinguished: February accepts
/// days 1 to 29. Declared `constexpr` so the check can also be evaluated at compile time.
///
/// \param[in] day    Day of the month, or `SIZE_MAX` to validate the month alone
/// \param[in] month  Month number (1 to 12), or `SIZE_MAX` to validate the day alone
///
/// \returns `true` when `month` is in 1..12 and `day` is in 1..(days of that month); `false` otherwise
///
static constexpr bool is_valid(size_t day, size_t month)
{
    // Default to January. This allows validating day only, as January has all 31 days.
    if (month == SIZE_MAX)
        month = 1;
    // Default to 1st day in month. This allows validating month only, as each month has 1st day.
    if (day == SIZE_MAX)
        day = 1;

    size_t last_day = 0;
    switch (month) {
    case 1: case 3: case 5: case 7: case 8: case 10: case 12:
        last_day = 31;
        break;
    case 2:
        last_day = 29;
        break;
    case 4: case 6: case 9: case 11:
        last_day = 30;
        break;
    default:
        // Not a month number.
        return false;
    }
    return 1 <= day && day <= last_day;
}
4267
// Results of the last match().
4268 public:
// Layout that matched; date_format_none when nothing matched.
4269 date_format_t format;
// Field parsers; match() and invalidate() null-check them before invalidating.
4270 std::shared_ptr<basic_integer<T>> day;
4271 std::shared_ptr<basic_integer<T>> month;
4272 std::shared_ptr<basic_integer<T>> year;
4273
4274 protected:
// Bitwise combination of date_format_t values that match() is allowed to try.
4275 int m_format_mask;
// Separator set; its hit_offset tells which separator was used.
4276 std::shared_ptr<basic_set<T>> m_separator;
// Spaces allowed around separators.
4277 std::shared_ptr<basic_parser<T>> m_space;
4278 };
4279
// Convenience aliases of basic_date for the supported character types.
4280 using date = basic_date<char>;
4281 using wdate = basic_date<wchar_t>;
// tdate follows the build's _UNICODE setting.
4282#ifdef _UNICODE
4283 using tdate = wdate;
4284#else
4285 using tdate = date;
4286#endif
4288
4292 template <class T>
4293 class basic_time : public basic_parser<T>
4294 {
4295 public:
4296 basic_time(
4297 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4298 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4299 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4300 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4301 _In_ const std::shared_ptr<basic_set<T>>& separator,
4302 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4303 _In_ const std::locale& locale = std::locale()) :
4304 basic_parser<T>(locale),
4305 hour(_hour),
4306 minute(_minute),
4307 second(_second),
4308 millisecond(_millisecond),
4309 m_separator(separator),
4310 m_millisecond_separator(millisecond_separator)
4311 {}
4312
4313 virtual bool match(
4314 _In_reads_or_z_opt_(end) const T* text,
4315 _In_ size_t start = 0,
4316 _In_ size_t end = SIZE_MAX,
4317 _In_ int flags = match_default)
4318 {
4319 _Assume_(text || start >= end);
4320
4321 if (hour->match(text, start, end, flags) &&
4322 m_separator->match(text, hour->interval.end, end, flags) &&
4323 minute->match(text, m_separator->interval.end, end, flags) &&
4324 minute->value < 60)
4325 {
4326 // hh::mm
4327 size_t hit_offset = m_separator->hit_offset;
4328 if (m_separator->match(text, minute->interval.end, end, flags) &&
4329 m_separator->hit_offset == hit_offset && // Both separators must match.
4330 second && second->match(text, m_separator->interval.end, end, flags) &&
4331 second->value < 60)
4332 {
4333 // hh::mm:ss
4334 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4335 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4336 millisecond->value < 1000)
4337 {
4338 // hh::mm:ss.mmmm
4339 this->interval.end = millisecond->interval.end;
4340 }
4341 else {
4342 if (millisecond) millisecond->invalidate();
4343 this->interval.end = second->interval.end;
4344 }
4345 }
4346 else {
4347 if (second) second->invalidate();
4348 if (millisecond) millisecond->invalidate();
4349 this->interval.end = minute->interval.end;
4350 }
4351 this->interval.start = start;
4352 return true;
4353 }
4354
4355 hour->invalidate();
4356 minute->invalidate();
4357 if (second) second->invalidate();
4358 if (millisecond) millisecond->invalidate();
4359 this->interval.invalidate();
4360 return false;
4361 }
4362
/// Invalidates the hour and minute parsers, and the second and millisecond parsers when set.
4363 virtual void invalidate()
4364 {
4365 hour->invalidate();
4366 minute->invalidate();
4367 if (second) second->invalidate();
4368 if (millisecond) millisecond->invalidate();
// NOTE(review): the listing's own numbering jumps from 4368 to 4370, so one source line
// is missing here - presumably the statement invalidating this parser's own interval.
// Confirm against the original header.
4370 }
4371
// Field parsers whose values and intervals callers may inspect after match().
4372 public:
4373 std::shared_ptr<basic_integer10<T>> hour;
4374 std::shared_ptr<basic_integer10<T>> minute;
// May be null; match() and invalidate() check before use.
4375 std::shared_ptr<basic_integer10<T>> second;
// May be null; match() and invalidate() check before use.
4376 std::shared_ptr<basic_integer10<T>> millisecond;
4377
4378 protected:
// Separator set; its hit_offset tells which separator was used.
4379 std::shared_ptr<basic_set<T>> m_separator;
// May be null; match() then never tests milliseconds.
4380 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4381 };
4382
// Convenience aliases of basic_time for the supported character types.
4383 using time = basic_time<char>;
4384 using wtime = basic_time<wchar_t>;
// ttime follows the build's _UNICODE setting.
4385#ifdef _UNICODE
4386 using ttime = wtime;
4387#else
4388 using ttime = time;
4389#endif
4391
4395 template <class T>
4396 class basic_angle : public basic_parser<T>
4397 {
4398 public:
// Constructor parameter list and member initializers of the angle parser.
// NOTE(review): the line carrying the constructor name (listing line 4399) is missing
// from this listing; confirm against the original header.
// Each argument is stored in the member of the matching name; `locale` goes to basic_parser<T>.
// match() null-checks second, second_separator and decimal before use, so those may be null.
4400 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4401 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4402 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4403 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4404 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4405 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4406 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4407 _In_ const std::locale& locale = std::locale()) :
4408 basic_parser<T>(locale),
4409 degree(_degree),
4410 degree_separator(_degree_separator),
4411 minute(_minute),
4412 minute_separator(_minute_separator),
4413 second(_second),
4414 second_separator(_second_separator),
4415 decimal(_decimal)
4416 {}
4417
4418 virtual bool match(
4419 _In_reads_or_z_opt_(end) const T* text,
4420 _In_ size_t start = 0,
4421 _In_ size_t end = SIZE_MAX,
4422 _In_ int flags = match_default)
4423 {
4424 _Assume_(text || start >= end);
4425
4426 this->interval.end = start;
4427
4428 if (degree->match(text, this->interval.end, end, flags) &&
4429 degree_separator->match(text, degree->interval.end, end, flags))
4430 {
4431 // Degrees
4432 this->interval.end = degree_separator->interval.end;
4433 }
4434 else {
4435 degree->invalidate();
4436 degree_separator->invalidate();
4437 }
4438
4439 if (minute->match(text, this->interval.end, end, flags) &&
4440 minute->value < 60 &&
4441 minute_separator->match(text, minute->interval.end, end, flags))
4442 {
4443 // Minutes
4444 this->interval.end = minute_separator->interval.end;
4445 }
4446 else {
4447 minute->invalidate();
4448 minute_separator->invalidate();
4449 }
4450
4451 if (second && second->match(text, this->interval.end, end, flags) &&
4452 second->value < 60)
4453 {
4454 // Seconds
4455 this->interval.end = second->interval.end;
4456 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4457 this->interval.end = second_separator->interval.end;
4458 else
4459 if (second_separator) second_separator->invalidate();
4460 }
4461 else {
4462 if (second) second->invalidate();
4463 if (second_separator) second_separator->invalidate();
4464 }
4465
4466 if (degree->interval.start < degree->interval.end ||
4467 minute->interval.start < minute->interval.end ||
4468 (second && second->interval.start < second->interval.end))
4469 {
4470 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4471 // Decimals
4472 this->interval.end = decimal->interval.end;
4473 }
4474 else if (decimal)
4475 decimal->invalidate();
4476 this->interval.start = start;
4477 return true;
4478 }
4479 if (decimal) decimal->invalidate();
4480 this->interval.invalidate();
4481 return false;
4482 }
4483
4484 virtual void invalidate()
4485 {
4486 degree->invalidate();
4487 degree_separator->invalidate();
4488 minute->invalidate();
4489 minute_separator->invalidate();
4490 if (second) second->invalidate();
4491 if (second_separator) second_separator->invalidate();
4492 if (decimal) decimal->invalidate();
4494 }
4495
// Component parsers. match() dereferences the degree and minute parsers and their
// separators unconditionally; second, second_separator and decimal are null-checked
// before use and may therefore be left unset.
4496 public:
4497 std::shared_ptr<basic_integer10<T>> degree; ///< Degree number parser
4498 std::shared_ptr<basic_parser<T>> degree_separator; ///< Parser for the separator following the degree number
4499 std::shared_ptr<basic_integer10<T>> minute; ///< Minute number parser (value must be below 60 to be accepted)
4500 std::shared_ptr<basic_parser<T>> minute_separator; ///< Parser for the separator following the minute number
4501 std::shared_ptr<basic_integer10<T>> second; ///< Second number parser (optional; value must be below 60 to be accepted)
4502 std::shared_ptr<basic_parser<T>> second_separator; ///< Parser for the separator following the second number (optional)
4503 std::shared_ptr<basic_parser<T>> decimal; ///< Parser for trailing decimals (optional)
4504 };
4505
4506 using angle = basic_angle<char>;
4508#ifdef _UNICODE
4509 using RRegElKot = wangle;
4510#else
4511 using RRegElKot = angle;
4512#endif
4514
4518 template <class T>
4520 {
4521 public:
4523 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4524 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4525 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4526 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4527 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4528 _In_ const std::shared_ptr<basic_parser<T>>& space,
4529 _In_ const std::locale& locale = std::locale()) :
4530 basic_parser<T>(locale),
4531 m_digit(digit),
4532 m_plus_sign(plus_sign),
4533 m_lparenthesis(lparenthesis),
4534 m_rparenthesis(rparenthesis),
4535 m_separator(separator),
4536 m_space(space)
4537 {}
4538
4539 virtual bool match(
4540 _In_reads_or_z_opt_(end) const T* text,
4541 _In_ size_t start = 0,
4542 _In_ size_t end = SIZE_MAX,
4543 _In_ int flags = match_default)
4544 {
4545 _Assume_(text || start >= end);
4546
4547 size_t safe_digit_end = start, safe_value_size = 0;
4548 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4549 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4550
4551 this->interval.end = start;
4552 value.clear();
4553 m_lparenthesis->invalidate();
4554 m_rparenthesis->invalidate();
4555
4556 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4557 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4558 safe_value_size = value.size();
4559 this->interval.end = m_plus_sign->interval.end;
4560 }
4561
4562 for (;;) {
4563 _Assume_(text || this->interval.end >= end);
4564 if (this->interval.end >= end || !text[this->interval.end])
4565 break;
4566 if (m_digit->match(text, this->interval.end, end, flags)) {
4567 // Digit
4568 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4569 this->interval.end = m_digit->interval.end;
4570 if (!in_parentheses) {
4571 safe_digit_end = this->interval.end;
4572 safe_value_size = value.size();
4573 has_digits = true;
4574 }
4575 after_digit = true;
4576 after_parentheses = false;
4577 }
4578 else if (
4579 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4580 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4581 m_lparenthesis->match(text, this->interval.end, end, flags))
4582 {
4583 // Left parenthesis
4584 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4585 this->interval.end = m_lparenthesis->interval.end;
4586 in_parentheses = true;
4587 after_digit = false;
4588 after_parentheses = false;
4589 }
4590 else if (
4591 in_parentheses && // After left parenthesis
4592 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4593 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4594 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4595 {
4596 // Right parenthesis
4597 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4598 this->interval.end = m_rparenthesis->interval.end;
4599 safe_digit_end = this->interval.end;
4600 safe_value_size = value.size();
4601 in_parentheses = false;
4602 after_digit = false;
4603 after_parentheses = true;
4604 }
4605 else if (
4606 after_digit &&
4607 !in_parentheses && // No separators inside parentheses
4608 !after_parentheses && // No separators following right parenthesis
4609 m_separator && m_separator->match(text, this->interval.end, end, flags))
4610 {
4611 // Separator
4612 this->interval.end = m_separator->interval.end;
4613 after_digit = false;
4614 after_parentheses = false;
4615 }
4616 else if (
4618 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4619 {
4620 // Space
4621 this->interval.end = m_space->interval.end;
4622 after_digit = false;
4623 after_parentheses = false;
4624 }
4625 else
4626 break;
4627 }
4628 if (has_digits) {
4629 value.erase(safe_value_size);
4630 this->interval.start = start;
4631 this->interval.end = safe_digit_end;
4632 return true;
4633 }
4634 value.clear();
4635 this->interval.invalidate();
4636 return false;
4637 }
4638
4639 virtual void invalidate()
4640 {
4641 value.clear();
4643 }
4644
4645 public:
4646 std::basic_string<T> value;
4647
4648 protected:
4649 std::shared_ptr<basic_parser<T>> m_digit;
4650 std::shared_ptr<basic_parser<T>> m_plus_sign;
4651 std::shared_ptr<basic_set<T>> m_lparenthesis;
4652 std::shared_ptr<basic_set<T>> m_rparenthesis;
4653 std::shared_ptr<basic_parser<T>> m_separator;
4654 std::shared_ptr<basic_parser<T>> m_space;
4655 };
4656
4659#ifdef _UNICODE
4661#else
4663#endif
4665
4671 template <class T>
4672 class basic_iban : public basic_parser<T>
4673 {
4674 public:
4675 basic_iban(
4676 _In_ const std::shared_ptr<basic_parser<T>>& space,
4677 _In_ const std::locale& locale = std::locale()) :
4678 basic_parser<T>(locale),
4679 m_space(space)
4680 {
4681 this->country[0] = 0;
4682 this->check_digits[0] = 0;
4683 this->bban[0] = 0;
4684 this->is_valid = false;
4685 }
4686
4687 virtual bool match(
4688 _In_reads_or_z_opt_(end) const T* text,
4689 _In_ size_t start = 0,
4690 _In_ size_t end = SIZE_MAX,
4691 _In_ int flags = match_default)
4692 {
4693 _Assume_(text || start >= end);
4694 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4695 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4696 struct country_t {
4697 T country[2];
4698 T check_digits[2];
4699 size_t length;
4700 };
4701 static const country_t s_countries[] = {
4702 { { 'A', 'D' }, {}, 24 }, // Andorra
4703 { { 'A', 'E' }, {}, 23 }, // United Arab Emirates
4704 { { 'A', 'L' }, {}, 28 }, // Albania
4705 { { 'A', 'O' }, {}, 25 }, // Angola
4706 { { 'A', 'T' }, {}, 20 }, // Austria
4707 { { 'A', 'Z' }, {}, 28 }, // Azerbaijan
4708 { { 'B', 'A' }, { '3', '9' }, 20}, // Bosnia and Herzegovina
4709 { { 'B', 'E' }, {}, 16 }, // Belgium
4710 { { 'B', 'F' }, {}, 28 }, // Burkina Faso
4711 { { 'B', 'G' }, {}, 22 }, // Bulgaria
4712 { { 'B', 'H' }, {}, 22 }, // Bahrain
4713 { { 'B', 'I' }, {}, 27 }, // Burundi
4714 { { 'B', 'J' }, {}, 28 }, // Benin
4715 { { 'B', 'R' }, {}, 29 }, // Brazil
4716 { { 'B', 'Y' }, {}, 28 }, // Belarus
4717 { { 'C', 'F' }, {}, 27 }, // Central African Republic
4718 { { 'C', 'G' }, {}, 27 }, // Congo, Republic of the
4719 { { 'C', 'H' }, {}, 21 }, // Switzerland
4720 { { 'C', 'I' }, {}, 28 }, // Côte d'Ivoire
4721 { { 'C', 'M' }, {}, 27 }, // Cameroon
4722 { { 'C', 'R' }, {}, 22 }, // Costa Rica
4723 { { 'C', 'V' }, {}, 25 }, // Cabo Verde
4724 { { 'C', 'Y' }, {}, 28 }, // Cyprus
4725 { { 'C', 'Z' }, {}, 24 }, // Czech Republic
4726 { { 'D', 'E' }, {}, 22 }, // Germany
4727 { { 'D', 'J' }, {}, 27 }, // Djibouti
4728 { { 'D', 'K' }, {}, 18 }, // Denmark
4729 { { 'D', 'O' }, {}, 28 }, // Dominican Republic
4730 { { 'D', 'Z' }, {}, 26 }, // Algeria
4731 { { 'E', 'E' }, {}, 20 }, // Estonia
4732 { { 'E', 'G' }, {}, 29 }, // Egypt
4733 { { 'E', 'S' }, {}, 24 }, // Spain
4734 { { 'F', 'I' }, {}, 18 }, // Finland
4735 { { 'F', 'O' }, {}, 18 }, // Faroe Islands
4736 { { 'F', 'R' }, {}, 27 }, // France
4737 { { 'G', 'A' }, {}, 27 }, // Gabon
4738 { { 'G', 'B' }, {}, 22 }, // United Kingdom
4739 { { 'G', 'E' }, {}, 22 }, // Georgia
4740 { { 'G', 'I' }, {}, 23 }, // Gibraltar
4741 { { 'G', 'L' }, {}, 18 }, // Greenland
4742 { { 'G', 'Q' }, {}, 27 }, // Equatorial Guinea
4743 { { 'G', 'R' }, {}, 27 }, // Greece
4744 { { 'G', 'T' }, {}, 28 }, // Guatemala
4745 { { 'G', 'W' }, {}, 25 }, // Guinea-Bissau
4746 { { 'H', 'N' }, {}, 28 }, // Honduras
4747 { { 'H', 'R' }, {}, 21 }, // Croatia
4748 { { 'H', 'U' }, {}, 28 }, // Hungary
4749 { { 'I', 'E' }, {}, 22 }, // Ireland
4750 { { 'I', 'L' }, {}, 23 }, // Israel
4751 { { 'I', 'Q' }, {}, 23 }, // Iraq
4752 { { 'I', 'R' }, {}, 26 }, // Iran
4753 { { 'I', 'S' }, {}, 26 }, // Iceland
4754 { { 'I', 'T' }, {}, 27 }, // Italy
4755 { { 'J', 'O' }, {}, 30 }, // Jordan
4756 { { 'K', 'M' }, {}, 27 }, // Comoros
4757 { { 'K', 'W' }, {}, 30 }, // Kuwait
4758 { { 'K', 'Z' }, {}, 20 }, // Kazakhstan
4759 { { 'L', 'B' }, {}, 28 }, // Lebanon
4760 { { 'L', 'C' }, {}, 32 }, // Saint Lucia
4761 { { 'L', 'I' }, {}, 21 }, // Liechtenstein
4762 { { 'L', 'T' }, {}, 20 }, // Lithuania
4763 { { 'L', 'U' }, {}, 20 }, // Luxembourg
4764 { { 'L', 'V' }, {}, 21 }, // Latvia
4765 { { 'L', 'Y' }, {}, 25 }, // Libya
4766 { { 'M', 'A' }, {}, 28 }, // Morocco
4767 { { 'M', 'C' }, {}, 27 }, // Monaco
4768 { { 'M', 'D' }, {}, 24 }, // Moldova
4769 { { 'M', 'E' }, { '2', '5' }, 22 }, // Montenegro
4770 { { 'M', 'G' }, {}, 27 }, // Madagascar
4771 { { 'M', 'K' }, { '0', '7' }, 19 }, // North Macedonia
4772 { { 'M', 'L' }, {}, 28 }, // Mali
4773 { { 'M', 'R' }, { '1', '3' }, 27}, // Mauritania
4774 { { 'M', 'T' }, {}, 31 }, // Malta
4775 { { 'M', 'U' }, {}, 30 }, // Mauritius
4776 { { 'M', 'Z' }, {}, 25 }, // Mozambique
4777 { { 'N', 'E' }, {}, 28 }, // Niger
4778 { { 'N', 'I' }, {}, 32 }, // Nicaragua
4779 { { 'N', 'L' }, {}, 18 }, // Netherlands
4780 { { 'N', 'O' }, {}, 15 }, // Norway
4781 { { 'P', 'K' }, {}, 24 }, // Pakistan
4782 { { 'P', 'L' }, {}, 28 }, // Poland
4783 { { 'P', 'S' }, {}, 29 }, // Palestinian territories
4784 { { 'P', 'T' }, { '5', '0' }, 25 }, // Portugal
4785 { { 'Q', 'A' }, {}, 29 }, // Qatar
4786 { { 'R', 'O' }, {}, 24 }, // Romania
4787 { { 'R', 'S' }, { '3', '5' }, 22 }, // Serbia
4788 { { 'R', 'U' }, {}, 33 }, // Russia
4789 { { 'S', 'A' }, {}, 24 }, // Saudi Arabia
4790 { { 'S', 'C' }, {}, 31 }, // Seychelles
4791 { { 'S', 'D' }, {}, 18 }, // Sudan
4792 { { 'S', 'E' }, {}, 24 }, // Sweden
4793 { { 'S', 'I' }, { '5', '6' }, 19 }, // Slovenia
4794 { { 'S', 'K' }, {}, 24 }, // Slovakia
4795 { { 'S', 'M' }, {}, 27 }, // San Marino
4796 { { 'S', 'N' }, {}, 28 }, // Senegal
4797 { { 'S', 'T' }, {}, 25 }, // São Tomé and Príncipe
4798 { { 'S', 'V' }, {}, 28 }, // El Salvador
4799 { { 'T', 'D' }, {}, 27 }, // Chad
4800 { { 'T', 'G' }, {}, 28 }, // Togo
4801 { { 'T', 'L' }, { '3', '8' }, 23}, // East Timor
4802 { { 'T', 'N' }, { '5', '9' }, 24 }, // Tunisia
4803 { { 'T', 'R' }, {}, 26 }, // Turkey
4804 { { 'U', 'A' }, {}, 29 }, // Ukraine
4805 { { 'V', 'A' }, {}, 22 }, // Vatican City
4806 { { 'V', 'G' }, {}, 24 }, // Virgin Islands, British
4807 { { 'X', 'K' }, {}, 20 }, // Kosovo
4808 };
4809 const country_t* country_desc = nullptr;
4810 size_t n, available, next, bban_length;
4812
4813 this->interval.end = start;
4814 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4815 if (this->interval.end >= end || !text[this->interval.end])
4816 goto error; // incomplete country code
4817 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4818 if (chr < 'A' || 'Z' < chr)
4819 goto error; // invalid country code
4820 this->country[i] = chr;
4821 }
4822 for (size_t l = 0, r = _countof(s_countries);;) {
4823 if (l >= r)
4824 goto error; // unknown country
4825 size_t m = (l + r) / 2;
4826 const country_t& c = s_countries[m];
4827 if (c.country[0] < this->country[0] || (c.country[0] == this->country[0] && c.country[1] < this->country[1]))
4828 l = m + 1;
4829 else if (this->country[0] < c.country[0] || (this->country[0] == c.country[0] && this->country[1] < c.country[1]))
4830 r = m;
4831 else {
4832 country_desc = &c;
4833 break;
4834 }
4835 }
4836 this->country[2] = 0;
4837
4838 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4839 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4840 goto error; // incomplete or invalid check digits
4841 this->check_digits[i] = text[this->interval.end];
4842 }
4843 this->check_digits[2] = 0;
4844
4845 if ((country_desc->check_digits[0] && this->check_digits[0] != country_desc->check_digits[0]) ||
4846 (country_desc->check_digits[1] && this->check_digits[1] != country_desc->check_digits[1]))
4847 goto error; // unexpected check digits
4848
4849 bban_length = country_desc->length - 4;
4850 for (n = 0; n < bban_length;) {
4851 if (this->interval.end >= end || !text[this->interval.end])
4852 goto error; // bban too short
4853 if (m_space && m_space->match(text, this->interval.end, end, flags)) {
4854 this->interval.end = m_space->interval.end;
4855 continue;
4856 }
4857 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4858 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4859 this->bban[n++] = chr;
4860 this->interval.end++;
4861 }
4862 else
4863 goto error; // invalid bban
4864 }
4865 this->bban[n] = 0;
4866
4867 // Normalize IBAN.
4868 T normalized[69];
4869 available = 0;
4870 for (size_t i = 0; ; ++i) {
4871 if (!this->bban[i]) {
4872 for (i = 0; i < 2; ++i) {
4873 if ('A' <= this->country[i] && this->country[i] <= 'J') {
4874 normalized[available++] = '1';
4875 normalized[available++] = '0' + this->country[i] - 'A';
4876 }
4877 else if ('K' <= this->country[i] && this->country[i] <= 'T') {
4878 normalized[available++] = '2';
4879 normalized[available++] = '0' + this->country[i] - 'K';
4880 }
4881 else if ('U' <= this->country[i] && this->country[i] <= 'Z') {
4882 normalized[available++] = '3';
4883 normalized[available++] = '0' + this->country[i] - 'U';
4884 }
4885 }
4886 normalized[available++] = this->check_digits[0];
4887 normalized[available++] = this->check_digits[1];
4888 normalized[available] = 0;
4889 break;
4890 }
4891 if ('0' <= this->bban[i] && this->bban[i] <= '9')
4892 normalized[available++] = this->bban[i];
4893 else if ('A' <= this->bban[i] && this->bban[i] <= 'J') {
4894 normalized[available++] = '1';
4895 normalized[available++] = '0' + this->bban[i] - 'A';
4896 }
4897 else if ('K' <= this->bban[i] && this->bban[i] <= 'T') {
4898 normalized[available++] = '2';
4899 normalized[available++] = '0' + this->bban[i] - 'K';
4900 }
4901 else if ('U' <= this->bban[i] && this->bban[i] <= 'Z') {
4902 normalized[available++] = '3';
4903 normalized[available++] = '0' + this->bban[i] - 'U';
4904 }
4905 }
4906
4907 // Calculate modulo 97.
4908 nominator = stdex::strtou32(normalized, 9, &next, 10);
4909 for (;;) {
4910 nominator %= 97;
4911 if (!normalized[next]) {
4912 this->is_valid = nominator == 1;
4913 break;
4914 }
4915 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
4916 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
4917 nominator = nominator * 10 + (normalized[next] - '0');
4918 }
4919
4920 this->interval.start = start;
4921 return true;
4922
4923 error:
4924 this->country[0] = 0;
4925 this->check_digits[0] = 0;
4926 this->bban[0] = 0;
4927 this->is_valid = false;
4928 this->interval.invalidate();
4929 return false;
4930 }
4931
4932 virtual void invalidate()
4933 {
4934 this->country[0] = 0;
4935 this->check_digits[0] = 0;
4936 this->bban[0] = 0;
4937 this->is_valid = false;
4939 }
4940
4941 public:
4942 T country[3];
4944 T bban[31];
4946
4947 protected:
4948 std::shared_ptr<basic_parser<T>> m_space;
4949 };
4950
/// IBAN parser for narrow (char) text
4951 using iban = basic_iban<char>;
/// IBAN parser for wide (wchar_t) text
4952 using wiban = basic_iban<wchar_t>;
/// IBAN parser for the build's default character width: wiban when _UNICODE is defined, iban otherwise
4953#ifdef _UNICODE
4954 using tiban = wiban;
4955#else
4956 using tiban = iban;
4957#endif
4959
4965 template <class T>
4967 {
4968 public:
4970 _In_ const std::shared_ptr<basic_parser<T>>& space,
4971 _In_ const std::locale& locale = std::locale()) :
4972 basic_parser<T>(locale),
4973 m_space(space)
4974 {
4975 this->check_digits[0] = 0;
4976 this->reference[0] = 0;
4977 this->is_valid = false;
4978 }
4979
4980 virtual bool match(
4981 _In_reads_or_z_opt_(end) const T* text,
4982 _In_ size_t start = 0,
4983 _In_ size_t end = SIZE_MAX,
4984 _In_ int flags = match_default)
4985 {
4986 _Assume_(text || start >= end);
4987 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4988 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4989 size_t n, available, next;
4991
4992 this->interval.end = start;
4993 if (this->interval.end + 1 >= end ||
4994 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'R' ||
4995 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'F')
4996 goto error; // incomplete or wrong reference ID
4997 this->interval.end += 2;
4998
4999 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5000 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5001 goto error; // incomplete or invalid check digits
5002 this->check_digits[i] = text[this->interval.end];
5003 }
5004 this->check_digits[2] = 0;
5005
5006 for (n = 0;;) {
5007 if (m_space && m_space->match(text, this->interval.end, end, flags))
5008 this->interval.end = m_space->interval.end;
5009 for (size_t j = 0; j < 4; ++j) {
5010 if (this->interval.end >= end || !text[this->interval.end])
5011 goto out;
5012 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
5013 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
5014 if (n >= _countof(reference) - 1)
5015 goto error; // reference overflow
5016 this->reference[n++] = chr;
5017 this->interval.end++;
5018 }
5019 else
5020 goto out;
5021 }
5022 }
5023 out:
5024 if (!n)
5025 goto error; // reference too short
5026 this->reference[_countof(this->reference) - 1] = 0;
5027 for (size_t i = n, j = _countof(this->reference) - 1; i;)
5028 this->reference[--j] = this->reference[--i];
5029 for (size_t j = _countof(this->reference) - 1 - n; j;)
5030 this->reference[--j] = '0';
5031
5032 // Normalize creditor reference.
5033 T normalized[47];
5034 available = 0;
5035 for (size_t i = 0; ; ++i) {
5036 if (!this->reference[i]) {
5037 normalized[available++] = '2'; // R
5038 normalized[available++] = '7';
5039 normalized[available++] = '1'; // F
5040 normalized[available++] = '5';
5041 normalized[available++] = this->check_digits[0];
5042 normalized[available++] = this->check_digits[1];
5043 normalized[available] = 0;
5044 break;
5045 }
5046 if ('0' <= this->reference[i] && this->reference[i] <= '9')
5047 normalized[available++] = this->reference[i];
5048 else if ('A' <= this->reference[i] && this->reference[i] <= 'J') {
5049 normalized[available++] = '1';
5050 normalized[available++] = '0' + this->reference[i] - 'A';
5051 }
5052 else if ('K' <= this->reference[i] && this->reference[i] <= 'T') {
5053 normalized[available++] = '2';
5054 normalized[available++] = '0' + this->reference[i] - 'K';
5055 }
5056 else if ('U' <= this->reference[i] && this->reference[i] <= 'Z') {
5057 normalized[available++] = '3';
5058 normalized[available++] = '0' + this->reference[i] - 'U';
5059 }
5060 }
5061
5062 // Calculate modulo 97.
5063 nominator = stdex::strtou32(normalized, 9, &next, 10);
5064 for (;;) {
5065 nominator %= 97;
5066 if (!normalized[next]) {
5067 this->is_valid = nominator == 1;
5068 break;
5069 }
5070 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
5071 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
5072 nominator = nominator * 10 + (normalized[next] - '0');
5073 }
5074
5075 this->interval.start = start;
5076 return true;
5077
5078 error:
5079 this->check_digits[0] = 0;
5080 this->reference[0] = 0;
5081 this->is_valid = false;
5082 this->interval.invalidate();
5083 return false;
5084 }
5085
5086 virtual void invalidate()
5087 {
5088 this->check_digits[0] = 0;
5089 this->reference[0] = 0;
5090 this->is_valid = false;
5092 }
5093
5094 public:
5098
5099 protected:
5100 std::shared_ptr<basic_parser<T>> m_space;
5101 };
5102
5105#ifdef _UNICODE
5107#else
5109#endif
5111
5117 template <class T>
5119 {
5120 public:
5121 basic_si_reference_part(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5122
5123 virtual bool match(
5124 _In_reads_or_z_opt_(end) const T* text,
5125 _In_ size_t start = 0,
5126 _In_ size_t end = SIZE_MAX,
5127 _In_ int flags = match_default)
5128 {
5129 _Assume_(text || start >= end);
5130 this->interval.end = start;
5131 for (;;) {
5132 if (this->interval.end >= end || !text[this->interval.end])
5133 break;
5134 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9')
5135 this->interval.end++;
5136 else
5137 break;
5138 }
5140 this->interval.start = start;
5141 return true;
5142 }
5143 this->interval.invalidate();
5144 return false;
5145 }
5146 };
5147
5150#ifdef _UNICODE
5152#else
5154#endif
5156
5162 template <class T>
5164 {
5165 public:
5166 basic_si_reference_delimiter(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5167
5168 virtual bool match(
5169 _In_reads_or_z_opt_(end) const T* text,
5170 _In_ size_t start = 0,
5171 _In_ size_t end = SIZE_MAX,
5172 _In_ int flags = match_default)
5173 {
5174 _Assume_(text || start >= end);
5175 if (start < end && text[start] == '-') {
5176 this->interval.end = (this->interval.start = start) + 1;
5177 return true;
5178 }
5179 this->interval.invalidate();
5180 return false;
5181 }
5182 };
5183
5186#ifdef _UNICODE
5188#else
5190#endif
5192
5200 template <class T>
5202 {
5203 public:
5205 _In_ const std::shared_ptr<basic_parser<T>>& space,
5206 _In_ const std::locale& locale = std::locale()) :
5207 basic_parser<T>(locale),
5208 part1(locale),
5209 part2(locale),
5210 part3(locale),
5211 is_valid(false),
5212 m_space(space),
5213 m_delimiter(locale)
5214 {
5215 this->model[0] = 0;
5216 }
5217
5218 virtual bool match(
5219 _In_reads_or_z_opt_(end) const T* text,
5220 _In_ size_t start = 0,
5221 _In_ size_t end = SIZE_MAX,
5222 _In_ int flags = match_default)
5223 {
5224 _Assume_(text || start >= end);
5225 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
5226 const bool case_insensitive = flags & match_case_insensitive ? true : false;
5227
5228 this->interval.end = start;
5229 if (this->interval.end + 1 >= end ||
5230 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'S' ||
5231 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'I')
5232 goto error; // incomplete or wrong reference ID
5233 this->interval.end += 2;
5234
5235 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5236 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5237 goto error; // incomplete or invalid model
5238 this->model[i] = text[this->interval.end];
5239 }
5240 this->model[2] = 0;
5241
5242 this->part1.invalidate();
5243 this->part2.invalidate();
5244 this->part3.invalidate();
5245 if (this->model[0] == '9' && this->model[1] == '9') {
5246 is_valid = true;
5247 this->interval.start = start;
5248 return true;
5249 }
5250
5251 if (m_space && m_space->match(text, this->interval.end, end, flags))
5252 this->interval.end = m_space->interval.end;
5253
5254 this->part1.match(text, this->interval.end, end, flags) &&
5255 this->m_delimiter.match(text, this->part1.interval.end, end, flags) &&
5256 this->part2.match(text, this->m_delimiter.interval.end, end, flags) &&
5257 this->m_delimiter.match(text, this->part2.interval.end, end, flags) &&
5258 this->part3.match(text, this->m_delimiter.interval.end, end, flags);
5259
5260 this->interval.start = start;
5261 if (this->part3.interval)
5262 this->interval.end = this->part3.interval.end;
5263 else if (this->part2.interval)
5264 this->interval.end = this->part2.interval.end;
5265 else if (this->part1.interval)
5266 this->interval.end = this->part1.interval.end;
5267 else
5268 this->interval.end = start + 4;
5269
5270 if (this->model[0] == '0' && this->model[1] == '0')
5271 is_valid =
5272 this->part3.interval ?
5273 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5274 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 :
5275 this->part2.interval ?
5276 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5277 this->part1.interval.size() + this->part2.interval.size() <= 20 :
5278 this->part1.interval ?
5279 this->part1.interval.size() <= 12 :
5280 false;
5281 else if (this->model[0] == '0' && this->model[1] == '1')
5282 is_valid =
5283 this->part3.interval ?
5284 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5285 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5286 check11(
5287 text + this->part1.interval.start, this->part1.interval.size(),
5288 text + this->part2.interval.start, this->part2.interval.size(),
5289 text + this->part3.interval.start, this->part3.interval.size()) :
5290 this->part2.interval ?
5291 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5292 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5293 check11(
5294 text + this->part1.interval.start, this->part1.interval.size(),
5295 text + this->part2.interval.start, this->part2.interval.size()) :
5296 this->part1.interval ?
5297 this->part1.interval.size() <= 12 &&
5298 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5299 false;
5300 else if (this->model[0] == '0' && this->model[1] == '2')
5301 is_valid =
5302 this->part3.interval ?
5303 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5304 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5305 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5306 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5307 false;
5308 else if (this->model[0] == '0' && this->model[1] == '3')
5309 is_valid =
5310 this->part3.interval ?
5311 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5312 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5313 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5314 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5315 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5316 false;
5317 else if (this->model[0] == '0' && this->model[1] == '4')
5318 is_valid =
5319 this->part3.interval ?
5320 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5321 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5322 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5323 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5324 false;
5325 else if ((this->model[0] == '0' || this->model[0] == '5') && this->model[1] == '5')
5326 is_valid =
5327 this->part3.interval ?
5328 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5329 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5330 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5331 this->part2.interval ?
5332 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5333 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5334 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5335 this->part1.interval ?
5336 this->part1.interval.size() <= 12 &&
5337 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5338 false;
5339 else if (this->model[0] == '0' && this->model[1] == '6')
5340 is_valid =
5341 this->part3.interval ?
5342 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5343 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5344 check11(
5345 text + this->part2.interval.start, this->part2.interval.size(),
5346 text + this->part3.interval.start, this->part3.interval.size()) :
5347 this->part2.interval ?
5348 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5349 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5350 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5351 false;
5352 else if (this->model[0] == '0' && this->model[1] == '7')
5353 is_valid =
5354 this->part3.interval ?
5355 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5356 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5357 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5358 this->part2.interval ?
5359 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5360 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5361 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5362 false;
5363 else if (this->model[0] == '0' && this->model[1] == '8')
5364 is_valid =
5365 this->part3.interval ?
5366 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5367 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5368 check11(
5369 text + this->part1.interval.start, this->part1.interval.size(),
5370 text + this->part2.interval.start, this->part2.interval.size()) &&
5371 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5372 false;
5373 else if (this->model[0] == '0' && this->model[1] == '9')
5374 is_valid =
5375 this->part3.interval ?
5376 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5377 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5378 check11(
5379 text + this->part1.interval.start, this->part1.interval.size(),
5380 text + this->part2.interval.start, this->part2.interval.size()) :
5381 this->part2.interval ?
5382 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5383 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5384 check11(
5385 text + this->part1.interval.start, this->part1.interval.size(),
5386 text + this->part2.interval.start, this->part2.interval.size()) :
5387 this->part1.interval ?
5388 this->part1.interval.size() <= 12 &&
5389 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5390 false;
5391 else if (this->model[0] == '1' && this->model[1] == '0')
5392 is_valid =
5393 this->part3.interval ?
5394 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5395 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5396 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5397 check11(
5398 text + this->part2.interval.start, this->part2.interval.size(),
5399 text + this->part3.interval.start, this->part3.interval.size()) :
5400 this->part2.interval ?
5401 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5402 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5403 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5404 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5405 false;
5406 else if (
5407 (this->model[0] == '1' && (this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5408 ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '8') ||
5409 (this->model[0] == '4' && (this->model[1] == '0' || this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5410 (this->model[0] == '5' && (this->model[1] == '1' || this->model[1] == '8')))
5411 is_valid =
5412 this->part3.interval ?
5413 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5414 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5415 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5416 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5417 this->part2.interval ?
5418 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5419 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5420 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5421 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5422 false;
5423 else if (this->model[0] == '1' && this->model[1] == '2')
5424 is_valid =
5425 this->part3.interval ? false :
5426 this->part2.interval ? false :
5427 this->part1.interval ?
5428 this->part1.interval.size() <= 13 &&
5429 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5430 false;
5431 else if ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '1')
5432 is_valid =
5433 this->part3.interval ? false :
5434 this->part2.interval ?
5435 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5436 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5437 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5438 false;
5439 else
5440 is_valid = true; // Assume models we don't handle as valid
5441 return true;
5442
5443 error:
5444 this->model[0] = 0;
5445 this->part1.interval.start = (this->part1.interval.end = start) + 1;
5446 this->part2.interval.start = (this->part2.interval.end = start) + 1;
5447 this->part3.interval.start = (this->part3.interval.end = start) + 1;
5448 this->is_valid = false;
5449 this->interval.invalidate();
5450 return false;
5451 }
5452
5453 virtual void invalidate()
5454 {
5455 this->model[0] = 0;
5456 this->part1.invalidate();
5457 this->part2.invalidate();
5458 this->part3.invalidate();
5459 this->is_valid = false;
5461 }
5462
5463 protected:
5464 static bool check11(
5465 _In_count_(num_part1) const T* part1, _In_ size_t num_part1)
5466 {
5467 _Assume_(part1 && num_part1 >= 1);
5468 uint32_t nominator = 0, ponder = 2;
5469 for (size_t i = num_part1 - 1; i--; ++ponder)
5470 nominator += (part1[i] - '0') * ponder;
5471 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5472 if (control >= 10)
5473 control = 0;
5474 return control == part1[num_part1 - 1] - '0';
5475 }
5476
5477 static bool check11(
5478 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5479 _In_count_(num_part2) const T* part2, _In_ size_t num_part2)
5480 {
5481 _Assume_(part1 || !num_part1);
5482 _Assume_(part2 && num_part2 >= 1);
5483 uint32_t nominator = 0, ponder = 2;
5484 for (size_t i = num_part2 - 1; i--; ++ponder)
5485 nominator += (part2[i] - '0') * ponder;
5486 for (size_t i = num_part1; i--; ++ponder)
5487 nominator += (part1[i] - '0') * ponder;
5488 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5489 if (control == 10)
5490 control = 0;
5491 return control == part2[num_part2 - 1] - '0';
5492 }
5493
5494 static bool check11(
5495 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5496 _In_count_(num_part2) const T* part2, _In_ size_t num_part2,
5497 _In_count_(num_part3) const T* part3, _In_ size_t num_part3)
5498 {
5499 _Assume_(part1 || !num_part1);
5500 _Assume_(part2 || !num_part2);
5501 _Assume_(part3 && num_part3 >= 1);
5502 uint32_t nominator = 0, ponder = 2;
5503 for (size_t i = num_part3 - 1; i--; ++ponder)
5504 nominator += (part3[i] - '0') * ponder;
5505 for (size_t i = num_part2; i--; ++ponder)
5506 nominator += (part2[i] - '0') * ponder;
5507 for (size_t i = num_part1; i--; ++ponder)
5508 nominator += (part1[i] - '0') * ponder;
5509 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5510 if (control == 10)
5511 control = 0;
5512 return control == part2[num_part3 - 1] - '0';
5513 }
5514
5515 public:
5516 T model[3];
5521
5522 protected:
5523 std::shared_ptr<basic_parser<T>> m_space;
5525 };
5526
5529#ifdef _UNICODE
5531#else
5533#endif
5535
5539 template <class T>
5541 {
5542 public:
5544 _In_ const std::shared_ptr<basic_parser<T>>& element,
5545 _In_ const std::shared_ptr<basic_parser<T>>& digit,
5546 _In_ const std::shared_ptr<basic_parser<T>>& sign,
5547 _In_ const std::locale& locale = std::locale()) :
5548 basic_parser<T>(locale),
5549 m_element(element),
5550 m_digit(digit),
5551 m_sign(sign),
5552 has_digits(false),
5553 has_charge(false)
5554 {}
5555
5556 virtual bool match(
5557 _In_reads_or_z_opt_(end) const T* text,
5558 _In_ size_t start = 0,
5559 _In_ size_t end = SIZE_MAX,
5560 _In_ int flags = match_default)
5561 {
5562 _Assume_(text || start >= end);
5563
5564 has_digits = false;
5565 has_charge = false;
5566 this->interval.end = start;
5567
5568 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
5569 for (;;) {
5570 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
5571 this->interval.end = m_element->interval.end;
5572 while (m_digit->match(text, this->interval.end, end, flags)) {
5573 this->interval.end = m_digit->interval.end;
5574 has_digits = true;
5575 }
5576 }
5577 else if (start < this->interval.end) {
5578 if (m_sign->match(text, this->interval.end, end, flags)) {
5579 this->interval.end = m_sign->interval.end;
5580 has_charge = true;
5581 }
5582 this->interval.start = start;
5583 return true;
5584 }
5585 else {
5586 this->interval.invalidate();
5587 return false;
5588 }
5589 }
5590 }
5591
5592 virtual void invalidate()
5593 {
5594 has_digits = false;
5595 has_charge = false;
5597 }
5598
5599 public:
5600 bool has_digits;
5601 bool has_charge;
5602
5603 protected:
5604 std::shared_ptr<basic_parser<T>> m_element;
5605 std::shared_ptr<basic_parser<T>> m_digit;
5606 std::shared_ptr<basic_parser<T>> m_sign;
5607 };
5608
5611#ifdef _UNICODE
5613#else
5615#endif
5617
5622 {
5623 public:
5624 virtual bool match(
5625 _In_reads_or_z_(end) const char* text,
5626 _In_ size_t start = 0,
5627 _In_ size_t end = SIZE_MAX,
5628 _In_ int flags = match_default)
5629 {
5630 _Assume_(text || start >= end);
5631 this->interval.end = start;
5632
5633 _Assume_(text || this->interval.end >= end);
5634 if (this->interval.end < end && text[this->interval.end]) {
5635 if (text[this->interval.end] == '\r') {
5636 this->interval.end++;
5637 if (this->interval.end < end && text[this->interval.end] == '\n') {
5638 this->interval.start = start;
5639 this->interval.end++;
5640 return true;
5641 }
5642 }
5643 else if (text[this->interval.end] == '\n') {
5644 this->interval.start = start;
5645 this->interval.end++;
5646 return true;
5647 }
5648 }
5649 this->interval.invalidate();
5650 return false;
5651 }
5652 };
5653
5657 class http_space : public parser
5658 {
5659 public:
5660 virtual bool match(
5661 _In_reads_or_z_(end) const char* text,
5662 _In_ size_t start = 0,
5663 _In_ size_t end = SIZE_MAX,
5664 _In_ int flags = match_default)
5665 {
5666 _Assume_(text || start >= end);
5667 this->interval.end = start;
5668 if (m_line_break.match(text, this->interval.end, end, flags)) {
5669 this->interval.end = m_line_break.interval.end;
5670 if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5671 this->interval.start = start;
5672 this->interval.end++;
5673 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5674 return true;
5675 }
5676 }
5677 else if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5678 this->interval.start = start;
5679 this->interval.end++;
5680 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5681 return true;
5682 }
5683 this->interval.invalidate();
5684 return false;
5685 }
5686
5687 protected:
5688 http_line_break m_line_break;
5689 };
5690
5694 class http_text_char : public parser
5695 {
5696 public:
5697 virtual bool match(
5698 _In_reads_or_z_(end) const char* text,
5699 _In_ size_t start = 0,
5700 _In_ size_t end = SIZE_MAX,
5701 _In_ int flags = match_default)
5702 {
5703 _Assume_(text || start >= end);
5704 this->interval.end = start;
5705
5706 _Assume_(text || this->interval.end >= end);
5707 if (m_space.match(text, this->interval.end, end, flags)) {
5708 this->interval.start = start;
5709 this->interval.end = m_space.interval.end;
5710 return true;
5711 }
5712 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
5713 this->interval.start = start;
5714 this->interval.end++;
5715 return true;
5716 }
5717 this->interval.invalidate();
5718 return false;
5719 }
5720
5721 protected:
5722 http_space m_space;
5723 };
5724
5728 class http_token : public parser
5729 {
5730 public:
5731 virtual bool match(
5732 _In_reads_or_z_(end) const char* text,
5733 _In_ size_t start = 0,
5734 _In_ size_t end = SIZE_MAX,
5735 _In_ int flags = match_default)
5736 {
5737 _Assume_(text || start >= end);
5738 this->interval.end = start;
5739 for (;;) {
5740 if (this->interval.end < end && text[this->interval.end]) {
5741 if ((unsigned int)text[this->interval.end] < 0x20 ||
5742 (unsigned int)text[this->interval.end] == 0x7f ||
5743 text[this->interval.end] == '(' ||
5744 text[this->interval.end] == ')' ||
5745 text[this->interval.end] == '<' ||
5746 text[this->interval.end] == '>' ||
5747 text[this->interval.end] == '@' ||
5748 text[this->interval.end] == ',' ||
5749 text[this->interval.end] == ';' ||
5750 text[this->interval.end] == ':' ||
5751 text[this->interval.end] == '\\' ||
5752 text[this->interval.end] == '\"' ||
5753 text[this->interval.end] == '/' ||
5754 text[this->interval.end] == '[' ||
5755 text[this->interval.end] == ']' ||
5756 text[this->interval.end] == '?' ||
5757 text[this->interval.end] == '=' ||
5758 text[this->interval.end] == '{' ||
5759 text[this->interval.end] == '}' ||
5760 stdex::isspace(text[this->interval.end]))
5761 break;
5762 else
5763 this->interval.end++;
5764 }
5765 else
5766 break;
5767 }
5769 this->interval.start = start;
5770 return true;
5771 }
5772 else {
5773 this->interval.invalidate();
5774 return false;
5775 }
5776 }
5777 };
5778
5783 {
5784 public:
5785 virtual bool match(
5786 _In_reads_or_z_(end) const char* text,
5787 _In_ size_t start = 0,
5788 _In_ size_t end = SIZE_MAX,
5789 _In_ int flags = match_default)
5790 {
5791 _Assume_(text || start >= end);
5792 this->interval.end = start;
5793 if (this->interval.end < end && text[this->interval.end] != '"')
5794 goto error;
5795 this->interval.end++;
5796 content.start = this->interval.end;
5797 for (;;) {
5798 _Assume_(text || this->interval.end >= end);
5799 if (this->interval.end < end && text[this->interval.end]) {
5800 if (text[this->interval.end] == '"') {
5801 content.end = this->interval.end;
5802 this->interval.end++;
5803 break;
5804 }
5805 else if (text[this->interval.end] == '\\') {
5806 this->interval.end++;
5807 if (this->interval.end < end && text[this->interval.end]) {
5808 this->interval.end++;
5809 }
5810 else
5811 goto error;
5812 }
5813 else if (m_chr.match(text, this->interval.end, end, flags))
5814 this->interval.end++;
5815 else
5816 goto error;
5817 }
5818 else
5819 goto error;
5820 }
5821 this->interval.start = start;
5822 return true;
5823
5824 error:
5825 content.start = 1;
5826 content.end = 0;
5827 this->interval.invalidate();
5828 return false;
5829 }
5830
5831 virtual void invalidate()
5832 {
5833 content.start = 1;
5834 content.end = 0;
5835 parser::invalidate();
5836 }
5837
5838 public:
5840
5841 protected:
5842 http_text_char m_chr;
5843 };
5844
5848 class http_value : public parser
5849 {
5850 public:
5851 virtual bool match(
5852 _In_reads_or_z_(end) const char* text,
5853 _In_ size_t start = 0,
5854 _In_ size_t end = SIZE_MAX,
5855 _In_ int flags = match_default)
5856 {
5857 _Assume_(text || start >= end);
5858 this->interval.end = start;
5859 if (string.match(text, this->interval.end, end, flags)) {
5860 token.invalidate();
5861 this->interval.end = string.interval.end;
5862 this->interval.start = start;
5863 return true;
5864 }
5865 else if (token.match(text, this->interval.end, end, flags)) {
5866 string.invalidate();
5867 this->interval.end = token.interval.end;
5868 this->interval.start = start;
5869 return true;
5870 }
5871 else {
5872 this->interval.invalidate();
5873 return false;
5874 }
5875 }
5876
5877 virtual void invalidate()
5878 {
5879 string.invalidate();
5880 token.invalidate();
5881 parser::invalidate();
5882 }
5883
5884 public:
5887 };
5888
5892 class http_parameter : public parser
5893 {
5894 public:
5895 virtual bool match(
5896 _In_reads_or_z_(end) const char* text,
5897 _In_ size_t start = 0,
5898 _In_ size_t end = SIZE_MAX,
5899 _In_ int flags = match_default)
5900 {
5901 _Assume_(text || start >= end);
5902 this->interval.end = start;
5903 if (name.match(text, this->interval.end, end, flags))
5904 this->interval.end = name.interval.end;
5905 else
5906 goto error;
5907 while (m_space.match(text, this->interval.end, end, flags))
5908 this->interval.end = m_space.interval.end;
5909 _Assume_(text || this->interval.end >= end);
5910 if (this->interval.end < end && text[this->interval.end] == '=')
5911 this->interval.end++;
5912 else
5913 while (m_space.match(text, this->interval.end, end, flags))
5914 this->interval.end = m_space.interval.end;
5915 if (value.match(text, this->interval.end, end, flags))
5916 this->interval.end = value.interval.end;
5917 else
5918 goto error;
5919 this->interval.start = start;
5920 return true;
5921
5922 error:
5923 name.invalidate();
5924 value.invalidate();
5925 this->interval.invalidate();
5926 return false;
5927 }
5928
5929 virtual void invalidate()
5930 {
5931 name.invalidate();
5932 value.invalidate();
5933 parser::invalidate();
5934 }
5935
5936 public:
5939
5940 protected:
5941 http_space m_space;
5942 };
5943
5947 class http_any_type : public parser
5948 {
5949 public:
5950 virtual bool match(
5951 _In_reads_or_z_(end) const char* text,
5952 _In_ size_t start = 0,
5953 _In_ size_t end = SIZE_MAX,
5954 _In_ int flags = match_default)
5955 {
5956 _Assume_(text || start >= end);
5957 if (start + 2 < end &&
5958 text[start] == '*' &&
5959 text[start + 1] == '/' &&
5960 text[start + 2] == '*')
5961 {
5962 this->interval.end = (this->interval.start = start) + 3;
5963 return true;
5964 }
5965 else if (start < end && text[start] == '*') {
5966 this->interval.end = (this->interval.start = start) + 1;
5967 return true;
5968 }
5969 else {
5970 this->interval.invalidate();
5971 return false;
5972 }
5973 }
5974 };
5975
5980 {
5981 public:
5982 virtual bool match(
5983 _In_reads_or_z_(end) const char* text,
5984 _In_ size_t start = 0,
5985 _In_ size_t end = SIZE_MAX,
5986 _In_ int flags = match_default)
5987 {
5988 _Assume_(text || start >= end);
5989 this->interval.end = start;
5990 if (type.match(text, this->interval.end, end, flags))
5991 this->interval.end = type.interval.end;
5992 else
5993 goto error;
5994 while (m_space.match(text, this->interval.end, end, flags))
5995 this->interval.end = m_space.interval.end;
5996 if (this->interval.end < end && text[this->interval.end] == '/')
5997 this->interval.end++;
5998 else
5999 goto error;
6000 while (m_space.match(text, this->interval.end, end, flags))
6001 this->interval.end = m_space.interval.end;
6002 if (subtype.match(text, this->interval.end, end, flags))
6003 this->interval.end = subtype.interval.end;
6004 else
6005 goto error;
6006 this->interval.start = start;
6007 return true;
6008
6009 error:
6010 type.invalidate();
6011 subtype.invalidate();
6012 this->interval.invalidate();
6013 return false;
6014 }
6015
6016 virtual void invalidate()
6017 {
6018 type.invalidate();
6019 subtype.invalidate();
6020 parser::invalidate();
6021 }
6022
6023 public:
6024 http_token type;
6025 http_token subtype;
6026
6027 protected:
6028 http_space m_space;
6029 };
6030
6035 {
6036 public:
6037 virtual bool match(
6038 _In_reads_or_z_(end) const char* text,
6039 _In_ size_t start = 0,
6040 _In_ size_t end = SIZE_MAX,
6041 _In_ int flags = match_default)
6042 {
6043 _Assume_(text || start >= end);
6044 if (!http_media_range::match(text, start, end, flags))
6045 goto error;
6046 params.clear();
6047 for (;;) {
6048 if (this->interval.end < end && text[this->interval.end]) {
6049 if (m_space.match(text, this->interval.end, end, flags))
6050 this->interval.end = m_space.interval.end;
6051 else if (text[this->interval.end] == ';') {
6052 this->interval.end++;
6053 while (m_space.match(text, this->interval.end, end, flags))
6054 this->interval.end = m_space.interval.end;
6055 http_parameter param;
6056 if (param.match(text, this->interval.end, end, flags)) {
6057 this->interval.end = param.interval.end;
6058 params.push_back(std::move(param));
6059 }
6060 else
6061 break;
6062 }
6063 else
6064 break;
6065 }
6066 else
6067 break;
6068 }
6069 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
6070 return true;
6071
6072 error:
6073 http_media_range::invalidate();
6074 params.clear();
6075 this->interval.invalidate();
6076 return false;
6077 }
6078
6079 virtual void invalidate()
6080 {
6081 params.clear();
6082 http_media_range::invalidate();
6083 }
6084
6085 public:
6086 std::list<http_parameter> params;
6087 };
6088
6093 {
6094 public:
6095 virtual bool match(
6096 _In_reads_or_z_(end) const char* text,
6097 _In_ size_t start = 0,
6098 _In_ size_t end = SIZE_MAX,
6099 _In_ int flags = match_default)
6100 {
6101 _Assume_(text || start >= end);
6102 this->interval.end = start;
6103 for (;;) {
6104 if (this->interval.end < end && text[this->interval.end]) {
6105 if ((unsigned int)text[this->interval.end] < 0x20 ||
6106 (unsigned int)text[this->interval.end] == 0x7f ||
6107 text[this->interval.end] == ':' ||
6108 text[this->interval.end] == '/' ||
6109 stdex::isspace(text[this->interval.end]))
6110 break;
6111 else
6112 this->interval.end++;
6113 }
6114 else
6115 break;
6116 }
6118 this->interval.start = start;
6119 return true;
6120 }
6121 this->interval.invalidate();
6122 return false;
6123 }
6124 };
6125
6129 class http_url_port : public parser
6130 {
6131 public:
6132 http_url_port(_In_ const std::locale& locale = std::locale()) :
6133 parser(locale),
6134 value(0)
6135 {}
6136
6137 virtual bool match(
6138 _In_reads_or_z_(end) const char* text,
6139 _In_ size_t start = 0,
6140 _In_ size_t end = SIZE_MAX,
6141 _In_ int flags = match_default)
6142 {
6143 _Assume_(text || start >= end);
6144 value = 0;
6145 this->interval.end = start;
6146 for (;;) {
6147 if (this->interval.end < end && text[this->interval.end]) {
6148 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6149 size_t _value = (size_t)value * 10 + text[this->interval.end] - '0';
6150 if (_value > (uint16_t)-1) {
6151 value = 0;
6152 this->interval.invalidate();
6153 return false;
6154 }
6155 value = (uint16_t)_value;
6156 this->interval.end++;
6157 }
6158 else
6159 break;
6160 }
6161 else
6162 break;
6163 }
6165 this->interval.start = start;
6166 return true;
6167 }
6168 this->interval.invalidate();
6169 return false;
6170 }
6171
6172 virtual void invalidate()
6173 {
6174 value = 0;
6175 parser::invalidate();
6176 }
6177
6178 public:
6179 uint16_t value;
6180 };
6181
6186 {
6187 public:
6188 virtual bool match(
6189 _In_reads_or_z_(end) const char* text,
6190 _In_ size_t start = 0,
6191 _In_ size_t end = SIZE_MAX,
6192 _In_ int flags = match_default)
6193 {
6194 _Assume_(text || start >= end);
6195 this->interval.end = start;
6196 for (;;) {
6197 if (this->interval.end < end && text[this->interval.end]) {
6198 if ((unsigned int)text[this->interval.end] < 0x20 ||
6199 (unsigned int)text[this->interval.end] == 0x7f ||
6200 text[this->interval.end] == '?' ||
6201 text[this->interval.end] == '/' ||
6202 stdex::isspace(text[this->interval.end]))
6203 break;
6204 else
6205 this->interval.end++;
6206 }
6207 else
6208 break;
6209 }
6210 this->interval.start = start;
6211 return true;
6212 }
6213 };
6214
6218 class http_url_path : public parser
6219 {
6220 public:
6221 virtual bool match(
6222 _In_reads_or_z_(end) const char* text,
6223 _In_ size_t start = 0,
6224 _In_ size_t end = SIZE_MAX,
6225 _In_ int flags = match_default)
6226 {
6227 _Assume_(text || start >= end);
6229 this->interval.end = start;
6230 segments.clear();
6231 _Assume_(text || this->interval.end >= end);
6232 if (this->interval.end < end && text[this->interval.end] != '/')
6233 goto error;
6234 this->interval.end++;
6235 s.match(text, this->interval.end, end, flags);
6236 segments.push_back(s);
6237 this->interval.end = s.interval.end;
6238 for (;;) {
6239 if (this->interval.end < end && text[this->interval.end]) {
6240 if (text[this->interval.end] == '/') {
6241 this->interval.end++;
6242 s.match(text, this->interval.end, end, flags);
6243 segments.push_back(s);
6244 this->interval.end = s.interval.end;
6245 }
6246 else
6247 break;
6248 }
6249 else
6250 break;
6251 }
6252 this->interval.start = start;
6253 return true;
6254
6255 error:
6256 segments.clear();
6257 this->interval.invalidate();
6258 return false;
6259 }
6260
6261 virtual void invalidate()
6262 {
6263 segments.clear();
6264 parser::invalidate();
6265 }
6266
6267 public:
6268 std::vector<http_url_path_segment> segments;
6269 };
6270
6275 {
6276 public:
6277 virtual bool match(
6278 _In_reads_or_z_(end) const char* text,
6279 _In_ size_t start = 0,
6280 _In_ size_t end = SIZE_MAX,
6281 _In_ int flags = match_default)
6282 {
6283 _Assume_(text || start >= end);
6284 this->interval.end = start;
6285 name.start = this->interval.end;
6286 for (;;) {
6287 if (this->interval.end < end && text[this->interval.end]) {
6288 if ((unsigned int)text[this->interval.end] < 0x20 ||
6289 (unsigned int)text[this->interval.end] == 0x7f ||
6290 text[this->interval.end] == '&' ||
6291 text[this->interval.end] == '=' ||
6292 stdex::isspace(text[this->interval.end]))
6293 break;
6294 else
6295 this->interval.end++;
6296 }
6297 else
6298 break;
6299 }
6301 name.end = this->interval.end;
6302 else
6303 goto error;
6304 if (text[this->interval.end] == '=') {
6305 this->interval.end++;
6306 value.start = this->interval.end;
6307 for (;;) {
6308 if (this->interval.end < end && text[this->interval.end]) {
6309 if ((unsigned int)text[this->interval.end] < 0x20 ||
6310 (unsigned int)text[this->interval.end] == 0x7f ||
6311 text[this->interval.end] == '&' ||
6312 stdex::isspace(text[this->interval.end]))
6313 break;
6314 else
6315 this->interval.end++;
6316 }
6317 else
6318 break;
6319 }
6320 value.end = this->interval.end;
6321 }
6322 else {
6323 value.start = 1;
6324 value.end = 0;
6325 }
6326 this->interval.start = start;
6327 return true;
6328
6329 error:
6330 name.start = 1;
6331 name.end = 0;
6332 value.start = 1;
6333 value.end = 0;
6334 this->interval.invalidate();
6335 return false;
6336 }
6337
6338 virtual void invalidate()
6339 {
6340 name.start = 1;
6341 name.end = 0;
6342 value.start = 1;
6343 value.end = 0;
6344 parser::invalidate();
6345 }
6346
6347 public:
6350 };
6351
6355 class http_url : public parser
6356 {
6357 public:
6358 http_url(_In_ const std::locale& locale = std::locale()) :
6359 parser(locale),
6360 port(locale)
6361 {}
6362
6363 virtual bool match(
6364 _In_reads_or_z_(end) const char* text,
6365 _In_ size_t start = 0,
6366 _In_ size_t end = SIZE_MAX,
6367 _In_ int flags = match_default)
6368 {
6369 _Assume_(text || start >= end);
6370 this->interval.end = start;
6371
6372 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", SIZE_MAX, m_locale) == 0) {
6373 this->interval.end += 7;
6374 if (server.match(text, this->interval.end, end, flags))
6375 this->interval.end = server.interval.end;
6376 else
6377 goto error;
6378 if (this->interval.end < end && text[this->interval.end] == ':') {
6379 this->interval.end++;
6380 if (port.match(text, this->interval.end, end, flags))
6381 this->interval.end = port.interval.end;
6382 }
6383 else {
6384 port.invalidate();
6385 port.value = 80;
6386 }
6387 }
6388 else {
6389 server.invalidate();
6390 port.invalidate();
6391 port.value = 80;
6392 }
6393
6394 if (path.match(text, this->interval.end, end, flags))
6395 this->interval.end = path.interval.end;
6396 else
6397 goto error;
6398
6399 params.clear();
6400
6401 if (this->interval.end < end && text[this->interval.end] == '?') {
6402 this->interval.end++;
6403 for (;;) {
6404 if (this->interval.end < end && text[this->interval.end]) {
6405 if ((unsigned int)text[this->interval.end] < 0x20 ||
6406 (unsigned int)text[this->interval.end] == 0x7f ||
6407 stdex::isspace(text[this->interval.end]))
6408 break;
6409 else if (text[this->interval.end] == '&')
6410 this->interval.end++;
6411 else {
6412 http_url_parameter param;
6413 if (param.match(text, this->interval.end, end, flags)) {
6414 this->interval.end = param.interval.end;
6415 params.push_back(std::move(param));
6416 }
6417 else
6418 break;
6419 }
6420 }
6421 else
6422 break;
6423 }
6424 }
6425
6426 this->interval.start = start;
6427 return true;
6428
6429 error:
6430 server.invalidate();
6431 port.invalidate();
6432 path.invalidate();
6433 params.clear();
6434 this->interval.invalidate();
6435 return false;
6436 }
6437
6438 virtual void invalidate()
6439 {
6440 server.invalidate();
6441 port.invalidate();
6442 path.invalidate();
6443 params.clear();
6444 parser::invalidate();
6445 }
6446
6447 public:
6448 http_url_server server;
6449 http_url_port port;
6450 http_url_path path;
6451 std::list<http_url_parameter> params;
6452 };
6453
6457 class http_language : public parser
6458 {
6459 public:
6460 virtual bool match(
6461 _In_reads_or_z_(end) const char* text,
6462 _In_ size_t start = 0,
6463 _In_ size_t end = SIZE_MAX,
6464 _In_ int flags = match_default)
6465 {
6466 _Assume_(text || start >= end);
6467 this->interval.end = start;
6468 components.clear();
6469 for (;;) {
6470 if (this->interval.end < end && text[this->interval.end]) {
6472 k.end = this->interval.end;
6473 for (;;) {
6474 if (k.end < end && text[k.end]) {
6475 if (stdex::isalpha(text[k.end]))
6476 k.end++;
6477 else
6478 break;
6479 }
6480 else
6481 break;
6482 }
6483 if (this->interval.end < k.end) {
6484 k.start = this->interval.end;
6485 this->interval.end = k.end;
6486 components.push_back(k);
6487 }
6488 else
6489 break;
6490 if (this->interval.end < end && text[this->interval.end] == '-')
6491 this->interval.end++;
6492 else
6493 break;
6494 }
6495 else
6496 break;
6497 }
6498 if (!components.empty()) {
6499 this->interval.start = start;
6500 this->interval.end = components.back().end;
6501 return true;
6502 }
6503 this->interval.invalidate();
6504 return false;
6505 }
6506
6507 virtual void invalidate()
6508 {
6509 components.clear();
6510 parser::invalidate();
6511 }
6512
6513 public:
6514 std::vector<stdex::interval<size_t>> components;
6515 };
6516
6520 class http_weight : public parser
6521 {
6522 public:
6523 http_weight(_In_ const std::locale& locale = std::locale()) :
6524 parser(locale),
6525 value(1.0f)
6526 {}
6527
6528 virtual bool match(
6529 _In_reads_or_z_(end) const char* text,
6530 _In_ size_t start = 0,
6531 _In_ size_t end = SIZE_MAX,
6532 _In_ int flags = match_default)
6533 {
6534 _Assume_(text || start >= end);
6535 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
6536 this->interval.end = start;
6537 for (;;) {
6538 if (this->interval.end < end && text[this->interval.end]) {
6539 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6540 celi_del = celi_del * 10 + text[this->interval.end] - '0';
6541 this->interval.end++;
6542 }
6543 else if (text[this->interval.end] == '.') {
6544 this->interval.end++;
6545 for (;;) {
6546 if (this->interval.end < end && text[this->interval.end]) {
6547 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6548 decimalni_del = decimalni_del * 10 + text[this->interval.end] - '0';
6549 decimalni_del_n *= 10;
6550 this->interval.end++;
6551 }
6552 else
6553 break;
6554 }
6555 else
6556 break;
6557 }
6558 break;
6559 }
6560 else
6561 break;
6562 }
6563 else
6564 break;
6565 }
6568 this->interval.start = start;
6569 return true;
6570 }
6571 value = 1.0f;
6572 this->interval.invalidate();
6573 return false;
6574 }
6575
6576 virtual void invalidate()
6577 {
6578 value = 1.0f;
6579 parser::invalidate();
6580 }
6581
6582 public:
6583 float value;
6584 };
6585
6589 class http_asterisk : public parser
6590 {
6591 public:
6592 virtual bool match(
6593 _In_reads_or_z_(end) const char* text,
6594 _In_ size_t start = 0,
6595 _In_ size_t end = SIZE_MAX,
6596 _In_ int flags = match_default)
6597 {
6598 _Assume_(text || end <= start);
6599 if (start < end && text[start] == '*') {
6600 this->interval.end = (this->interval.start = start) + 1;
6601 return true;
6602 }
6603 this->interval.invalidate();
6604 return false;
6605 }
6606 };
6607
6611 template <class T, class T_asterisk = http_asterisk>
6613 {
6614 public:
6615 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
6616 parser(locale),
6617 factor(locale)
6618 {}
6619
6620 virtual bool match(
6621 _In_reads_or_z_(end) const char* text,
6622 _In_ size_t start = 0,
6623 _In_ size_t end = SIZE_MAX,
6624 _In_ int flags = match_default)
6625 {
6626 _Assume_(text || start >= end);
6627 size_t konec_vrednosti;
6628 this->interval.end = start;
6629 if (asterisk.match(text, this->interval.end, end, flags)) {
6630 this->interval.end = konec_vrednosti = asterisk.interval.end;
6631 value.invalidate();
6632 }
6633 else if (value.match(text, this->interval.end, end, flags)) {
6634 this->interval.end = konec_vrednosti = value.interval.end;
6635 asterisk.invalidate();
6636 }
6637 else {
6638 asterisk.invalidate();
6639 value.invalidate();
6640 this->interval.invalidate();
6641 return false;
6642 }
6643
6644 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6645 if (this->interval.end < end && text[this->interval.end] == ';') {
6646 this->interval.end++;
6647 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6648 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
6649 this->interval.end++;
6650 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6651 if (this->interval.end < end && text[this->interval.end] == '=') {
6652 this->interval.end++;
6653 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6654 if (factor.match(text, this->interval.end, end, flags))
6655 this->interval.end = factor.interval.end;
6656 }
6657 }
6658 }
6659 if (!factor.interval) {
6660 factor.invalidate();
6662 }
6663 this->interval.start = start;
6664 return true;
6665 }
6666
6667 virtual void invalidate()
6668 {
6669 asterisk.invalidate();
6670 value.invalidate();
6671 factor.invalidate();
6672 parser::invalidate();
6673 }
6674
6675 public:
6676 T_asterisk asterisk;
6677 T value;
6678 http_weight factor;
6679 };
6680
6685 {
6686 public:
6687 virtual bool match(
6688 _In_reads_or_z_(end) const char* text,
6689 _In_ size_t start = 0,
6690 _In_ size_t end = SIZE_MAX,
6691 _In_ int flags = match_default)
6692 {
6693 _Assume_(text || start >= end);
6694 this->interval.end = start;
6695 if (this->interval.end < end && text[this->interval.end] == '$')
6696 this->interval.end++;
6697 else
6698 goto error;
6699 if (name.match(text, this->interval.end, end, flags))
6700 this->interval.end = name.interval.end;
6701 else
6702 goto error;
6703 while (m_space.match(text, this->interval.end, end, flags))
6704 this->interval.end = m_space.interval.end;
6705 if (this->interval.end < end && text[this->interval.end] == '=')
6706 this->interval.end++;
6707 else
6708 goto error;
6709 while (m_space.match(text, this->interval.end, end, flags))
6710 this->interval.end = m_space.interval.end;
6711 if (value.match(text, this->interval.end, end, flags))
6712 this->interval.end = value.interval.end;
6713 else
6714 goto error;
6715 this->interval.start = start;
6716 return true;
6717
6718 error:
6719 name.invalidate();
6720 value.invalidate();
6721 this->interval.invalidate();
6722 return false;
6723 }
6724
6725 virtual void invalidate()
6726 {
6727 name.invalidate();
6728 value.invalidate();
6729 parser::invalidate();
6730 }
6731
6732 public:
6733 http_token name;
6734 http_value value;
6735
6736 protected:
6737 http_space m_space;
6738 };
6739
6743 class http_cookie : public parser
6744 {
6745 public:
6746 virtual bool match(
6747 _In_reads_or_z_(end) const char* text,
6748 _In_ size_t start = 0,
6749 _In_ size_t end = SIZE_MAX,
6750 _In_ int flags = match_default)
6751 {
6752 _Assume_(text || start >= end);
6753 this->interval.end = start;
6754 if (name.match(text, this->interval.end, end, flags))
6755 this->interval.end = name.interval.end;
6756 else
6757 goto error;
6758 while (m_space.match(text, this->interval.end, end, flags))
6759 this->interval.end = m_space.interval.end;
6760 if (this->interval.end < end && text[this->interval.end] == '=')
6761 this->interval.end++;
6762 else
6763 goto error;
6764 while (m_space.match(text, this->interval.end, end, flags))
6765 this->interval.end = m_space.interval.end;
6766 if (value.match(text, this->interval.end, end, flags))
6767 this->interval.end = value.interval.end;
6768 else
6769 goto error;
6770 params.clear();
6771 for (;;) {
6772 if (this->interval.end < end && text[this->interval.end]) {
6773 if (m_space.match(text, this->interval.end, end, flags))
6774 this->interval.end = m_space.interval.end;
6775 else if (text[this->interval.end] == ';') {
6776 this->interval.end++;
6777 while (m_space.match(text, this->interval.end, end, flags))
6778 this->interval.end = m_space.interval.end;
6780 if (param.match(text, this->interval.end, end, flags)) {
6781 this->interval.end = param.interval.end;
6782 params.push_back(std::move(param));
6783 }
6784 else
6785 break;
6786 }
6787 else
6788 break;
6789 }
6790 else
6791 break;
6792 }
6793 this->interval.start = start;
6794 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
6795 return true;
6796
6797 error:
6798 name.invalidate();
6799 value.invalidate();
6800 params.clear();
6801 this->interval.invalidate();
6802 return false;
6803 }
6804
6805 virtual void invalidate()
6806 {
6807 name.invalidate();
6808 value.invalidate();
6809 params.clear();
6810 parser::invalidate();
6811 }
6812
6813 public:
6816 std::list<http_cookie_parameter> params;
6817
6818 protected:
6819 http_space m_space;
6820 };
6821
6825 class http_agent : public parser
6826 {
6827 public:
6828 virtual bool match(
6829 _In_reads_or_z_(end) const char* text,
6830 _In_ size_t start = 0,
6831 _In_ size_t end = SIZE_MAX,
6832 _In_ int flags = match_default)
6833 {
6834 _Assume_(text || start >= end);
6835 this->interval.end = start;
6836 type.start = this->interval.end;
6837 for (;;) {
6838 if (this->interval.end < end && text[this->interval.end]) {
6839 if (text[this->interval.end] == '/') {
6840 type.end = this->interval.end;
6841 this->interval.end++;
6842 version.start = this->interval.end;
6843 for (;;) {
6844 if (this->interval.end < end && text[this->interval.end]) {
6845 if (stdex::isspace(text[this->interval.end])) {
6846 version.end = this->interval.end;
6847 break;
6848 }
6849 else
6850 this->interval.end++;
6851 }
6852 else {
6853 version.end = this->interval.end;
6854 break;
6855 }
6856 }
6857 break;
6858 }
6859 else if (stdex::isspace(text[this->interval.end])) {
6860 type.end = this->interval.end;
6861 break;
6862 }
6863 else
6864 this->interval.end++;
6865 }
6866 else {
6867 type.end = this->interval.end;
6868 break;
6869 }
6870 }
6872 this->interval.start = start;
6873 return true;
6874 }
6875 type.start = 1;
6876 type.end = 0;
6877 version.start = 1;
6878 version.end = 0;
6879 this->interval.invalidate();
6880 return false;
6881 }
6882
6883 virtual void invalidate()
6884 {
6885 type.start = 1;
6886 type.end = 0;
6887 version.start = 1;
6888 version.end = 0;
6889 parser::invalidate();
6890 }
6891
6892 public:
6895 };
6896
6900 class http_protocol : public parser
6901 {
6902 public:
6903 http_protocol(_In_ const std::locale& locale = std::locale()) :
6904 parser(locale),
6905 version(0x009)
6906 {}
6907
6908 virtual bool match(
6909 _In_reads_or_z_(end) const char* text,
6910 _In_ size_t start = 0,
6911 _In_ size_t end = SIZE_MAX,
6912 _In_ int flags = match_default)
6913 {
6914 _Assume_(text || start >= end);
6915 this->interval.end = start;
6916 type.start = this->interval.end;
6917 for (;;) {
6918 if (this->interval.end < end && text[this->interval.end]) {
6919 if (text[this->interval.end] == '/') {
6920 type.end = this->interval.end;
6921 this->interval.end++;
6922 break;
6923 }
6924 else if (stdex::isspace(text[this->interval.end]))
6925 goto error;
6926 else
6927 this->interval.end++;
6928 }
6929 else {
6930 type.end = this->interval.end;
6931 goto error;
6932 }
6933 }
6934 version_maj.start = this->interval.end;
6935 for (;;) {
6936 if (this->interval.end < end && text[this->interval.end]) {
6937 if (text[this->interval.end] == '.') {
6938 version_maj.end = this->interval.end;
6939 this->interval.end++;
6940 version_min.start = this->interval.end;
6941 for (;;) {
6942 if (this->interval.end < end && text[this->interval.end]) {
6943 if (stdex::isspace(text[this->interval.end])) {
6944 version_min.end = this->interval.end;
6945 version =
6946 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6947 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6948 break;
6949 }
6950 else
6951 this->interval.end++;
6952 }
6953 else
6954 goto error;
6955 }
6956 break;
6957 }
6958 else if (stdex::isspace(text[this->interval.end])) {
6959 version_maj.end = this->interval.end;
6960 version_min.start = 1;
6961 version_min.end = 0;
6962 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6963 break;
6964 }
6965 else
6966 this->interval.end++;
6967 }
6968 else
6969 goto error;
6970 }
6971 this->interval.start = start;
6972 return true;
6973
6974 error:
6975 type.start = 1;
6976 type.end = 0;
6977 version_maj.start = 1;
6978 version_maj.end = 0;
6979 version_min.start = 1;
6980 version_min.end = 0;
6981 version = 0x009;
6982 this->interval.invalidate();
6983 return false;
6984 }
6985
6986 virtual void invalidate()
6987 {
6988 type.start = 1;
6989 type.end = 0;
6990 version_maj.start = 1;
6991 version_maj.end = 0;
6992 version_min.start = 1;
6993 version_min.end = 0;
6994 version = 0x009;
6995 parser::invalidate();
6996 }
6997
6998 public:
7000 stdex::interval<size_t> version_maj;
7001 stdex::interval<size_t> version_min;
7003 };
7004
7008 class http_request : public parser
7009 {
7010 public:
7011 http_request(_In_ const std::locale& locale = std::locale()) :
7012 parser(locale),
7013 url(locale),
7014 protocol(locale)
7015 {}
7016
7017 virtual bool match(
7018 _In_reads_or_z_(end) const char* text,
7019 _In_ size_t start = 0,
7020 _In_ size_t end = SIZE_MAX,
7021 _In_ int flags = match_default)
7022 {
7023 _Assume_(text || start >= end);
7024 this->interval.end = start;
7025
7026 for (;;) {
7027 if (m_line_break.match(text, this->interval.end, end, flags))
7028 goto error;
7029 else if (this->interval.end < end && text[this->interval.end]) {
7030 if (stdex::isspace(text[this->interval.end]))
7031 this->interval.end++;
7032 else
7033 break;
7034 }
7035 else
7036 goto error;
7037 }
7038 verb.start = this->interval.end;
7039 for (;;) {
7040 if (m_line_break.match(text, this->interval.end, end, flags))
7041 goto error;
7042 else if (this->interval.end < end && text[this->interval.end]) {
7043 if (stdex::isspace(text[this->interval.end])) {
7044 verb.end = this->interval.end;
7045 this->interval.end++;
7046 break;
7047 }
7048 else
7049 this->interval.end++;
7050 }
7051 else
7052 goto error;
7053 }
7054
7055 for (;;) {
7056 if (m_line_break.match(text, this->interval.end, end, flags))
7057 goto error;
7058 else if (this->interval.end < end && text[this->interval.end]) {
7059 if (stdex::isspace(text[this->interval.end]))
7060 this->interval.end++;
7061 else
7062 break;
7063 }
7064 else
7065 goto error;
7066 }
7067 if (url.match(text, this->interval.end, end, flags))
7068 this->interval.end = url.interval.end;
7069 else
7070 goto error;
7071
7072 protocol.invalidate();
7073 for (;;) {
7074 if (m_line_break.match(text, this->interval.end, end, flags)) {
7075 this->interval.end = m_line_break.interval.end;
7076 goto end;
7077 }
7078 else if (this->interval.end < end && text[this->interval.end]) {
7079 if (stdex::isspace(text[this->interval.end]))
7080 this->interval.end++;
7081 else
7082 break;
7083 }
7084 else
7085 goto end;
7086 }
7087 for (;;) {
7088 if (m_line_break.match(text, this->interval.end, end, flags)) {
7089 this->interval.end = m_line_break.interval.end;
7090 goto end;
7091 }
7092 else if (protocol.match(text, this->interval.end, end, flags)) {
7093 this->interval.end = protocol.interval.end;
7094 break;
7095 }
7096 else
7097 goto end;
7098 }
7099
7100 for (;;) {
7101 if (m_line_break.match(text, this->interval.end, end, flags)) {
7102 this->interval.end = m_line_break.interval.end;
7103 break;
7104 }
7105 else if (this->interval.end < end && text[this->interval.end])
7106 this->interval.end++;
7107 else
7108 goto end;
7109 }
7110
7111 end:
7112 this->interval.start = start;
7113 return true;
7114
7115 error:
7116 verb.start = 1;
7117 verb.end = 0;
7118 url.invalidate();
7119 protocol.invalidate();
7120 this->interval.invalidate();
7121 return false;
7122 }
7123
7124 virtual void invalidate()
7125 {
7126 verb.start = 1;
7127 verb.end = 0;
7128 url.invalidate();
7129 protocol.invalidate();
7130 parser::invalidate();
7131 }
7132
7133 public:
7135 http_url url;
7136 http_protocol protocol;
7137
7138 protected:
7139 http_line_break m_line_break;
7140 };
7141
7145 class http_header : public parser
7146 {
7147 public:
7148 virtual bool match(
7149 _In_reads_or_z_(end) const char* text,
7150 _In_ size_t start = 0,
7151 _In_ size_t end = SIZE_MAX,
7152 _In_ int flags = match_default)
7153 {
7154 _Assume_(text || start >= end);
7155 this->interval.end = start;
7156
7157 if (m_line_break.match(text, this->interval.end, end, flags) ||
7158 (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])))
7159 goto error;
7160 name.start = this->interval.end;
7161 for (;;) {
7162 if (m_line_break.match(text, this->interval.end, end, flags))
7163 goto error;
7164 else if (this->interval.end < end && text[this->interval.end]) {
7165 if (stdex::isspace(text[this->interval.end])) {
7166 name.end = this->interval.end;
7167 this->interval.end++;
7168 for (;;) {
7169 if (m_line_break.match(text, this->interval.end, end, flags))
7170 goto error;
7171 else if (this->interval.end < end && text[this->interval.end]) {
7172 if (stdex::isspace(text[this->interval.end]))
7173 this->interval.end++;
7174 else
7175 break;
7176 }
7177 else
7178 goto error;
7179 }
7180 if (this->interval.end < end && text[this->interval.end] == ':') {
7181 this->interval.end++;
7182 break;
7183 }
7184 else
7185 goto error;
7186 break;
7187 }
7188 else if (text[this->interval.end] == ':') {
7189 name.end = this->interval.end;
7190 this->interval.end++;
7191 break;
7192 }
7193 else
7194 this->interval.end++;
7195 }
7196 else
7197 goto error;
7198 }
7199 value.start = SIZE_MAX;
7200 value.end = 0;
7201 for (;;) {
7202 if (m_line_break.match(text, this->interval.end, end, flags)) {
7203 this->interval.end = m_line_break.interval.end;
7204 if (!m_line_break.match(text, this->interval.end, end, flags) &&
7205 this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end]))
7206 this->interval.end++;
7207 else
7208 break;
7209 }
7210 else if (this->interval.end < end && text[this->interval.end]) {
7211 if (stdex::isspace(text[this->interval.end]))
7212 this->interval.end++;
7213 else {
7214 if (value.start == SIZE_MAX) value.start = this->interval.end;
7215 value.end = ++this->interval.end;
7216 }
7217 }
7218 else
7219 break;
7220 }
7221 this->interval.start = start;
7222 return true;
7223
7224 error:
7225 name.start = 1;
7226 name.end = 0;
7227 value.start = 1;
7228 value.end = 0;
7229 this->interval.invalidate();
7230 return false;
7231 }
7232
7233 virtual void invalidate()
7234 {
7235 name.start = 1;
7236 name.end = 0;
7237 value.start = 1;
7238 value.end = 0;
7239 parser::invalidate();
7240 }
7241
7242 public:
7245
7246 protected:
7247 http_line_break m_line_break;
7248 };
7249
7253 template <class _Key, class T>
7254 class http_value_collection : public T
7255 {
7256 public:
7257 void insert(
7258 _In_reads_or_z_(end) const char* text,
7259 _In_ size_t start = 0,
7260 _In_ size_t end = SIZE_MAX,
7261 _In_ int flags = match_default)
7262 {
7263 while (start < end) {
7264 while (start < end && text[start] && stdex::isspace(text[start])) start++;
7265 if (start < end && text[start] == ',') {
7266 start++;
7267 while (start < end&& text[start] && stdex::isspace(text[start])) start++;
7268 }
7269 _Key el;
7270 if (el.match(text, start, end, flags)) {
7271 start = el.interval.end;
7272 T::insert(std::move(el));
7273 }
7274 else
7275 break;
7276 }
7277 }
7278 };
7279
7280 template <class T>
7282 constexpr bool operator()(const T& a, const T& b) const noexcept
7283 {
7284 return a.factor.value > b.factor.value;
7285 }
7286 };
7287
7291 template <class T, class _Alloc = std::allocator<T>>
7293
7297 template <class T>
7299 {
7300 public:
7302 _In_ const std::shared_ptr<basic_parser<T>>& quote,
7303 _In_ const std::shared_ptr<basic_parser<T>>& chr,
7304 _In_ const std::shared_ptr<basic_parser<T>>& escape,
7305 _In_ const std::shared_ptr<basic_parser<T>>& sol,
7306 _In_ const std::shared_ptr<basic_parser<T>>& bs,
7307 _In_ const std::shared_ptr<basic_parser<T>>& ff,
7308 _In_ const std::shared_ptr<basic_parser<T>>& lf,
7309 _In_ const std::shared_ptr<basic_parser<T>>& cr,
7310 _In_ const std::shared_ptr<basic_parser<T>>& htab,
7311 _In_ const std::shared_ptr<basic_parser<T>>& uni,
7312 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
7313 _In_ const std::locale& locale = std::locale()) :
7314 basic_parser<T>(locale),
7315 m_quote(quote),
7316 m_chr(chr),
7317 m_escape(escape),
7318 m_sol(sol),
7319 m_bs(bs),
7320 m_ff(ff),
7321 m_lf(lf),
7322 m_cr(cr),
7323 m_htab(htab),
7324 m_uni(uni),
7325 m_hex(hex)
7326 {}
7327
7328 virtual bool match(
7329 _In_reads_or_z_opt_(end) const T* text,
7330 _In_ size_t start = 0,
7331 _In_ size_t end = SIZE_MAX,
7332 _In_ int flags = match_default)
7333 {
7334 _Assume_(text || start >= end);
7335 this->interval.end = start;
7336 if (m_quote->match(text, this->interval.end, end, flags)) {
7337 this->interval.end = m_quote->interval.end;
7338 value.clear();
7339 for (;;) {
7340 if (m_quote->match(text, this->interval.end, end, flags)) {
7341 this->interval.start = start;
7342 this->interval.end = m_quote->interval.end;
7343 return true;
7344 }
7345 if (m_escape->match(text, this->interval.end, end, flags)) {
7346 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
7347 value += '"'; this->interval.end = m_quote->interval.end;
7348 continue;
7349 }
7350 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
7351 value += '/'; this->interval.end = m_sol->interval.end;
7352 continue;
7353 }
7354 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
7355 value += '\b'; this->interval.end = m_bs->interval.end;
7356 continue;
7357 }
7358 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
7359 value += '\f'; this->interval.end = m_ff->interval.end;
7360 continue;
7361 }
7362 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
7363 value += '\n'; this->interval.end = m_lf->interval.end;
7364 continue;
7365 }
7366 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
7367 value += '\r'; this->interval.end = m_cr->interval.end;
7368 continue;
7369 }
7370 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
7371 value += '\t'; this->interval.end = m_htab->interval.end;
7372 continue;
7373 }
7374 if (
7375 m_uni->match(text, m_escape->interval.end, end, flags) &&
7376 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
7377 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
7378 {
7379 _Assume_(m_hex->value <= 0xffff);
7380 if (sizeof(T) == 1) {
7381 if (m_hex->value > 0x7ff) {
7382 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
7383 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
7384 value += (T)(0x80 | (m_hex->value & 0x3f));
7385 }
7386 else if (m_hex->value > 0x7f) {
7387 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
7388 value += (T)(0x80 | (m_hex->value & 0x3f));
7389 }
7390 else
7391 value += (T)(m_hex->value & 0x7f);
7392 }
7393 else
7394 value += (T)m_hex->value;
7395 this->interval.end = m_hex->interval.end;
7396 continue;
7397 }
7398 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
7399 value += '\\'; this->interval.end = m_escape->interval.end;
7400 continue;
7401 }
7402 }
7403 if (m_chr->match(text, this->interval.end, end, flags)) {
7404 value.append(text + m_chr->interval.start, m_chr->interval.size());
7405 this->interval.end = m_chr->interval.end;
7406 continue;
7407 }
7408 break;
7409 }
7410 }
7411 value.clear();
7412 this->interval.invalidate();
7413 return false;
7414 }
7415
7416 virtual void invalidate()
7417 {
7418 value.clear();
7420 }
7421
7422 public:
7423 std::basic_string<T> value;
7424
7425 protected:
7426 std::shared_ptr<basic_parser<T>> m_quote;
7427 std::shared_ptr<basic_parser<T>> m_chr;
7428 std::shared_ptr<basic_parser<T>> m_escape;
7429 std::shared_ptr<basic_parser<T>> m_sol;
7430 std::shared_ptr<basic_parser<T>> m_bs;
7431 std::shared_ptr<basic_parser<T>> m_ff;
7432 std::shared_ptr<basic_parser<T>> m_lf;
7433 std::shared_ptr<basic_parser<T>> m_cr;
7434 std::shared_ptr<basic_parser<T>> m_htab;
7435 std::shared_ptr<basic_parser<T>> m_uni;
7436 std::shared_ptr<basic_integer16<T>> m_hex;
7437 };
7438
7441#ifdef _UNICODE
7442 using tjson_string = wjson_string;
7443#else
7444 using tjson_string = json_string;
7445#endif
7446
7450 template <class T>
7452 {
7453 public:
7454 virtual bool match(
7455 _In_reads_or_z_opt_(end) const T* text,
7456 _In_ size_t start = 0,
7457 _In_ size_t end = SIZE_MAX,
7458 _In_ int flags = match_multiline)
7459 {
7460 _Unreferenced_(flags);
7461 _Assume_(text || start + 1 >= end);
7462 if (start + 1 < end &&
7463 text[start] == '/' &&
7464 text[start + 1] == '*')
7465 {
7466 // /*
7467 this->content.start = this->interval.end = start + 2;
7468 for (;;) {
7469 if (this->interval.end >= end || !text[this->interval.end])
7470 break;
7471 if (this->interval.end + 1 < end &&
7472 text[this->interval.end] == '*' &&
7473 text[this->interval.end + 1] == '/')
7474 {
7475 // /*...*/
7476 this->content.end = this->interval.end;
7477 this->interval.start = start;
7478 this->interval.end = this->interval.end + 2;
7479 return true;
7480 }
7481 this->interval.end++;
7482 }
7483 }
7484 this->content.invalidate();
7485 this->interval.invalidate();
7486 return false;
7487 }
7488
7489 virtual void invalidate()
7490 {
7491 this->content.invalidate();
7492 basic_parser::invalidate();
7493 }
7494
7495 public:
7497 };
7498
7501#ifdef _UNICODE
7502 using tcss_comment = wcss_comment;
7503#else
7504 using tcss_comment = css_comment;
7505#endif
7506
7510 template <class T>
7511 class basic_css_cdo : public basic_parser<T>
7512 {
7513 public:
7514 virtual bool match(
7515 _In_reads_or_z_opt_(end) const T* text,
7516 _In_ size_t start = 0,
7517 _In_ size_t end = SIZE_MAX,
7518 _In_ int flags = match_multiline)
7519 {
7520 _Unreferenced_(flags);
7521 _Assume_(text || start + 3 >= end);
7522 if (start + 3 < end &&
7523 text[start] == '<' &&
7524 text[start + 1] == '!' &&
7525 text[start + 2] == '-' &&
7526 text[start + 3] == '-')
7527 {
7528 this->interval.start = start;
7529 this->interval.end = start + 4;
7530 return true;
7531 }
7532 this->interval.invalidate();
7533 return false;
7534 }
7535 };
7536
7539#ifdef _UNICODE
7540 using tcss_cdo = wcss_cdo;
7541#else
7542 using tcss_cdo = css_cdo;
7543#endif
7544
7548 template <class T>
7549 class basic_css_cdc : public basic_parser<T>
7550 {
7551 public:
7552 virtual bool match(
7553 _In_reads_or_z_opt_(end) const T* text,
7554 _In_ size_t start = 0,
7555 _In_ size_t end = SIZE_MAX,
7556 _In_ int flags = match_multiline)
7557 {
7558 _Unreferenced_(flags);
7559 _Assume_(text || start + 2 >= end);
7560 if (start + 2 < end &&
7561 text[start] == '-' &&
7562 text[start + 1] == '-' &&
7563 text[start + 2] == '>')
7564 {
7565 this->interval.start = start;
7566 this->interval.end = start + 3;
7567 return true;
7568 }
7569 this->interval.invalidate();
7570 return false;
7571 }
7572 };
7573
7576#ifdef _UNICODE
7577 using tcss_cdc = wcss_cdc;
7578#else
7579 using tcss_cdc = css_cdc;
7580#endif
7581
7585 template <class T>
7587 {
7588 public:
7589 virtual bool match(
7590 _In_reads_or_z_opt_(end) const T* text,
7591 _In_ size_t start = 0,
7592 _In_ size_t end = SIZE_MAX,
7593 _In_ int flags = match_multiline)
7594 {
7595 _Unreferenced_(flags);
7596 this->interval.end = start;
7597 _Assume_(text || this->interval.end >= end);
7598 if (this->interval.end < end &&
7599 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7600 {
7601 // "Quoted...
7602 T quote = text[this->interval.end];
7603 this->content.start = ++this->interval.end;
7604 for (;;) {
7605 if (this->interval.end >= end || !text[this->interval.end])
7606 break;
7607 if (text[this->interval.end] == quote) {
7608 // End quote"
7609 this->content.end = this->interval.end;
7610 this->interval.start = start;
7611 this->interval.end++;
7612 return true;
7613 }
7614 if (this->interval.end + 1 < end &&
7615 text[this->interval.end] == '\\' &&
7616 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7617 {
7618 // Escaped quote
7619 this->interval.end = this->interval.end + 2;
7620 }
7621 else
7622 this->interval.end++;
7623 }
7624 }
7625
7626 this->content.invalidate();
7627 this->interval.invalidate();
7628 return false;
7629 }
7630
7631 virtual void invalidate()
7632 {
7633 this->content.invalidate();
7634 basic_parser::invalidate();
7635 }
7636
7637 public:
7639 };
7640
7643#ifdef _UNICODE
7644 using tcss_string = wcss_string;
7645#else
7646 using tcss_string = css_string;
7647#endif
7648
7652 template <class T>
7653 class basic_css_uri : public basic_parser<T>
7654 {
7655 public:
7656 virtual bool match(
7657 _In_reads_or_z_opt_(end) const T* text,
7658 _In_ size_t start = 0,
7659 _In_ size_t end = SIZE_MAX,
7660 _In_ int flags = match_multiline)
7661 {
7662 _Unreferenced_(flags);
7663 this->interval.end = start;
7664 _Assume_(text || this->interval.end + 3 >= end);
7665 if (this->interval.end + 3 < end &&
7666 (text[this->interval.end] == 'u' || text[this->interval.end] == 'U') &&
7667 (text[this->interval.end + 1] == 'r' || text[this->interval.end + 1] == 'R') &&
7668 (text[this->interval.end + 2] == 'l' || text[this->interval.end + 2] == 'L') &&
7669 text[this->interval.end + 3] == '(')
7670 {
7671 // url(
7672 this->interval.end = this->interval.end + 4;
7673
7674 // Skip whitespace.
7675 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7676 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7677
7678 if (this->interval.end < end &&
7679 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7680 {
7681 // url("Quoted...
7682 T quote = text[this->interval.end];
7683 this->content.start = ++this->interval.end;
7684 for (;;) {
7685 if (this->interval.end >= end || !text[this->interval.end])
7686 goto error;
7687 if (text[this->interval.end] == quote) {
7688 // End quote"
7689 this->content.end = this->interval.end;
7690 this->interval.end++;
7691 break;
7692 }
7693 if (this->interval.end + 1 < end &&
7694 text[this->interval.end] == '\\' &&
7695 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7696 {
7697 // Escaped quote
7698 this->interval.end = this->interval.end + 2;
7699 }
7700 else
7701 this->interval.end++;
7702 }
7703
7704 // Skip whitespace.
7705 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7706
7707 if (this->interval.end < end &&
7708 text[this->interval.end] == ')')
7709 {
7710 // url("...")
7711 this->interval.start = start;
7712 this->interval.end++;
7713 return true;
7714 }
7715 }
7716 else {
7717 // url(...
7718 this->content.start = content.end = this->interval.end;
7719 for (;;) {
7720 if (this->interval.end >= end || !text[this->interval.end])
7721 goto error;
7722 if (text[this->interval.end] == ')') {
7723 // url(...)
7724 this->interval.start = start;
7725 this->interval.end++;
7726 return true;
7727 }
7728 if (ctype.is(ctype.space, text[this->interval.end]))
7729 this->interval.end++;
7730 else
7731 this->content.end = ++this->interval.end;
7732 }
7733 }
7734 }
7735
7736 error:
7737 this->content.invalidate();
7738 this->interval.invalidate();
7739 return false;
7740 }
7741
7742 virtual void invalidate()
7743 {
7744 this->content.invalidate();
7745 basic_parser::invalidate();
7746 }
7747
7748 public:
7750 };
7751
7754#ifdef _UNICODE
7755 using tcss_uri = wcss_uri;
7756#else
7757 using tcss_uri = css_uri;
7758#endif
7759
7763 template <class T>
7765 {
7766 public:
7767 virtual bool match(
7768 _In_reads_or_z_opt_(end) const T* text,
7769 _In_ size_t start = 0,
7770 _In_ size_t end = SIZE_MAX,
7771 _In_ int flags = match_multiline)
7772 {
7773 _Unreferenced_(flags);
7774 this->interval.end = start;
7775 _Assume_(text || this->interval.end + 6 >= end);
7776 if (this->interval.end + 6 < end &&
7777 text[this->interval.end] == '@' &&
7778 (text[this->interval.end + 1] == 'i' || text[this->interval.end + 1] == 'I') &&
7779 (text[this->interval.end + 2] == 'm' || text[this->interval.end + 2] == 'M') &&
7780 (text[this->interval.end + 3] == 'p' || text[this->interval.end + 3] == 'P') &&
7781 (text[this->interval.end + 4] == 'o' || text[this->interval.end + 4] == 'O') &&
7782 (text[this->interval.end + 5] == 'r' || text[this->interval.end + 5] == 'R') &&
7783 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T'))
7784 {
7785 // @import...
7786 this->interval.end = this->interval.end + 7;
7787
7788 // Skip whitespace.
7789 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7790 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7791
7792 if (this->interval.end < end &&
7793 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7794 {
7795 // @import "Quoted
7796 T quote = text[this->interval.end];
7797 this->content.start = ++this->interval.end;
7798 for (;;) {
7799 if (this->interval.end >= end || !text[this->interval.end])
7800 goto error;
7801 if (text[this->interval.end] == quote) {
7802 // End quote"
7803 this->content.end = this->interval.end;
7804 this->interval.start = start;
7805 this->interval.end++;
7806 return true;
7807 }
7808 if (this->interval.end + 1 < end &&
7809 text[this->interval.end] == '\\' &&
7810 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7811 {
7812 // Escaped quote
7813 this->interval.end = this->interval.end + 2;
7814 }
7815 else
7816 this->interval.end++;
7817 }
7818 }
7819 }
7820
7821 error:
7822 this->content.invalidate();
7823 this->interval.invalidate();
7824 return false;
7825 }
7826
7827 virtual void invalidate()
7828 {
7829 this->content.invalidate();
7830 basic_parser::invalidate();
7831 }
7832
7833 public:
7835 };
7836
7839#ifdef _UNICODE
7840 using tcss_import = wcss_import;
7841#else
7842 using tcss_import = css_import;
7843#endif
7844
7848 template <class T>
7850 {
7851 public:
7852 virtual bool match(
7853 _In_reads_or_z_opt_(end) const T* text,
7854 _In_ size_t start = 0,
7855 _In_ size_t end = SIZE_MAX,
7856 _In_ int flags = match_multiline)
7857 {
7858 _Unreferenced_(flags);
7859 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7860
7861 this->interval.end = start;
7862 this->base_type.start = this->interval.end;
7863 for (;;) {
7864 _Assume_(text || this->interval.end >= end);
7865 if (this->interval.end >= end || !text[this->interval.end])
7866 break;
7867 if (text[this->interval.end] == '/' ||
7868 text[this->interval.end] == ';' ||
7869 ctype.is(ctype.space, text[this->interval.end]))
7870 break;
7871 this->interval.end++;
7872 }
7873 if (this->interval.end <= this->base_type.start)
7874 goto error;
7875 this->base_type.end = this->interval.end;
7876
7877 if (end <= this->interval.end || text[this->interval.end] != '/')
7878 goto error;
7879
7880 this->interval.end++;
7881 this->sub_type.start = this->interval.end;
7882 for (;;) {
7883 if (this->interval.end >= end || !text[this->interval.end])
7884 break;
7885 if (text[this->interval.end] == '/' ||
7886 text[this->interval.end] == ';' ||
7887 ctype.is(ctype.space, text[this->interval.end]))
7888 break;
7889 this->interval.end++;
7890 }
7891 if (this->interval.end <= this->sub_type.start)
7892 goto error;
7893
7894 this->sub_type.end = this->interval.end;
7895 this->charset.invalidate();
7896 if (this->interval.end < end && text[this->interval.end] == ';') {
7897 this->interval.end++;
7898
7899 // Skip whitespace.
7900 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7901
7902 if (this->interval.end + 7 < end &&
7903 (text[this->interval.end] == 'c' || text[this->interval.end] == 'C') &&
7904 (text[this->interval.end + 1] == 'h' || text[this->interval.end + 1] == 'H') &&
7905 (text[this->interval.end + 2] == 'a' || text[this->interval.end + 2] == 'A') &&
7906 (text[this->interval.end + 3] == 'r' || text[this->interval.end + 3] == 'R') &&
7907 (text[this->interval.end + 4] == 's' || text[this->interval.end + 4] == 'S') &&
7908 (text[this->interval.end + 5] == 'e' || text[this->interval.end + 5] == 'E') &&
7909 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T') &&
7910 text[this->interval.end + 7] == '=')
7911 {
7912 this->interval.end = this->interval.end + 8;
7913 if (this->interval.end < end &&
7914 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7915 {
7916 // "Quoted...
7917 T quote = text[this->interval.end];
7918 this->charset.start = ++this->interval.end;
7919 for (;;) {
7920 if (this->interval.end >= end || !text[this->interval.end]) {
7921 // No end quote!
7922 this->charset.invalidate();
7923 break;
7924 }
7925 if (text[this->interval.end] == quote) {
7926 // End quote"
7927 this->charset.end = this->interval.end;
7928 this->interval.end++;
7929 break;
7930 }
7931 this->interval.end++;
7932 }
7933 }
7934 else {
7935 // Nonquoted
7936 this->charset.start = this->interval.end;
7937 for (;;) {
7938 if (this->interval.end >= end || !text[this->interval.end] ||
7939 ctype.is(ctype.space, text[this->interval.end])) {
7940 this->charset.end = this->interval.end;
7941 break;
7942 }
7943 this->interval.end++;
7944 }
7945 }
7946 }
7947 }
7948 this->interval.start = start;
7949 return true;
7950
7951 error:
7952 this->base_type.invalidate();
7953 this->sub_type.invalidate();
7954 this->charset.invalidate();
7955 this->interval.invalidate();
7956 return false;
7957 }
7958
7959 virtual void invalidate()
7960 {
7961 this->base_type.invalidate();
7962 this->sub_type.invalidate();
7963 this->charset.invalidate();
7964 basic_parser::invalidate();
7965 }
7966
7967 public:
7971 };
7972
// Build-dependent alias: tmime_type resolves to wmime_type when _UNICODE is
// defined and to mime_type otherwise.
7975#ifdef _UNICODE
7976 using tmime_type = wmime_type;
7977#else
7978 using tmime_type = mime_type;
7979#endif
7980
7984 template <class T>
7986 {
7987 public:
7988 virtual bool match(
7989 _In_reads_or_z_opt_(end) const T* text,
7990 _In_ size_t start = 0,
7991 _In_ size_t end = SIZE_MAX,
7992 _In_ int flags = match_default)
7993 {
7994 _Unreferenced_(flags);
7995 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7996 this->interval.end = start;
7997 for (;;) {
7998 _Assume_(text || this->interval.end >= end);
7999 if (this->interval.end >= end || !text[this->interval.end]) {
8001 this->interval.start = start;
8002 return true;
8003 }
8004 this->interval.invalidate();
8005 return false;
8006 }
8007 if (text[this->interval.end] == '>' ||
8008 text[this->interval.end] == '=' ||
8009 text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>' ||
8010 ctype.is(ctype.space, text[this->interval.end]))
8011 {
8012 this->interval.start = start;
8013 return true;
8014 }
8015 this->interval.end++;
8016 }
8017 }
8018 };
8019
// Build-dependent alias: thtml_ident resolves to whtml_ident when _UNICODE is
// defined and to html_ident otherwise.
8022#ifdef _UNICODE
8023 using thtml_ident = whtml_ident;
8024#else
8025 using thtml_ident = html_ident;
8026#endif
8027
8031 template <class T>
8033 {
8034 public:
8035 virtual bool match(
8036 _In_reads_or_z_opt_(end) const T* text,
8037 _In_ size_t start = 0,
8038 _In_ size_t end = SIZE_MAX,
8039 _In_ int flags = match_default)
8040 {
8041 _Unreferenced_(flags);
8042 this->interval.end = start;
8043 _Assume_(text || this->interval.end >= end);
8044 if (this->interval.end < end &&
8045 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
8046 {
8047 // "Quoted...
8048 T quote = text[this->interval.end];
8049 this->content.start = ++this->interval.end;
8050 for (;;) {
8051 if (this->interval.end >= end || !text[this->interval.end]) {
8052 // No end quote!
8053 this->content.invalidate();
8054 this->interval.invalidate();
8055 return false;
8056 }
8057 if (text[this->interval.end] == quote) {
8058 // End quote"
8059 this->content.end = this->interval.end;
8060 this->interval.start = start;
8061 this->interval.end++;
8062 return true;
8063 }
8064 this->interval.end++;
8065 }
8066 }
8067
8068 // Nonquoted
8069 this->content.start = this->interval.end;
8070 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8071 for (;;) {
8072 _Assume_(text || this->interval.end >= end);
8073 if (this->interval.end >= end || !text[this->interval.end]) {
8074 this->content.end = this->interval.end;
8075 this->interval.start = start;
8076 return true;
8077 }
8078 if (text[this->interval.end] == '>' ||
8079 text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>' ||
8080 ctype.is(ctype.space, text[this->interval.end]))
8081 {
8082 this->content.end = this->interval.end;
8083 this->interval.start = start;
8084 return true;
8085 }
8086 this->interval.end++;
8087 }
8088 }
8089
8090 virtual void invalidate()
8091 {
8092 this->content.invalidate();
8093 basic_parser::invalidate();
8094 }
8095
8096 public:
8098 };
8099
// Build-dependent alias: thtml_value resolves to whtml_value when _UNICODE is
// defined and to html_value otherwise.
8102#ifdef _UNICODE
8103 using thtml_value = whtml_value;
8104#else
8105 using thtml_value = html_value;
8106#endif
8107
/// Kind of HTML sequence.
enum class html_sequence_t {
    unknown = -1,      ///< Not recognized / no match
    text = 0,
    element = 1,       ///< Self-closed element: <tag .../>
    element_start = 2, ///< Element start tag: <tag ...>
    element_end = 3,   ///< Element end tag: </tag>
    declaration = 4,   ///< Declaration: <!...>
    comment = 5,       ///< Comment: <!--...-->
    instruction = 6,   ///< Processing instruction: <?...> or <?...?>
    PCDATA = 7,
    CDATA = 8,
};
8124
8132
8136 template <class T>
8138 {
8139 public:
8140 basic_html_tag(_In_ const std::locale& locale = std::locale()) :
8141 basic_parser(locale),
8142 type(html_sequence_t::unknown)
8143 {}
8144
8145 virtual bool match(
8146 _In_reads_or_z_opt_(end) const T* text,
8147 _In_ size_t start = 0,
8148 _In_ size_t end = SIZE_MAX,
8149 _In_ int flags = match_multiline)
8150 {
8151 _Assume_(text || start >= end);
8152 if (start >= end || text[start] != '<')
8153 goto error;
8154 this->interval.end = start + 1;
8155 if (this->interval.end >= end || !text[this->interval.end])
8156 goto error;
8157 if (text[this->interval.end] == '/' &&
8158 this->m_ident.match(text, this->interval.end + 1, end, flags))
8159 {
8160 // </...
8161 this->type = html_sequence_t::element_end;
8162 this->name = this->m_ident.interval;
8163 this->interval.end = this->m_ident.interval.end;
8164 }
8165 else if (text[this->interval.end] == '!') {
8166 // <!...
8167 this->interval.end++;
8168 if (this->interval.end + 1 < end &&
8169 text[this->interval.end] == '-' &&
8170 text[this->interval.end + 1] == '-')
8171 {
8172 // <!--...
8173 this->name.start = this->interval.end = this->interval.end + 2;
8174 for (;;) {
8175 if (this->interval.end >= end || !text[this->interval.end])
8176 goto error;
8177 if (this->interval.end + 2 < end &&
8178 text[this->interval.end] == '-' &&
8179 text[this->interval.end + 1] == '-' &&
8180 text[this->interval.end + 2] == '>')
8181 {
8182 // <!--...-->
8183 this->type = html_sequence_t::comment;
8184 this->name.end = this->interval.end;
8185 this->attributes.clear();
8186 this->interval.start = start;
8187 this->interval.end = this->interval.end + 3;
8188 return true;
8189 }
8190 this->interval.end++;
8191 }
8192 }
8193 this->type = html_sequence_t::declaration;
8194 this->name.start = this->name.end = this->interval.end;
8195 }
8196 else if (text[this->interval.end] == '?') {
8197 // <?...
8198 this->name.start = ++this->interval.end;
8199 for (;;) {
8200 if (this->interval.end >= end || !text[this->interval.end])
8201 goto error;
8202 if (text[this->interval.end] == '>') {
8203 // <?...>
8204 this->type = html_sequence_t::instruction;
8205 this->name.end = this->interval.end;
8206 this->attributes.clear();
8207 this->interval.start = start;
8208 this->interval.end++;
8209 return true;
8210 }
8211 if (this->interval.end + 1 < end &&
8212 text[this->interval.end] == '?' &&
8213 text[this->interval.end + 1] == '>')
8214 {
8215 // <?...?>
8216 this->type = html_sequence_t::instruction;
8217 this->name.end = this->interval.end;
8218 this->attributes.clear();
8219 this->interval.start = start;
8220 this->interval.end = this->interval.end + 2;
8221 return true;
8222 }
8223 this->interval.end++;
8224 }
8225 }
8226 else if (this->m_ident.match(text, this->interval.end, end, flags)) {
8227 // <tag...
8228 this->type = html_sequence_t::element_start;
8229 this->name = this->m_ident.interval;
8230 this->interval.end = this->m_ident.interval.end;
8231 }
8232 else
8233 goto error;
8234
8235 // Skip whitespace.
8236 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8237 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8238
8239 this->attributes.clear();
8240 for (;;) {
8241 if (this->type == html_sequence_t::element_start &&
8242 this->interval.end + 1 < end &&
8243 text[this->interval.end] == '/' &&
8244 text[this->interval.end + 1] == '>')
8245 {
8246 // <tag .../>
8247 this->type = html_sequence_t::element;
8248 this->interval.end = this->interval.end + 2;
8249 break;
8250 }
8251 if (this->interval.end < end &&
8252 text[this->interval.end] == '>')
8253 {
8254 // <tag ...>
8255 this->interval.end++;
8256 break;
8257 }
8258 if (this->type == html_sequence_t::declaration &&
8259 this->interval.end + 1 < end &&
8260 text[this->interval.end] == '!' &&
8261 text[this->interval.end + 1] == '>')
8262 {
8263 // "<!...!>".
8264 this->interval.end = this->interval.end + 2;
8265 break;
8266 }
8267 if (this->type == html_sequence_t::declaration &&
8268 this->interval.end + 1 < end &&
8269 text[this->interval.end] == '-' &&
8270 text[this->interval.end + 1] == '-')
8271 {
8272 // "<! ... --...".
8273 this->interval.end = this->interval.end + 2;
8274 for (;;) {
8275 if (this->interval.end >= end || !text[this->interval.end])
8276 goto error;
8277 if (this->interval.end + 1 < end &&
8278 text[this->interval.end] == '-' &&
8279 text[this->interval.end + 1] == '-')
8280 {
8281 // "<! ... --...--".
8282 this->interval.end = this->interval.end + 2;
8283 break;
8284 }
8285 this->interval.end++;
8286 }
8287
8288 // Skip whitespace.
8289 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8290 continue;
8291 }
8292
8293 if (this->interval.end >= end || !text[this->interval.end])
8294 goto error;
8295
8296 // Attributes follow...
8297 html_attribute* a = nullptr;
8298 if (this->m_ident.match(text, this->interval.end, end, flags)) {
8299 this->attributes.push_back(std::move(html_attribute{ this->m_ident.interval }));
8300 a = &this->attributes.back();
8301 _Assume_(a);
8302 this->interval.end = this->m_ident.interval.end;
8303 }
8304 else {
8305 // What was that?! Skip.
8306 this->interval.end++;
8307 continue;
8308 }
8309
8310 // Skip whitespace.
8311 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8312
8313 if (this->interval.end < end && text[this->interval.end] == '=') {
8314 this->interval.end++;
8315
8316 // Skip whitespace.
8317 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8318
8319 if (this->m_value.match(text, this->interval.end, end, flags)) {
8320 // This attribute has value.
8321 a->value = this->m_value.content;
8322 this->interval.end = this->m_value.interval.end;
8323
8324 // Skip whitespace.
8325 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8326 }
8327 }
8328 else {
8329 // This attribute has no value.
8330 a->value.invalidate();
8331 }
8332 }
8333
8334 this->interval.start = start;
8335 return true;
8336
8337 error:
8338 this->type = html_sequence_t::unknown;
8339 this->name.invalidate();
8340 this->attributes.clear();
8341 this->interval.invalidate();
8342 return false;
8343 }
8344
8345 virtual void invalidate()
8346 {
8347 this->type = html_sequence_t::unknown;
8348 this->name.invalidate();
8349 this->attributes.clear();
8350 basic_parser::invalidate();
8351 }
8352
8353 public:
8354 html_sequence_t type; ///< tag type
// NOTE(review): listing line 8355 is absent from this excerpt; match() and
// invalidate() use a `name` interval member that is presumably declared there.
8356 std::vector<html_attribute> attributes; ///< tag attributes
8357
8358 protected:
8359 basic_html_ident<T> m_ident; ///< sub-parser match() uses for tag and attribute names
8360 basic_html_value<T> m_value; ///< sub-parser match() uses for attribute values
8361 };
8362
// Build-dependent alias: thtml_tag resolves to whtml_tag when _UNICODE is
// defined and to html_tag otherwise.
8365#ifdef _UNICODE
8366 using thtml_tag = whtml_tag;
8367#else
8368 using thtml_tag = html_tag;
8369#endif
8370
8374 template <class T>
8376 {
8377 public:
8378 virtual bool match(
8379 _In_reads_or_z_opt_(end) const T* text,
8380 _In_ size_t start = 0,
8381 _In_ size_t end = SIZE_MAX,
8382 _In_ int flags = match_multiline)
8383 {
8384 _Unreferenced_(flags);
8385 _Assume_(text || start + 2 >= end);
8386 if (start + 2 < end &&
8387 text[start] == '<' &&
8388 text[start + 1] == '!' &&
8389 text[start + 2] == '[')
8390 {
8391 this->interval.end = start + 3;
8392
8393 // Skip whitespace.
8394 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8395 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8396
8397 this->condition.start = this->condition.end = this->interval.end;
8398
8399 for (;;) {
8400 if (this->interval.end >= end || !text[this->interval.end])
8401 break;
8402 if (text[this->interval.end] == '[') {
8403 this->interval.start = start;
8404 this->interval.end++;
8405 return true;
8406 }
8407 if (ctype.is(ctype.space, text[this->interval.end]))
8408 this->interval.end++;
8409 else
8410 this->condition.end = ++this->interval.end;
8411 }
8412 }
8413
8414 this->condition.invalidate();
8415 this->interval.invalidate();
8416 return false;
8417 }
8418
8419 virtual void invalidate()
8420 {
8421 this->condition.invalidate();
8422 basic_parser::invalidate();
8423 }
8424
8425 public:
8426 stdex::interval<size_t> condition;
8427 };
8428
8431#ifdef _UNICODE
8433#else
8435#endif
8436
8440 template <class T>
8442 {
8443 public:
8444 virtual bool match(
8445 _In_reads_or_z_opt_(end) const T* text,
8446 _In_ size_t start = 0,
8447 _In_ size_t end = SIZE_MAX,
8448 _In_ int flags = match_multiline)
8449 {
8450 _Unreferenced_(flags);
8451 _Assume_(text || start + 2 >= end);
8452 if (start + 2 < end &&
8453 text[start] == ']' &&
8454 text[start + 1] == ']' &&
8455 text[start + 2] == '>')
8456 {
8457 this->interval.start = start;
8458 this->interval.end = start + 3;
8459 return true;
8460 }
8461 this->interval.invalidate();
8462 return false;
8463 }
8464 };
8465
8468#ifdef _UNICODE
8470#else
8472#endif
8473 }
8474}
8475
8476#undef ENUM_FLAG_OPERATOR
8477#undef ENUM_FLAGS
8478
8479#ifdef _MSC_VER
8480#pragma warning(pop)
8481#endif
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4397
Test for any code unit.
Definition parser.hpp:224
Test for beginning of line.
Definition parser.hpp:618
Test for any.
Definition parser.hpp:1061
Test for chemical formula.
Definition parser.hpp:5541
Test for Creditor Reference.
Definition parser.hpp:4967
T reference[22]
Normalized national reference number.
Definition parser.hpp:5096
T check_digits[3]
Two check digits.
Definition parser.hpp:5095
bool is_valid
Is reference valid per ISO 7064.
Definition parser.hpp:5097
Legacy CSS comment end -->
Definition parser.hpp:7550
Legacy CSS comment start <!--
Definition parser.hpp:7512
CSS comment.
Definition parser.hpp:7452
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7496
CSS import directive.
Definition parser.hpp:7765
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7834
CSS string.
Definition parser.hpp:7587
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7638
URI in CSS.
Definition parser.hpp:7654
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7749
Test for any code unit from a given string of code units.
Definition parser.hpp:724
Test for specific code unit.
Definition parser.hpp:294
Test for date.
Definition parser.hpp:4027
Test for valid DNS domain character.
Definition parser.hpp:2808
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2846
Test for DNS domain/hostname.
Definition parser.hpp:2908
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2972
Test for e-mail address.
Definition parser.hpp:3796
Test for emoticon.
Definition parser.hpp:3904
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3993
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3994
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3996
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3995
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3992
Test for end of line.
Definition parser.hpp:657
Test for fraction.
Definition parser.hpp:1690
End of condition ...]]>
Definition parser.hpp:8442
Start of condition <![condition[...
Definition parser.hpp:8376
Contiguous sequence of characters representing the name of an element, attribute, etc.
Definition parser.hpp:7986
Tag.
Definition parser.hpp:8138
std::vector< html_attribute > attributes
tag attributes
Definition parser.hpp:8356
html_sequence_t type
tag type
Definition parser.hpp:8354
stdex::interval< size_t > name
tag name position in source
Definition parser.hpp:8355
Optionally-quoted string representing value of an attribute.
Definition parser.hpp:8033
stdex::interval< size_t > content
content position in source
Definition parser.hpp:8097
Test for International Bank Account Number.
Definition parser.hpp:4673
T bban[31]
Normalized Basic Bank Account Number.
Definition parser.hpp:4944
T country[3]
ISO 3166-1 alpha-2 country code.
Definition parser.hpp:4942
T check_digits[3]
Two check digits.
Definition parser.hpp:4943
bool is_valid
Is IBAN valid per ISO 7064.
Definition parser.hpp:4945
Test for decimal integer.
Definition parser.hpp:1299
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1384
bool has_separators
Did integer have any separators?
Definition parser.hpp:1444
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1443
Test for hexadecimal integer.
Definition parser.hpp:1465
Base class for integer testing.
Definition parser.hpp:1277
size_t value
Calculated value of the numeral.
Definition parser.hpp:1291
Test for IPv4 address.
Definition parser.hpp:2350
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2465
struct in_addr value
IPv4 address value.
Definition parser.hpp:2466
Test for IPv6 address.
Definition parser.hpp:2569
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2771
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2769
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2770
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2497
Test for repeating.
Definition parser.hpp:914
bool m_greedy
try to match as long a sequence as possible
Definition parser.hpp:953
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:950
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:951
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:952
Test for JSON string.
Definition parser.hpp:7299
MIME content type.
Definition parser.hpp:7850
stdex::interval< size_t > base_type
basic type position in source
Definition parser.hpp:7968
stdex::interval< size_t > sub_type
sub-type position in source
Definition parser.hpp:7969
stdex::interval< size_t > charset
charset position in source
Definition parser.hpp:7970
Test for mixed numeral.
Definition parser.hpp:1926
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:2032
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2030
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2029
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2028
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2031
Test for monetary numeral.
Definition parser.hpp:2221
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2327
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2332
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2330
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2333
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2331
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2328
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2329
"No-op" match
Definition parser.hpp:192
Base template for all parsers.
Definition parser.hpp:74
stdex::interval< size_t > interval
Region of the last match.
Definition parser.hpp:172
Test for permutation.
Definition parser.hpp:1201
Test for phone number.
Definition parser.hpp:4520
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4646
Test for any punctuation code unit.
Definition parser.hpp:466
Test for Roman numeral.
Definition parser.hpp:1574
Test for scientific numeral.
Definition parser.hpp:2052
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2196
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2200
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2194
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2195
double value
Calculated value of the numeral.
Definition parser.hpp:2204
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2202
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2199
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2201
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2203
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2198
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2197
Test for match score.
Definition parser.hpp:1753
Test for sequence.
Definition parser.hpp:1010
Definition parser.hpp:692
Test for SI Reference delimiter.
Definition parser.hpp:5164
Test for SI Reference part.
Definition parser.hpp:5119
Test for SI Reference.
Definition parser.hpp:5202
basic_si_reference_part< T > part3
Reference data part 3 (P3)
Definition parser.hpp:5519
basic_si_reference_part< T > part1
Reference data part 1 (P1)
Definition parser.hpp:5517
bool is_valid
Is reference valid.
Definition parser.hpp:5520
T model[3]
Reference model.
Definition parser.hpp:5516
basic_si_reference_part< T > part2
Reference data part 2 (P2)
Definition parser.hpp:5518
Test for signed numeral.
Definition parser.hpp:1840
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1908
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1907
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1906
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1909
Test for any space code unit.
Definition parser.hpp:387
Test for any space or punctuation code unit.
Definition parser.hpp:540
Test for any string.
Definition parser.hpp:1129
Test for given string.
Definition parser.hpp:819
Test for time.
Definition parser.hpp:4294
Test for valid URL password character.
Definition parser.hpp:3090
Test for valid URL path character.
Definition parser.hpp:3190
Test for URL path.
Definition parser.hpp:3298
Test for valid URL username character.
Definition parser.hpp:2991
Test for URL.
Definition parser.hpp:3439
Test for HTTP agent.
Definition parser.hpp:6826
Test for HTTP any type.
Definition parser.hpp:5948
Test for HTTP asterisk.
Definition parser.hpp:6590
Test for HTTP header.
Definition parser.hpp:7146
Test for HTTP language (RFC1766)
Definition parser.hpp:6458
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:5622
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5980
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:6035
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5893
http_token name
Parameter name.
Definition parser.hpp:5937
http_value value
Parameter value.
Definition parser.hpp:5938
Test for HTTP protocol.
Definition parser.hpp:6901
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:7002
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:5783
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:5839
Test for HTTP request.
Definition parser.hpp:7009
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:5658
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:5695
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:5729
Test for HTTP URL parameter.
Definition parser.hpp:6275
Test for HTTP URL path segment.
Definition parser.hpp:6186
Test for HTTP URL path.
Definition parser.hpp:6219
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:6268
Test for HTTP URL port.
Definition parser.hpp:6130
Test for HTTP URL server.
Definition parser.hpp:6093
Test for HTTP URL.
Definition parser.hpp:6356
Collection of HTTP values.
Definition parser.hpp:7255
Test for HTTP value (RFC2616: value)
Definition parser.hpp:5849
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5885
http_token token
Value when matched as token.
Definition parser.hpp:5886
Test for HTTP weight factor.
Definition parser.hpp:6521
float value
Calculated value of the weight factor.
Definition parser.hpp:6583
Test for HTTP weighted value.
Definition parser.hpp:6613
Base template for collection-holding parsers.
Definition parser.hpp:970
Test for any SGML code point.
Definition parser.hpp:256
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:776
Test for specific SGML code point.
Definition parser.hpp:343
Test for valid DNS domain SGML character.
Definition parser.hpp:2864
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2535
Test for any SGML punctuation code point.
Definition parser.hpp:507
Test for any SGML space code point.
Definition parser.hpp:430
Test for any SGML space or punctuation code point.
Definition parser.hpp:583
Test for SGML given string.
Definition parser.hpp:866
Test for valid URL password SGML character.
Definition parser.hpp:3142
Test for valid URL path SGML character.
Definition parser.hpp:3246
Test for valid URL username SGML character.
Definition parser.hpp:3042
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
void invalidate()
Invalidates interval.
Definition interval.hpp:59
T start
interval start
Definition interval.hpp:19
Tag attribute.
Definition parser.hpp:8128
stdex::interval< size_t > name
attribute name position in source
Definition parser.hpp:8129
stdex::interval< size_t > value
attribute value position in source
Definition parser.hpp:8130
Definition parser.hpp:7281