stdex
Additional custom or not Standard C++ covered algorithms
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "interval.hpp"
10#include "memory.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include <stdarg.h>
14#include <stdint.h>
15#include <math.h>
16#if defined(_WIN32)
17#include <winsock2.h>
18#if _MSC_VER >= 1300
19#include <ws2ipdef.h>
20#endif
21#include <ws2tcpip.h>
22#else
23#include <netinet/in.h>
24#endif
25#include <limits>
26#include <list>
27#include <locale>
28#include <memory>
29#include <set>
30#include <string>
31
32#ifdef _MSC_VER
33#pragma warning(push)
34#pragma warning(disable: 4100)
35#endif
36
37#define ENUM_FLAG_OPERATOR(T,X) \
38inline T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
39inline T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
40inline T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
41inline T& operator X= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
42inline T& operator X= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }
43#define ENUM_FLAGS(T, type) \
44enum class T : type; \
45inline T operator ~ (T t) { return (T) (~static_cast<std::underlying_type_t <T>>(t)); } \
46ENUM_FLAG_OPERATOR(T,|) \
47ENUM_FLAG_OPERATOR(T,^) \
48ENUM_FLAG_OPERATOR(T,&) \
49enum class T : type
50
51#if defined(_WIN32)
52#elif defined(__APPLE__)
53#define s6_words __u6_addr.__u6_addr16
54#else
55#define s6_words s6_addr16
56#endif
57
58namespace stdex
59{
60 namespace parser
61 {
/// Flag value with no option bits set
constexpr int match_default = 0;
/// Flag bit (0x1): parsers that honour it compare text case-insensitively
constexpr int match_case_insensitive = 1 << 0;
/// Flag bit (0x2): when set, the whitespace parsers also accept line-break characters
constexpr int match_multiline = 1 << 1;
68
72 template <class T>
74 {
75 public:
76 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
77 virtual ~basic_parser() {}
78
79 bool search(
80 _In_reads_or_z_(end) const T* text,
81 _In_ size_t start = 0,
82 _In_ size_t end = SIZE_MAX,
83 _In_ int flags = match_default)
84 {
85 for (size_t i = start; i < end && text[i]; i++)
86 if (match(text, i, end, flags))
87 return true;
88 return false;
89 }
90
91 virtual bool match(
92 _In_reads_or_z_(end) const T* text,
93 _In_ size_t start = 0,
94 _In_ size_t end = SIZE_MAX,
95 _In_ int flags = match_default) = 0;
96
97 template<class _Traits, class _Ax>
98 inline bool match(
99 const std::basic_string<T, _Traits, _Ax>& text,
100 _In_ size_t start = 0,
101 _In_ size_t end = SIZE_MAX,
102 _In_ int flags = match_default)
103 {
104 return match(text.c_str(), start, std::min<size_t>(end, text.size()), flags);
105 }
106
107 virtual void invalidate()
108 {
109 this->interval.invalidate();
110 }
111
112 protected:
114 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
115 {
116 if (text[start] == '&') {
117 // Potential entity start
118 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
119 for (chr_end = start + 1;; chr_end++) {
120 if (chr_end >= end || text[chr_end] == 0) {
121 // Unterminated entity
122 break;
123 }
124 if (text[chr_end] == ';') {
125 // Entity end
126 size_t n = chr_end - start - 1;
127 if (n >= 2 && text[start + 1] == '#') {
128 // Numerical entity
129 char32_t unicode;
130 if (text[start + 2] == 'x' || text[start + 2] == 'X')
131 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
132 else
133 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
134#ifdef _WIN32
135 if (unicode < 0x10000) {
136 buf[0] = (wchar_t)unicode;
137 buf[1] = 0;
138 }
139 else {
140 ucs4_to_surrogate_pair(buf, unicode);
141 buf[2] = 0;
142 }
143#else
144 buf[0] = (wchar_t)unicode;
145 buf[1] = 0;
146#endif
147 chr_end++;
148 return buf;
149 }
150 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
151 if (entity_w) {
152 chr_end++;
153 return entity_w;
154 }
155 // Unknown entity.
156 break;
157 }
158 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
159 // This char cannot possibly be a part of entity.
160 break;
161 }
162 }
163 }
164 buf[0] = text[start];
165 buf[1] = 0;
166 chr_end = start + 1;
167 return buf;
168 }
170
171 public:
173
174 protected:
175 std::locale m_locale;
176 };
177
180#ifdef _UNICODE
181 using tparser = wparser;
182#else
183 using tparser = parser;
184#endif
186
190 template <class T>
191 class basic_noop : public basic_parser<T>
192 {
193 public:
194 virtual bool match(
195 _In_reads_or_z_(end) const T* text,
196 _In_ size_t start = 0,
197 _In_ size_t end = SIZE_MAX,
198 _In_ int flags = match_default)
199 {
200 _Assume_(text || start >= end);
201 if (start < end && text[start]) {
202 this->interval.start = this->interval.end = start;
203 return true;
204 }
205 this->interval.invalidate();
206 return false;
207 }
208 };
209
210 using noop = basic_noop<char>;
212#ifdef _UNICODE
213 using tnoop = wnoop;
214#else
215 using tnoop = noop;
216#endif
218
222 template <class T>
223 class basic_any_cu : public basic_parser<T>
224 {
225 public:
226 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
227
228 virtual bool match(
229 _In_reads_or_z_(end) const T* text,
230 _In_ size_t start = 0,
231 _In_ size_t end = SIZE_MAX,
232 _In_ int flags = match_default)
233 {
234 _Assume_(text || start >= end);
235 if (start < end && text[start]) {
236 this->interval.end = (this->interval.start = start) + 1;
237 return true;
238 }
239 this->interval.invalidate();
240 return false;
241 }
242 };
243
246#ifdef _UNICODE
247 using tany_cu = wany_cu;
248#else
249 using tany_cu = any_cu;
250#endif
251
255 class sgml_any_cp : public basic_any_cu<char>
256 {
257 public:
258 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
259
260 virtual bool match(
261 _In_reads_or_z_(end) const char* text,
262 _In_ size_t start = 0,
263 _In_ size_t end = SIZE_MAX,
264 _In_ int flags = match_default)
265 {
266 _Assume_(text || start >= end);
267 if (start < end && text[start]) {
268 if (text[start] == '&') {
269 // SGML entity
270 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
271 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
272 if (text[this->interval.end] == ';') {
273 this->interval.end++;
274 this->interval.start = start;
275 return true;
276 }
277 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
278 break;
279 // Unterminated entity
280 }
281 this->interval.end = (this->interval.start = start) + 1;
282 return true;
283 }
284 this->interval.invalidate();
285 return false;
286 }
287 };
288
292 template <class T>
293 class basic_cu : public basic_parser<T>
294 {
295 public:
296 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
297 basic_parser<T>(locale),
298 m_chr(chr),
299 m_invert(invert)
300 {}
301
302 virtual bool match(
303 _In_reads_or_z_(end) const T* text,
304 _In_ size_t start = 0,
305 _In_ size_t end = SIZE_MAX,
306 _In_ int flags = match_default)
307 {
308 _Assume_(text || start >= end);
309 if (start < end && text[start]) {
310 bool r;
311 if (flags & match_case_insensitive) {
312 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
313 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
314 }
315 else
316 r = text[start] == m_chr;
317 if ((r && !m_invert) || (!r && m_invert)) {
318 this->interval.end = (this->interval.start = start) + 1;
319 return true;
320 }
321 }
322 this->interval.invalidate();
323 return false;
324 }
325
326 protected:
327 T m_chr;
328 bool m_invert;
329 };
330
331 using cu = basic_cu<char>;
332 using wcu = basic_cu<wchar_t>;
333#ifdef _UNICODE
334 using tcu = wcu;
335#else
336 using tcu = cu;
337#endif
338
342 class sgml_cp : public sgml_parser
343 {
344 public:
345 sgml_cp(const char* chr, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
346 sgml_parser(locale),
347 m_invert(invert)
348 {
349 _Assume_(chr || !count);
350 wchar_t buf[3];
351 size_t chr_end;
352 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
353 }
354
355 virtual bool match(
356 _In_reads_or_z_(end) const char* text,
357 _In_ size_t start = 0,
358 _In_ size_t end = SIZE_MAX,
359 _In_ int flags = match_default)
360 {
361 _Assume_(text || start >= end);
362 if (start < end && text[start]) {
363 wchar_t buf[3];
364 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
365 bool r = ((flags & match_case_insensitive) ?
366 stdex::strnicmp(chr, SIZE_MAX, m_chr.c_str(), m_chr.size(), m_locale) :
367 stdex::strncmp(chr, SIZE_MAX, m_chr.c_str(), m_chr.size())) == 0;
368 if ((r && !m_invert) || (!r && m_invert)) {
369 this->interval.start = start;
370 return true;
371 }
372 }
373 this->interval.invalidate();
374 return false;
375 }
376
377 protected:
378 std::wstring m_chr;
379 bool m_invert;
380 };
381
385 template <class T>
386 class basic_space_cu : public basic_parser<T>
387 {
388 public:
389 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
390 basic_parser<T>(locale),
391 m_invert(invert)
392 {}
393
394 virtual bool match(
395 _In_reads_or_z_(end) const T* text,
396 _In_ size_t start = 0,
397 _In_ size_t end = SIZE_MAX,
398 _In_ int flags = match_default)
399 {
400 _Assume_(text || start >= end);
401 if (start < end && text[start]) {
402 bool r =
403 ((flags & match_multiline) || !islbreak(text[start])) &&
404 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
405 if ((r && !m_invert) || (!r && m_invert)) {
406 this->interval.end = (this->interval.start = start) + 1;
407 return true;
408 }
409 }
410 this->interval.invalidate();
411 return false;
412 }
413
414 protected:
415 bool m_invert;
416 };
417
420#ifdef _UNICODE
421 using tspace_cu = wspace_cu;
422#else
423 using tspace_cu = space_cu;
424#endif
425
429 class sgml_space_cp : public basic_space_cu<char>
430 {
431 public:
432 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
434 {}
435
436 virtual bool match(
437 _In_reads_or_z_(end) const char* text,
438 _In_ size_t start = 0,
439 _In_ size_t end = SIZE_MAX,
440 _In_ int flags = match_default)
441 {
442 _Assume_(text || start >= end);
443 if (start < end && text[start]) {
444 wchar_t buf[3];
445 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
446 const wchar_t* chr_end = chr + stdex::strlen(chr);
447 bool r =
448 ((flags & match_multiline) || !islbreak(chr, SIZE_MAX)) &&
449 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
450 if ((r && !m_invert) || (!r && m_invert)) {
451 this->interval.start = start;
452 return true;
453 }
454 }
455
456 this->interval.invalidate();
457 return false;
458 }
459 };
460
464 template <class T>
465 class basic_punct_cu : public basic_parser<T>
466 {
467 public:
468 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
469 basic_parser<T>(locale),
470 m_invert(invert)
471 {}
472
473 virtual bool match(
474 _In_reads_or_z_(end) const T* text,
475 _In_ size_t start = 0,
476 _In_ size_t end = SIZE_MAX,
477 _In_ int flags = match_default)
478 {
479 _Assume_(text || start >= end);
480 if (start < end && text[start]) {
481 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
482 if ((r && !m_invert) || (!r && m_invert)) {
483 this->interval.end = (this->interval.start = start) + 1;
484 return true;
485 }
486 }
487 this->interval.invalidate();
488 return false;
489 }
490
491 protected:
492 bool m_invert;
493 };
494
497#ifdef _UNICODE
498 using tpunct_cu = wpunct_cu;
499#else
500 using tpunct_cu = punct_cu;
501#endif
502
506 class sgml_punct_cp : public basic_punct_cu<char>
507 {
508 public:
509 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
511 {}
512
513 virtual bool match(
514 _In_reads_or_z_(end) const char* text,
515 _In_ size_t start = 0,
516 _In_ size_t end = SIZE_MAX,
517 _In_ int flags = match_default)
518 {
519 _Assume_(text || start >= end);
520 if (start < end && text[start]) {
521 wchar_t buf[3];
522 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
523 const wchar_t* chr_end = chr + stdex::strlen(chr);
524 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
525 if ((r && !m_invert) || (!r && m_invert)) {
526 this->interval.start = start;
527 return true;
528 }
529 }
530 this->interval.invalidate();
531 return false;
532 }
533 };
534
538 template <class T>
540 {
541 public:
542 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
543 basic_parser<T>(locale),
544 m_invert(invert)
545 {}
546
547 virtual bool match(
548 _In_reads_or_z_(end) const T* text,
549 _In_ size_t start = 0,
550 _In_ size_t end = SIZE_MAX,
551 _In_ int flags = match_default)
552 {
553 _Assume_(text || start >= end);
554 if (start < end && text[start]) {
555 bool r =
556 ((flags & match_multiline) || !islbreak(text[start])) &&
557 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
558 if ((r && !m_invert) || (!r && m_invert)) {
559 this->interval.end = (this->interval.start = start) + 1;
560 return true;
561 }
562 }
563 this->interval.invalidate();
564 return false;
565 }
566
567 protected:
568 bool m_invert;
569 };
570
573#ifdef _UNICODE
575#else
577#endif
578
583 {
584 public:
585 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
587 {}
588
589 virtual bool match(
590 _In_reads_or_z_(end) const char* text,
591 _In_ size_t start = 0,
592 _In_ size_t end = SIZE_MAX,
593 _In_ int flags = match_default)
594 {
595 _Assume_(text || start >= end);
596 if (start < end && text[start]) {
597 wchar_t buf[3];
598 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
599 const wchar_t* chr_end = chr + stdex::strlen(chr);
600 bool r =
601 ((flags & match_multiline) || !islbreak(chr, SIZE_MAX)) &&
602 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
603 if ((r && !m_invert) || (!r && m_invert)) {
604 this->interval.start = start;
605 return true;
606 }
607 }
608 this->interval.invalidate();
609 return false;
610 }
611 };
612
616 template <class T>
617 class basic_bol : public basic_parser<T>
618 {
619 public:
620 basic_bol(bool invert = false) : m_invert(invert) {}
621
622 virtual bool match(
623 _In_reads_or_z_(end) const T* text,
624 _In_ size_t start = 0,
625 _In_ size_t end = SIZE_MAX,
626 _In_ int flags = match_default)
627 {
628 _Assume_(text || start >= end);
629 bool r = start == 0 || (start <= end && islbreak(text[start - 1]));
630 if ((r && !m_invert) || (!r && m_invert)) {
631 this->interval.end = this->interval.start = start;
632 return true;
633 }
634 this->interval.invalidate();
635 return false;
636 }
637
638 protected:
639 bool m_invert;
640 };
641
642 using bol = basic_bol<char>;
643 using wbol = basic_bol<wchar_t>;
644#ifdef _UNICODE
645 using tbol = wbol;
646#else
647 using tbol = bol;
648#endif
650
654 template <class T>
655 class basic_eol : public basic_parser<T>
656 {
657 public:
658 basic_eol(bool invert = false) : m_invert(invert) {}
659
660 virtual bool match(
661 _In_reads_or_z_(end) const T* text,
662 _In_ size_t start = 0,
663 _In_ size_t end = SIZE_MAX,
664 _In_ int flags = match_default)
665 {
666 _Assume_(text || start >= end);
667 bool r = islbreak(text[start]);
668 if ((r && !m_invert) || (!r && m_invert)) {
669 this->interval.end = this->interval.start = start;
670 return true;
671 }
672 this->interval.invalidate();
673 return false;
674 }
675
676 protected:
677 bool m_invert;
678 };
679
680 using eol = basic_eol<char>;
681 using weol = basic_eol<wchar_t>;
682#ifdef _UNICODE
683 using teol = weol;
684#else
685 using teol = eol;
686#endif
688
689 template <class T>
690 class basic_set : public basic_parser<T>
691 {
692 public:
693 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
694 basic_parser<T>(locale),
695 hit_offset(SIZE_MAX),
696 m_invert(invert)
697 {}
698
699 virtual bool match(
700 _In_reads_or_z_(end) const T* text,
701 _In_ size_t start = 0,
702 _In_ size_t end = SIZE_MAX,
703 _In_ int flags = match_default) = 0;
704
705 virtual void invalidate()
706 {
707 hit_offset = SIZE_MAX;
709 }
710
711 public:
712 size_t hit_offset;
713
714 protected:
715 bool m_invert;
716 };
717
721 template <class T>
722 class basic_cu_set : public basic_set<T>
723 {
724 public:
726 _In_reads_or_z_(count) const T* set,
727 _In_ size_t count = SIZE_MAX,
728 _In_ bool invert = false,
729 _In_ const std::locale& locale = std::locale()) :
730 basic_set<T>(invert, locale)
731 {
732 if (set)
733 m_set.assign(set, set + stdex::strnlen(set, count));
734 }
735
736 virtual bool match(
737 _In_reads_or_z_(end) const T* text,
738 _In_ size_t start = 0,
739 _In_ size_t end = SIZE_MAX,
740 _In_ int flags = match_default)
741 {
742 _Assume_(text || start >= end);
743 if (start < end && text[start]) {
744 const T* set = m_set.c_str();
745 size_t r = (flags & match_case_insensitive) ?
746 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
747 stdex::strnchr(set, m_set.size(), text[start]);
748 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
749 this->hit_offset = r;
750 this->interval.end = (this->interval.start = start) + 1;
751 return true;
752 }
753 }
754 this->hit_offset = SIZE_MAX;
755 this->interval.invalidate();
756 return false;
757 }
758
759 protected:
760 std::basic_string<T> m_set;
761 };
762
765#ifdef _UNICODE
766 using tcu_set = wcu_set;
767#else
768 using tcu_set = cu_set;
769#endif
770
774 class sgml_cp_set : public basic_set<char>
775 {
776 public:
777 sgml_cp_set(const char* set, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
778 basic_set<char>(invert, locale)
779 {
780 if (set)
781 m_set = sgml2str(set, count);
782 }
783
784 virtual bool match(
785 _In_reads_or_z_(end) const char* text,
786 _In_ size_t start = 0,
787 _In_ size_t end = SIZE_MAX,
788 _In_ int flags = match_default)
789 {
790 _Assume_(text || start >= end);
791 if (start < end && text[start]) {
792 wchar_t buf[3];
793 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
794 const wchar_t* set = m_set.c_str();
795 size_t r = (flags & match_case_insensitive) ?
796 stdex::strnistr(set, m_set.size(), chr, m_locale) :
797 stdex::strnstr(set, m_set.size(), chr);
798 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
799 hit_offset = r;
800 this->interval.start = start;
801 return true;
802 }
803 }
804 hit_offset = SIZE_MAX;
805 this->interval.invalidate();
806 return false;
807 }
808
809 protected:
810 std::wstring m_set;
811 };
812
816 template <class T>
817 class basic_string : public basic_parser<T>
818 {
819 public:
821 _In_reads_or_z_(count) const T* str,
822 _In_ size_t count = SIZE_MAX,
823 _In_ const std::locale& locale = std::locale()) :
824 basic_parser<T>(locale),
825 m_str(str, str + stdex::strnlen(str, count))
826 {}
827
828 virtual bool match(
829 _In_reads_or_z_(end) const T* text,
830 _In_ size_t start = 0,
831 _In_ size_t end = SIZE_MAX,
832 _In_ int flags = match_default)
833 {
834 _Assume_(text || start >= end);
835 size_t
836 m = m_str.size(),
837 n = std::min<size_t>(end - start, m);
838 bool r = ((flags & match_case_insensitive) ?
839 stdex::strnicmp(text + start, n, m_str.c_str(), m, this->m_locale) :
840 stdex::strncmp(text + start, n, m_str.c_str(), m)) == 0;
841 if (r) {
842 this->interval.end = (this->interval.start = start) + n;
843 return true;
844 }
845 this->interval.invalidate();
846 return false;
847 }
848
849 protected:
850 std::basic_string<T> m_str;
851 };
852
855#ifdef _UNICODE
856 using tstring = wstring;
857#else
858 using tstring = string;
859#endif
860
865 {
866 public:
867 sgml_string(const char* str, size_t count = SIZE_MAX, _In_ const std::locale& locale = std::locale()) :
868 sgml_parser(locale),
869 m_str(sgml2str(str, count))
870 {}
871
872 virtual bool match(
873 _In_reads_or_z_(end) const char* text,
874 _In_ size_t start = 0,
875 _In_ size_t end = SIZE_MAX,
876 _In_ int flags = match_default)
877 {
878 _Assume_(text || start >= end);
879 const wchar_t* str = m_str.c_str();
880 const bool case_insensitive = flags & match_case_insensitive ? true : false;
881 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
882 for (this->interval.end = start;;) {
883 if (!*str) {
884 this->interval.start = start;
885 return true;
886 }
887 if (this->interval.end >= end || !text[this->interval.end]) {
888 this->interval.invalidate();
889 return false;
890 }
891 wchar_t buf[3];
892 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
893 for (; *chr; ++str, ++chr) {
894 if (!*str ||
895 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
896 {
897 this->interval.invalidate();
898 return false;
899 }
900 }
901 }
902 }
903
904 protected:
905 std::wstring m_str;
906 };
907
911 template <class T>
913 {
914 public:
915 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = SIZE_MAX, bool greedy = true) :
916 m_el(el),
920 {}
921
922 virtual bool match(
923 _In_reads_or_z_(end) const T* text,
924 _In_ size_t start = 0,
925 _In_ size_t end = SIZE_MAX,
926 _In_ int flags = match_default)
927 {
928 _Assume_(text || start >= end);
929 this->interval.start = this->interval.end = start;
930 for (size_t i = 0; ; i++) {
931 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
932 return true;
933 if (!m_el->match(text, this->interval.end, end, flags)) {
934 if (i >= m_min_iterations)
935 return true;
936 break;
937 }
938 if (m_el->interval.end == this->interval.end) {
939 // Element did match, but the matching interval was empty. Quit instead of spinning.
940 return true;
941 }
942 this->interval.end = m_el->interval.end;
943 }
944 this->interval.invalidate();
945 return false;
946 }
947
948 protected:
949 std::shared_ptr<basic_parser<T>> m_el;
952 bool m_greedy;
953 };
954
957#ifdef _UNICODE
958 using titerations = witerations;
959#else
960 using titerations = iterations;
961#endif
963
967 template <class T>
969 {
970 protected:
971 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
972
973 public:
975 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
976 _In_ size_t count,
977 _In_ const std::locale& locale = std::locale()) :
978 basic_parser<T>(locale)
979 {
980 _Assume_(el || !count);
981 m_collection.reserve(count);
982 for (size_t i = 0; i < count; i++)
983 m_collection.push_back(el[i]);
984 }
985
987 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
988 _In_ const std::locale& locale = std::locale()) :
989 basic_parser<T>(locale),
990 m_collection(std::move(collection))
991 {}
992
993 virtual void invalidate()
994 {
995 for (auto& el : m_collection)
996 el->invalidate();
998 }
999
1000 protected:
1001 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
1002 };
1003
1007 template <class T>
1009 {
1010 public:
1012 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1013 _In_ size_t count = 0,
1014 _In_ const std::locale& locale = std::locale()) :
1015 parser_collection<T>(el, count, locale)
1016 {}
1017
1019 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1020 _In_ const std::locale& locale = std::locale()) :
1021 parser_collection<T>(std::move(collection), locale)
1022 {}
1023
1024 virtual bool match(
1025 _In_reads_or_z_(end) const T* text,
1026 _In_ size_t start = 0,
1027 _In_ size_t end = SIZE_MAX,
1028 _In_ int flags = match_default)
1029 {
1030 _Assume_(text || start >= end);
1031 this->interval.end = start;
1032 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1033 if (!(*i)->match(text, this->interval.end, end, flags)) {
1034 for (++i; i != this->m_collection.end(); ++i)
1035 (*i)->invalidate();
1036 this->interval.invalidate();
1037 return false;
1038 }
1039 this->interval.end = (*i)->interval.end;
1040 }
1041 this->interval.start = start;
1042 return true;
1043 }
1044 };
1045
1048#ifdef _UNICODE
1049 using tsequence = wsequence;
1050#else
1051 using tsequence = sequence;
1052#endif
1054
1058 template <class T>
1060 {
1061 protected:
1062 basic_branch(_In_ const std::locale& locale) :
1063 parser_collection<T>(locale),
1064 hit_offset(SIZE_MAX)
1065 {}
1066
1067 public:
1069 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1070 _In_ size_t count = 0,
1071 _In_ const std::locale& locale = std::locale()) :
1072 parser_collection<T>(el, count, locale),
1073 hit_offset(SIZE_MAX)
1074 {}
1075
1077 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1078 _In_ const std::locale& locale = std::locale()) :
1079 parser_collection<T>(std::move(collection), locale),
1080 hit_offset(SIZE_MAX)
1081 {}
1082
1083 virtual bool match(
1084 _In_reads_or_z_(end) const T* text,
1085 _In_ size_t start = 0,
1086 _In_ size_t end = SIZE_MAX,
1087 _In_ int flags = match_default)
1088 {
1089 _Assume_(text || start >= end);
1090 hit_offset = 0;
1091 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1092 if ((*i)->match(text, start, end, flags)) {
1093 this->interval = (*i)->interval;
1094 for (++i; i != this->m_collection.end(); ++i)
1095 (*i)->invalidate();
1096 return true;
1097 }
1098 }
1099 hit_offset = SIZE_MAX;
1100 this->interval.invalidate();
1101 return false;
1102 }
1103
1104 virtual void invalidate()
1105 {
1106 hit_offset = SIZE_MAX;
1108 }
1109
1110 public:
1111 size_t hit_offset;
1112 };
1113
1114 using branch = basic_branch<char>;
1116#ifdef _UNICODE
1117 using tbranch = wbranch;
1118#else
1119 using tbranch = branch;
1120#endif
1122
1126 template <class T, class T_parser = basic_string<T>>
1128 {
1129 public:
1130 inline basic_string_branch(
1131 _In_reads_(count) const T* str_z = nullptr,
1132 _In_ size_t count = 0,
1133 _In_ const std::locale& locale = std::locale()) :
1134 basic_branch<T>(locale)
1135 {
1136 build(str_z, count);
1137 }
1138
1139 inline basic_string_branch(_In_z_ const T* str, ...) :
1140 basic_branch<T>(std::locale())
1141 {
1142 va_list params;
1143 va_start(params, str);
1144 build(str, params);
1145 va_end(params);
1146 }
1147
1148 inline basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1149 basic_branch<T>(locale)
1150 {
1151 va_list params;
1152 va_start(params, str);
1153 build(str, params);
1154 va_end(params);
1155 }
1156
1157 protected:
1158 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1159 {
1160 _Assume_(str_z || !count);
1161 if (count) {
1162 size_t offset, n;
1163 for (
1164 offset = n = 0;
1165 offset < count && str_z[offset];
1166 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1167 this->m_collection.reserve(n);
1168 for (
1169 offset = 0;
1170 offset < count && str_z[offset];
1171 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1172 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1173 }
1174 }
1175
1176 void build(_In_z_ const T* str, _In_ va_list params)
1177 {
1178 const T* p;
1179 for (
1180 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, SIZE_MAX, this->m_locale)));
1181 (p = va_arg(params, const T*)) != nullptr;
1182 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, SIZE_MAX, this->m_locale))));
1183 }
1184 };
1185
1188#ifdef _UNICODE
1190#else
1192#endif
1194
1198 template <class T>
1200 {
1201 public:
1203 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1204 _In_ size_t count = 0,
1205 _In_ const std::locale& locale = std::locale()) :
1206 parser_collection<T>(el, count, locale)
1207 {}
1208
1210 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1211 _In_ const std::locale& locale = std::locale()) :
1212 parser_collection<T>(std::move(collection), locale)
1213 {}
1214
1215 virtual bool match(
1216 _In_reads_or_z_(end) const T* text,
1217 _In_ size_t start = 0,
1218 _In_ size_t end = SIZE_MAX,
1219 _In_ int flags = match_default)
1220 {
1221 _Assume_(text || start >= end);
1222 for (auto& el : this->m_collection)
1223 el->invalidate();
1224 if (match_recursively(text, start, end, flags)) {
1225 this->interval.start = start;
1226 return true;
1227 }
1228 this->interval.invalidate();
1229 return false;
1230 }
1231
1232 protected:
1233 bool match_recursively(
1234 _In_reads_or_z_(end) const T* text,
1235 _In_ size_t start = 0,
1236 _In_ size_t end = SIZE_MAX,
1237 _In_ int flags = match_default)
1238 {
1239 bool all_matched = true;
1240 for (auto& el : this->m_collection) {
1241 if (!el->interval) {
1242 // Element was not matched in permutatuion yet.
1243 all_matched = false;
1244 if (el->match(text, start, end, flags)) {
1245 // Element matched for the first time.
1246 if (match_recursively(text, el->interval.end, end, flags)) {
1247 // Rest of the elements matched too.
1248 return true;
1249 }
1250 el->invalidate();
1251 }
1252 }
1253 }
1254 if (all_matched) {
1255 this->interval.end = start;
1256 return true;
1257 }
1258 return false;
1259 }
1260 };
1261
1264#ifdef _UNICODE
1265 using tpermutation = wpermutation;
1266#else
1267 using tpermutation = permutation;
1268#endif
1270
1274 template <class T>
1275 class basic_integer : public basic_parser<T>
1276 {
1277 public:
1278 basic_integer(_In_ const std::locale& locale = std::locale()) :
1279 basic_parser<T>(locale),
1280 value(0)
1281 {}
1282
1283 virtual void invalidate()
1284 {
1285 value = 0;
1287 }
1288
1289 public:
1290 size_t value;
1291 };
1292
1296 template <class T>
1298 {
1299 public:
1301 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1302 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1303 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1304 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1305 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1306 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1307 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1308 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1309 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1310 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1311 _In_ const std::locale& locale = std::locale()) :
1312 basic_integer<T>(locale),
1313 m_digit_0(digit_0),
1314 m_digit_1(digit_1),
1315 m_digit_2(digit_2),
1316 m_digit_3(digit_3),
1317 m_digit_4(digit_4),
1318 m_digit_5(digit_5),
1319 m_digit_6(digit_6),
1320 m_digit_7(digit_7),
1321 m_digit_8(digit_8),
1322 m_digit_9(digit_9)
1323 {}
1324
1325 virtual bool match(
1326 _In_reads_or_z_(end) const T* text,
1327 _In_ size_t start = 0,
1328 _In_ size_t end = SIZE_MAX,
1329 _In_ int flags = match_default)
1330 {
1331 _Assume_(text || start >= end);
1332 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1333 size_t dig;
1334 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1335 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1336 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1337 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1338 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1339 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1340 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1341 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1342 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1343 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1344 else break;
1345 this->value = this->value * 10 + dig;
1346 }
1348 this->interval.start = start;
1349 return true;
1350 }
1351 this->interval.invalidate();
1352 return false;
1353 }
1354
1355 protected:
1356 std::shared_ptr<basic_parser<T>>
1357 m_digit_0,
1358 m_digit_1,
1359 m_digit_2,
1360 m_digit_3,
1361 m_digit_4,
1362 m_digit_5,
1363 m_digit_6,
1364 m_digit_7,
1365 m_digit_8,
1366 m_digit_9;
1367 };
1368
1371#ifdef _UNICODE
1372 using tinteger10 = winteger10;
1373#else
1374 using tinteger10 = integer10;
1375#endif
1377
1381 template <class T>
1383 {
1384 public:
1386 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1387 _In_ const std::shared_ptr<basic_set<T>>& separator,
1388 _In_ const std::locale& locale = std::locale()) :
1389 basic_integer<T>(locale),
1390 digit_count(0),
1391 has_separators(false),
1392 m_digits(digits),
1393 m_separator(separator)
1394 {}
1395
1396 virtual bool match(
1397 _In_reads_or_z_(end) const T* text,
1398 _In_ size_t start = 0,
1399 _In_ size_t end = SIZE_MAX,
1400 _In_ int flags = match_default)
1401 {
1402 _Assume_(text || start >= end);
1403 if (m_digits->match(text, start, end, flags)) {
1404 // Leading part match.
1405 this->value = m_digits->value;
1406 digit_count = m_digits->interval.size();
1407 has_separators = false;
1408 this->interval.start = start;
1409 this->interval.end = m_digits->interval.end;
1410 if (m_digits->interval.size() <= 3) {
1411 // Maybe separated with thousand separators?
1412 size_t hit_offset = SIZE_MAX;
1413 while (m_separator->match(text, this->interval.end, end, flags) &&
1414 (hit_offset == SIZE_MAX || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1415 m_digits->match(text, m_separator->interval.end, end, flags) &&
1416 m_digits->interval.size() == 3)
1417 {
1418 // Thousand separator and three-digit integer followed.
1419 this->value = this->value * 1000 + m_digits->value;
1420 digit_count += 3;
1421 has_separators = true;
1422 this->interval.end = m_digits->interval.end;
1423 hit_offset = m_separator->hit_offset;
1424 }
1425 }
1426
1427 return true;
1428 }
1429 this->value = 0;
1430 this->interval.invalidate();
1431 return false;
1432 }
1433
1434 virtual void invalidate()
1435 {
1436 digit_count = 0;
1437 has_separators = false;
1439 }
1440
1441 public:
1444
1445 protected:
1446 std::shared_ptr<basic_integer10<T>> m_digits;
1447 std::shared_ptr<basic_set<T>> m_separator;
1448 };
1449
1452#ifdef _UNICODE
1453 using tinteger10ts = winteger10ts;
1454#else
1455 using tinteger10ts = integer10ts;
1456#endif
1458
1462 template <class T>
1464 {
1465 public:
1467 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1468 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1469 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1470 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1471 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1472 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1473 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1474 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1475 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1476 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1477 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1478 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1479 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1480 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1481 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1482 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1483 _In_ const std::locale& locale = std::locale()) :
1484 basic_integer<T>(locale),
1485 m_digit_0(digit_0),
1486 m_digit_1(digit_1),
1487 m_digit_2(digit_2),
1488 m_digit_3(digit_3),
1489 m_digit_4(digit_4),
1490 m_digit_5(digit_5),
1491 m_digit_6(digit_6),
1492 m_digit_7(digit_7),
1493 m_digit_8(digit_8),
1494 m_digit_9(digit_9),
1495 m_digit_10(digit_10),
1496 m_digit_11(digit_11),
1497 m_digit_12(digit_12),
1498 m_digit_13(digit_13),
1499 m_digit_14(digit_14),
1500 m_digit_15(digit_15)
1501 {}
1502
1503 virtual bool match(
1504 _In_reads_or_z_(end) const T* text,
1505 _In_ size_t start = 0,
1506 _In_ size_t end = SIZE_MAX,
1507 _In_ int flags = match_default)
1508 {
1509 _Assume_(text || start >= end);
1510 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1511 size_t dig;
1512 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1513 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1514 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1515 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1516 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1517 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1518 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1519 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1520 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1521 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1522 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1523 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1524 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1525 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1526 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1527 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
1528 else break;
1529 this->value = this->value * 16 + dig;
1530 }
1532 this->interval.start = start;
1533 return true;
1534 }
1535 this->interval.invalidate();
1536 return false;
1537 }
1538
1539 protected:
1540 std::shared_ptr<basic_parser<T>>
1541 m_digit_0,
1542 m_digit_1,
1543 m_digit_2,
1544 m_digit_3,
1545 m_digit_4,
1546 m_digit_5,
1547 m_digit_6,
1548 m_digit_7,
1549 m_digit_8,
1550 m_digit_9,
1551 m_digit_10,
1552 m_digit_11,
1553 m_digit_12,
1554 m_digit_13,
1555 m_digit_14,
1556 m_digit_15;
1557 };
1558
1561#ifdef _UNICODE
1562 using tinteger16 = winteger16;
1563#else
1564 using tinteger16 = integer16;
1565#endif
1567
// Roman numeral parser: class head and constructor.
//
// NOTE(review): this listing has lost two lines here - the class-head line that
// belongs between `template <class T>` and `{`, and the constructor's declarator
// line in front of the first parameter. The initializer list shows that the class
// derives from basic_integer<T>; the class name itself is not visible in this
// chunk - restore both lines from the original header before compiling.
1571 template <class T>
1573 {
1574 public:
// Constructor parameters: digit_N is the parser for the roman digit worth N
// (match() assigns exactly N when that parser hits). match() tests each parser
// for null before using it, so a null pointer simply disables that digit.
// `locale` is handed on to basic_integer<T>.
1576 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1577 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1578 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1579 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1580 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1581 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1582 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1583 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1584 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1585 _In_ const std::locale& locale = std::locale()) :
1586 basic_integer<T>(locale),
1587 m_digit_1(digit_1),
1588 m_digit_5(digit_5),
1589 m_digit_10(digit_10),
1590 m_digit_50(digit_50),
1591 m_digit_100(digit_100),
1592 m_digit_500(digit_500),
1593 m_digit_1000(digit_1000),
1594 m_digit_5000(digit_5000),
1595 m_digit_10000(digit_10000)
1596 {}
1597
1598 virtual bool match(
1599 _In_reads_or_z_(end) const T* text,
1600 _In_ size_t start = 0,
1601 _In_ size_t end = SIZE_MAX,
1602 _In_ int flags = match_default)
1603 {
1604 _Assume_(text || start >= end);
1605 size_t
1607 end2;
1608
1609 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
1610 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1611 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1612 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1613 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1614 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1615 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1616 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1617 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1618 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1619 else break;
1620
1621 // Store first digit.
1622 if (dig[4] == SIZE_MAX) dig[4] = dig[0];
1623
1624 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1625 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1626 break;
1627 }
1628 if (dig[0] <= dig[1]) {
1629 // Digit is less or equal previous one: add.
1630 this->value += dig[0];
1631 }
1632 else if (
1633 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1634 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1635 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1636 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1637 {
1638 // Digit is up to two orders bigger than previous one: subtract. But...
1639 if (dig[2] < dig[0]) {
1640 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1641 break;
1642 }
1643 this->value -= dig[1]; // Cancel addition in the previous step.
1644 dig[0] -= dig[1]; // Combine last two digits.
1645 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1646 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1647 this->value += dig[0]; // Add combined value.
1648 }
1649 else {
1650 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1651 break;
1652 }
1653 }
1654 if (this->value) {
1655 this->interval.start = start;
1656 return true;
1657 }
1658 this->interval.invalidate();
1659 return false;
1660 }
1661
1662 protected:
// Parsers for the individual roman digits, named after the value each one
// contributes in match(). match() skips any pointer that is null.
1663 std::shared_ptr<basic_parser<T>>
1664 m_digit_1,
1665 m_digit_5,
1666 m_digit_10,
1667 m_digit_50,
1668 m_digit_100,
1669 m_digit_500,
1670 m_digit_1000,
1671 m_digit_5000,
1672 m_digit_10000;
1673 };
1674
1677#ifdef _UNICODE
1679#else
1681#endif
1683
1687 template <class T>
1689 {
1690 public:
1692 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1693 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1694 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1695 _In_ const std::locale& locale = std::locale()) :
1696 basic_parser<T>(locale),
1697 numerator(_numerator),
1698 fraction_line(_fraction_line),
1699 denominator(_denominator)
1700 {}
1701
1702 virtual bool match(
1703 _In_reads_or_z_(end) const T* text,
1704 _In_ size_t start = 0,
1705 _In_ size_t end = SIZE_MAX,
1706 _In_ int flags = match_default)
1707 {
1708 _Assume_(text || start >= end);
1709 if (numerator->match(text, start, end, flags) &&
1710 fraction_line->match(text, numerator->interval.end, end, flags) &&
1711 denominator->match(text, fraction_line->interval.end, end, flags))
1712 {
1713 this->interval.start = start;
1714 this->interval.end = denominator->interval.end;
1715 return true;
1716 }
1717 numerator->invalidate();
1718 fraction_line->invalidate();
1719 denominator->invalidate();
1720 this->interval.invalidate();
1721 return false;
1722 }
1723
1724 virtual void invalidate()
1725 {
1726 numerator->invalidate();
1727 fraction_line->invalidate();
1728 denominator->invalidate();
1730 }
1731
1732 public:
1733 std::shared_ptr<basic_parser<T>> numerator;
1734 std::shared_ptr<basic_parser<T>> fraction_line;
1735 std::shared_ptr<basic_parser<T>> denominator;
1736 };
1737
1740#ifdef _UNICODE
1741 using tfraction = wfraction;
1742#else
1743 using tfraction = fraction;
1744#endif
1746
1750 template <class T>
1751 class basic_score : public basic_parser<T>
1752 {
1753 public:
1755 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1756 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1757 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1758 _In_ const std::shared_ptr<basic_parser<T>>& space,
1759 _In_ const std::locale& locale = std::locale()) :
1760 basic_parser<T>(locale),
1761 home(_home),
1762 separator(_separator),
1763 guest(_guest),
1764 m_space(space)
1765 {}
1766
1767 virtual bool match(
1768 _In_reads_or_z_(end) const T* text,
1769 _In_ size_t start = 0,
1770 _In_ size_t end = SIZE_MAX,
1771 _In_ int flags = match_default)
1772 {
1773 _Assume_(text || start >= end);
1774 this->interval.end = start;
1775
1776 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1777
1778 if (home->match(text, this->interval.end, end, flags))
1779 this->interval.end = home->interval.end;
1780 else
1781 goto end;
1782
1783 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1784
1785 if (separator->match(text, this->interval.end, end, flags))
1786 this->interval.end = separator->interval.end;
1787 else
1788 goto end;
1789
1790 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1791
1792 if (guest->match(text, this->interval.end, end, flags))
1793 this->interval.end = guest->interval.end;
1794 else
1795 goto end;
1796
1797 this->interval.start = start;
1798 return true;
1799
1800 end:
1801 home->invalidate();
1802 separator->invalidate();
1803 guest->invalidate();
1804 this->interval.invalidate();
1805 return false;
1806 }
1807
1808 virtual void invalidate()
1809 {
1810 home->invalidate();
1811 separator->invalidate();
1812 guest->invalidate();
1814 }
1815
1816 public:
1817 std::shared_ptr<basic_parser<T>> home;
1818 std::shared_ptr<basic_parser<T>> separator;
1819 std::shared_ptr<basic_parser<T>> guest;
1820
1821 protected:
1822 std::shared_ptr<basic_parser<T>> m_space;
1823 };
1824
1825 using score = basic_score<char>;
1827#ifdef _UNICODE
1828 using tscore = wscore;
1829#else
1830 using tscore = score;
1831#endif
1833
// Signed numeral parser (optional sign followed by a number): class head and constructor.
//
// NOTE(review): this listing has lost several lines here - the class-head line
// between `template <class T>` and `{`, the constructor's declarator line in
// front of the first parameter, and the member initializers that should follow
// `basic_parser<T>(locale),` (the trailing comma shows the list continues).
// Presumably they initialize positive_sign, negative_sign, special_sign and
// number from the like-named parameters - confirm against the original header.
1837 template <class T>
1839 {
1840 public:
// The three sign parsers may be null: match() and invalidate() test them before use.
// `_number` is used unconditionally and therefore must not be null.
1842 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1843 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1844 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1845 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1846 _In_ const std::locale& locale = std::locale()) :
1847 basic_parser<T>(locale),
1852 {}
1853
1854 virtual bool match(
1855 _In_reads_or_z_(end) const T* text,
1856 _In_ size_t start = 0,
1857 _In_ size_t end = SIZE_MAX,
1858 _In_ int flags = match_default)
1859 {
1860 _Assume_(text || start >= end);
1861 this->interval.end = start;
1862 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1863 this->interval.end = positive_sign->interval.end;
1864 if (negative_sign) negative_sign->invalidate();
1865 if (special_sign) special_sign->invalidate();
1866 }
1867 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1868 this->interval.end = negative_sign->interval.end;
1869 if (positive_sign) positive_sign->invalidate();
1870 if (special_sign) special_sign->invalidate();
1871 }
1872 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1873 this->interval.end = special_sign->interval.end;
1874 if (positive_sign) positive_sign->invalidate();
1875 if (negative_sign) negative_sign->invalidate();
1876 }
1877 else {
1878 if (positive_sign) positive_sign->invalidate();
1879 if (negative_sign) negative_sign->invalidate();
1880 if (special_sign) special_sign->invalidate();
1881 }
1882 if (number->match(text, this->interval.end, end, flags)) {
1883 this->interval.start = start;
1884 this->interval.end = number->interval.end;
1885 return true;
1886 }
1887 if (positive_sign) positive_sign->invalidate();
1888 if (negative_sign) negative_sign->invalidate();
1889 if (special_sign) special_sign->invalidate();
1890 number->invalidate();
1891 this->interval.invalidate();
1892 return false;
1893 }
1894
1895 virtual void invalidate()
1896 {
1897 if (positive_sign) positive_sign->invalidate();
1898 if (negative_sign) negative_sign->invalidate();
1899 if (special_sign) special_sign->invalidate();
1900 number->invalidate();
1902 }
1903
1904 public:
// Sign parsers are optional (may be null); match() tries them in this order
// and keeps at most one of them matched.
1905 std::shared_ptr<basic_parser<T>> positive_sign;
1906 std::shared_ptr<basic_parser<T>> negative_sign;
1907 std::shared_ptr<basic_parser<T>> special_sign;
// Parser for the number that has to follow the (optional) sign.
1908 std::shared_ptr<basic_parser<T>> number;
1909 };
1910
1913#ifdef _UNICODE
1915#else
1917#endif
1919
// Mixed numeral parser (optional sign, then integer and/or fraction): class head and constructor.
//
// NOTE(review): this listing has lost several lines here - the class-head line
// between `template <class T>` and `{`, the constructor's declarator line in
// front of the first parameter, and the member initializers between
// `basic_parser<T>(locale),` and `m_space(space)`. Presumably those initialize
// positive_sign, negative_sign, special_sign, integer and fraction from the
// like-named parameters - confirm against the original header.
1923 template <class T>
1925 {
1926 public:
// The three sign parsers may be null: match() and invalidate() test them before use.
// `_integer`, `space` and `_fraction` are used unconditionally and must not be null.
1928 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1929 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1930 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1931 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1932 _In_ const std::shared_ptr<basic_parser<T>>& space,
1933 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1934 _In_ const std::locale& locale = std::locale()) :
1935 basic_parser<T>(locale),
1941 m_space(space)
1942 {}
1943
1944 virtual bool match(
1945 _In_reads_or_z_(end) const T* text,
1946 _In_ size_t start = 0,
1947 _In_ size_t end = SIZE_MAX,
1948 _In_ int flags = match_default)
1949 {
1950 _Assume_(text || start >= end);
1951 this->interval.end = start;
1952
1953 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1954 this->interval.end = positive_sign->interval.end;
1955 if (negative_sign) negative_sign->invalidate();
1956 if (special_sign) special_sign->invalidate();
1957 }
1958 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1959 this->interval.end = negative_sign->interval.end;
1960 if (positive_sign) positive_sign->invalidate();
1961 if (special_sign) special_sign->invalidate();
1962 }
1963 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1964 this->interval.end = special_sign->interval.end;
1965 if (positive_sign) positive_sign->invalidate();
1966 if (negative_sign) negative_sign->invalidate();
1967 }
1968 else {
1969 if (positive_sign) positive_sign->invalidate();
1970 if (negative_sign) negative_sign->invalidate();
1971 if (special_sign) special_sign->invalidate();
1972 }
1973
1974 // Check for <integer> <fraction>
1975 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1976 if (integer->match(text, this->interval.end, end, flags) &&
1977 m_space->match(text, integer->interval.end, end, space_match_flags))
1978 {
1979 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1980 if (fraction->match(text, this->interval.end, end, flags)) {
1981 this->interval.start = start;
1982 this->interval.end = fraction->interval.end;
1983 return true;
1984 }
1985 fraction->invalidate();
1986 this->interval.start = start;
1987 this->interval.end = integer->interval.end;
1988 return true;
1989 }
1990
1991 // Check for <fraction>
1992 if (fraction->match(text, this->interval.end, end, flags)) {
1993 integer->invalidate();
1994 this->interval.start = start;
1995 this->interval.end = fraction->interval.end;
1996 return true;
1997 }
1998
1999 // Check for <integer>
2000 if (integer->match(text, this->interval.end, end, flags)) {
2001 fraction->invalidate();
2002 this->interval.start = start;
2003 this->interval.end = integer->interval.end;
2004 return true;
2005 }
2006
2007 if (positive_sign) positive_sign->invalidate();
2008 if (negative_sign) negative_sign->invalidate();
2009 if (special_sign) special_sign->invalidate();
2010 integer->invalidate();
2011 fraction->invalidate();
2012 this->interval.invalidate();
2013 return false;
2014 }
2015
2016 virtual void invalidate()
2017 {
2018 if (positive_sign) positive_sign->invalidate();
2019 if (negative_sign) negative_sign->invalidate();
2020 if (special_sign) special_sign->invalidate();
2021 integer->invalidate();
2022 fraction->invalidate();
2024 }
2025
2026 public:
// Sign parsers are optional (may be null); match() tries them in this order
// and keeps at most one of them matched.
2027 std::shared_ptr<basic_parser<T>> positive_sign;
2028 std::shared_ptr<basic_parser<T>> negative_sign;
2029 std::shared_ptr<basic_parser<T>> special_sign;
// After a successful match() at least one of integer and fraction holds a match.
2030 std::shared_ptr<basic_parser<T>> integer;
2031 std::shared_ptr<basic_parser<T>> fraction;
2032
2033 protected:
// Parser for the space(s) allowed between the integer and the fraction.
2034 std::shared_ptr<basic_parser<T>> m_space;
2035 };
2036
2039#ifdef _UNICODE
2041#else
2043#endif
2045
// Scientific numeral parser (sign, integer part, decimal part, exponent): class head and constructor.
//
// NOTE(review): this listing has lost several lines here - the class-head line
// between `template <class T>` and `{`, the constructor's declarator line in
// front of the first parameter, and the member initializers between
// `basic_parser<T>(locale),` and `value(...)`. Presumably those initialize the
// public parser members from the like-named parameters - confirm against the
// original header.
2049 template <class T>
2051 {
2052 public:
// Sign, exponent-symbol, exponent-sign and exponent parsers may be null (match()
// and invalidate() test them before use); `_integer`, `_decimal_separator` and
// `_decimal` are used unconditionally and must not be null.
2054 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2055 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2056 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2057 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2058 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2059 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2060 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2061 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2062 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2063 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2064 _In_ const std::locale& locale = std::locale()) :
2065 basic_parser<T>(locale),
// Until a number has been matched the value is "not a number".
2076 value(std::numeric_limits<double>::quiet_NaN())
2077 {}
2078
2079 virtual bool match(
2080 _In_reads_or_z_(end) const T* text,
2081 _In_ size_t start = 0,
2082 _In_ size_t end = SIZE_MAX,
2083 _In_ int flags = match_default)
2084 {
2085 _Assume_(text || start >= end);
2086 this->interval.end = start;
2087
2088 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2089 this->interval.end = positive_sign->interval.end;
2090 if (negative_sign) negative_sign->invalidate();
2091 if (special_sign) special_sign->invalidate();
2092 }
2093 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2094 this->interval.end = negative_sign->interval.end;
2095 if (positive_sign) positive_sign->invalidate();
2096 if (special_sign) special_sign->invalidate();
2097 }
2098 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2099 this->interval.end = special_sign->interval.end;
2100 if (positive_sign) positive_sign->invalidate();
2101 if (negative_sign) negative_sign->invalidate();
2102 }
2103 else {
2104 if (positive_sign) positive_sign->invalidate();
2105 if (negative_sign) negative_sign->invalidate();
2106 if (special_sign) special_sign->invalidate();
2107 }
2108
2109 if (integer->match(text, this->interval.end, end, flags))
2110 this->interval.end = integer->interval.end;
2111
2112 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2113 decimal->match(text, decimal_separator->interval.end, end, flags))
2114 this->interval.end = decimal->interval.end;
2115 else {
2116 decimal_separator->invalidate();
2117 decimal->invalidate();
2118 }
2119
2120 if (integer->interval.empty() &&
2121 decimal->interval.empty())
2122 {
2123 // No integer part, no decimal part.
2124 if (positive_sign) positive_sign->invalidate();
2125 if (negative_sign) negative_sign->invalidate();
2126 if (special_sign) special_sign->invalidate();
2127 integer->invalidate();
2128 decimal_separator->invalidate();
2129 decimal->invalidate();
2130 if (exponent_symbol) exponent_symbol->invalidate();
2131 if (positive_exp_sign) positive_exp_sign->invalidate();
2132 if (negative_exp_sign) negative_exp_sign->invalidate();
2133 if (exponent) exponent->invalidate();
2134 this->interval.invalidate();
2135 return false;
2136 }
2137
2138 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2139 ((positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2140 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags)) ||
2141 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2142 {
2143 this->interval.end = exponent->interval.end;
2144 if (negative_exp_sign) negative_exp_sign->invalidate();
2145 }
2146 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2147 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2148 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2149 {
2150 this->interval.end = exponent->interval.end;
2151 if (positive_exp_sign) positive_exp_sign->invalidate();
2152 }
2153 else {
2154 if (exponent_symbol) exponent_symbol->invalidate();
2155 if (positive_exp_sign) positive_exp_sign->invalidate();
2156 if (negative_exp_sign) negative_exp_sign->invalidate();
2157 if (exponent) exponent->invalidate();
2158 }
2159
2160 value = (double)integer->value;
2161 if (decimal->interval)
2162 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2163 if (negative_sign && negative_sign->interval)
2164 value = -value;
2165 if (exponent && exponent->interval) {
2166 double e = (double)exponent->value;
2167 if (negative_exp_sign && negative_exp_sign->interval)
2168 e = -e;
2169 value *= pow(10.0, e);
2170 }
2171
2172 this->interval.start = start;
2173 return true;
2174 }
2175
2176 virtual void invalidate()
2177 {
2178 if (positive_sign) positive_sign->invalidate();
2179 if (negative_sign) negative_sign->invalidate();
2180 if (special_sign) special_sign->invalidate();
2181 integer->invalidate();
2182 decimal_separator->invalidate();
2183 decimal->invalidate();
2184 if (exponent_symbol) exponent_symbol->invalidate();
2185 if (positive_exp_sign) positive_exp_sign->invalidate();
2186 if (negative_exp_sign) negative_exp_sign->invalidate();
2187 if (exponent) exponent->invalidate();
2188 value = std::numeric_limits<double>::quiet_NaN();
2190 }
2191
2192 public:
// Sign parsers are optional (may be null). A matched negative_sign negates `value`.
2193 std::shared_ptr<basic_parser<T>> positive_sign;
2194 std::shared_ptr<basic_parser<T>> negative_sign;
2195 std::shared_ptr<basic_parser<T>> special_sign;
// Integer and decimal parts; match() requires at least one of them.
2196 std::shared_ptr<basic_integer<T>> integer;
2197 std::shared_ptr<basic_parser<T>> decimal_separator;
2198 std::shared_ptr<basic_integer<T>> decimal;
// Exponent parsers are optional (may be null). A matched negative_exp_sign negates the exponent.
2199 std::shared_ptr<basic_parser<T>> exponent_symbol;
2200 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2201 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2202 std::shared_ptr<basic_integer<T>> exponent;
// Number computed by the last successful match(); NaN after construction or invalidate().
2203 double value;
2204 };
2205
2208#ifdef _UNICODE
2210#else
2212#endif
2214
// Monetary numeral parser (optional sign, currency, integer and/or decimal part): class head and constructor.
//
// NOTE(review): this listing has lost several lines here - the class-head line
// between `template <class T>` and `{`, the constructor's declarator line in
// front of the first parameter, and the member initializers that should follow
// `basic_parser<T>(locale),` (the trailing comma shows the list continues).
// Presumably they initialize the public parser members from the like-named
// parameters - confirm against the original header.
2218 template <class T>
2220 {
2221 public:
// `_currency`, `_integer`, `_decimal_separator` and `_decimal` are used
// unconditionally by match() and invalidate() and must not be null.
2223 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2224 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2225 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2226 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2227 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2228 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2229 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2230 _In_ const std::locale& locale = std::locale()) :
2231 basic_parser<T>(locale),
2239 {}
2240
2241 virtual bool match(
2242 _In_reads_or_z_(end) const T* text,
2243 _In_ size_t start = 0,
2244 _In_ size_t end = SIZE_MAX,
2245 _In_ int flags = match_default)
2246 {
2247 _Assume_(text || start >= end);
2248 this->interval.end = start;
2249
2250 if (positive_sign->match(text, this->interval.end, end, flags)) {
2251 this->interval.end = positive_sign->interval.end;
2252 if (negative_sign) negative_sign->invalidate();
2253 if (special_sign) special_sign->invalidate();
2254 }
2255 else if (negative_sign->match(text, this->interval.end, end, flags)) {
2256 this->interval.end = negative_sign->interval.end;
2257 if (positive_sign) positive_sign->invalidate();
2258 if (special_sign) special_sign->invalidate();
2259 }
2260 else if (special_sign->match(text, this->interval.end, end, flags)) {
2261 this->interval.end = special_sign->interval.end;
2262 if (positive_sign) positive_sign->invalidate();
2263 if (negative_sign) negative_sign->invalidate();
2264 }
2265 else {
2266 if (positive_sign) positive_sign->invalidate();
2267 if (negative_sign) negative_sign->invalidate();
2268 if (special_sign) special_sign->invalidate();
2269 }
2270
2271 if (currency->match(text, this->interval.end, end, flags))
2272 this->interval.end = currency->interval.end;
2273 else {
2274 if (positive_sign) positive_sign->invalidate();
2275 if (negative_sign) negative_sign->invalidate();
2276 if (special_sign) special_sign->invalidate();
2277 integer->invalidate();
2278 decimal_separator->invalidate();
2279 decimal->invalidate();
2280 this->interval.invalidate();
2281 return false;
2282 }
2283
2284 if (integer->match(text, this->interval.end, end, flags))
2285 this->interval.end = integer->interval.end;
2286 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2287 decimal->match(text, decimal_separator->interval.end, end, flags))
2288 this->interval.end = decimal->interval.end;
2289 else {
2290 decimal_separator->invalidate();
2291 decimal->invalidate();
2292 }
2293
2294 if (integer->interval.empty() &&
2295 decimal->interval.empty())
2296 {
2297 // No integer part, no decimal part.
2298 if (positive_sign) positive_sign->invalidate();
2299 if (negative_sign) negative_sign->invalidate();
2300 if (special_sign) special_sign->invalidate();
2301 currency->invalidate();
2302 integer->invalidate();
2303 decimal_separator->invalidate();
2304 decimal->invalidate();
2305 this->interval.invalidate();
2306 return false;
2307 }
2308
2309 this->interval.start = start;
2310 return true;
2311 }
2312
2313 virtual void invalidate()
2314 {
2315 if (positive_sign) positive_sign->invalidate();
2316 if (negative_sign) negative_sign->invalidate();
2317 if (special_sign) special_sign->invalidate();
2318 currency->invalidate();
2319 integer->invalidate();
2320 decimal_separator->invalidate();
2321 decimal->invalidate();
2323 }
2324
2325 public:
2326 std::shared_ptr<basic_parser<T>> positive_sign;
2327 std::shared_ptr<basic_parser<T>> negative_sign;
2328 std::shared_ptr<basic_parser<T>> special_sign;
2329 std::shared_ptr<basic_parser<T>> currency;
2330 std::shared_ptr<basic_parser<T>> integer;
2331 std::shared_ptr<basic_parser<T>> decimal_separator;
2332 std::shared_ptr<basic_parser<T>> decimal;
2333 };
2334
2337#ifdef _UNICODE
2339#else
2341#endif
2343
2347 template <class T>
2349 {
2350 public:
2352 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2353 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2354 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2355 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2356 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2357 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2358 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2359 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2360 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2361 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2362 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2363 _In_ const std::locale& locale = std::locale()) :
2364 basic_parser<T>(locale),
2365 m_digit_0(digit_0),
2366 m_digit_1(digit_1),
2367 m_digit_2(digit_2),
2368 m_digit_3(digit_3),
2369 m_digit_4(digit_4),
2370 m_digit_5(digit_5),
2371 m_digit_6(digit_6),
2372 m_digit_7(digit_7),
2373 m_digit_8(digit_8),
2374 m_digit_9(digit_9),
2375 m_separator(separator)
2376 {
2377 value.s_addr = 0;
2378 }
2379
2380 virtual bool match(
2381 _In_reads_or_z_(end) const T* text,
2382 _In_ size_t start = 0,
2383 _In_ size_t end = SIZE_MAX,
2384 _In_ int flags = match_default)
2385 {
2386 _Assume_(text || start >= end);
2387 this->interval.end = start;
2388 value.s_addr = 0;
2389
2390 size_t i;
2391 for (i = 0; i < 4; i++) {
2392 if (i) {
2393 if (m_separator->match(text, this->interval.end, end, flags))
2394 this->interval.end = m_separator->interval.end;
2395 else
2396 goto error;
2397 }
2398
2399 components[i].start = this->interval.end;
2400 bool is_empty = true;
2401 size_t x;
2402 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2403 size_t dig, digit_end;
2404 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2405 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2406 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2407 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2408 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2409 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2410 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2411 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2412 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2413 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2414 else break;
2415 size_t x_n = x * 10 + dig;
2416 if (x_n <= 255) {
2417 x = x_n;
2418 this->interval.end = digit_end;
2419 is_empty = false;
2420 }
2421 else
2422 break;
2423 }
2424 if (is_empty)
2425 goto error;
2426 components[i].end = this->interval.end;
2427 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2428 }
2429 if (i < 4)
2430 goto error;
2431
2432 this->interval.start = start;
2433 return true;
2434
2435 error:
2436 components[0].start = 1;
2437 components[0].end = 0;
2438 components[1].start = 1;
2439 components[1].end = 0;
2440 components[2].start = 1;
2441 components[2].end = 0;
2442 components[3].start = 1;
2443 components[3].end = 0;
2444 value.s_addr = 0;
2445 this->interval.invalidate();
2446 return false;
2447 }
2448
2449 virtual void invalidate()
2450 {
2451 components[0].start = 1;
2452 components[0].end = 0;
2453 components[1].start = 1;
2454 components[1].end = 0;
2455 components[2].start = 1;
2456 components[2].end = 0;
2457 components[3].start = 1;
2458 components[3].end = 0;
2459 value.s_addr = 0;
2461 }
2462
2463 public:
2466
2467 protected:
2468 std::shared_ptr<basic_parser<T>>
2469 m_digit_0,
2470 m_digit_1,
2471 m_digit_2,
2472 m_digit_3,
2473 m_digit_4,
2474 m_digit_5,
2475 m_digit_6,
2476 m_digit_7,
2477 m_digit_8,
2478 m_digit_9;
2479 std::shared_ptr<basic_parser<T>> m_separator;
2480 };
2481
2484#ifdef _UNICODE
2486#else
2488#endif
2490
2494 template <class T>
2496 {
2497 public:
2498 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2499
2500 virtual bool match(
2501 _In_reads_or_z_(end) const T* text,
2502 _In_ size_t start = 0,
2503 _In_ size_t end = SIZE_MAX,
2504 _In_ int flags = match_default)
2505 {
2506 _Assume_(text || start >= end);
2507 if (start < end && text[start]) {
2508 if (text[start] == '-' ||
2509 text[start] == '_' ||
2510 text[start] == ':' ||
2511 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2512 {
2513 this->interval.end = (this->interval.start = start) + 1;
2514 return true;
2515 }
2516 }
2517 this->interval.invalidate();
2518 return false;
2519 }
2520 };
2521
2524#ifdef _UNICODE
2526#else
2528#endif
2529
2534 {
2535 public:
2536 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2537
2538 virtual bool match(
2539 _In_reads_or_z_(end) const char* text,
2540 _In_ size_t start = 0,
2541 _In_ size_t end = SIZE_MAX,
2542 _In_ int flags = match_default)
2543 {
2544 _Assume_(text || start >= end);
2545 if (start < end && text[start]) {
2546 wchar_t buf[3];
2547 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2548 const wchar_t* chr_end = chr + stdex::strlen(chr);
2549 if (((chr[0] == L'-' ||
2550 chr[0] == L'_' ||
2551 chr[0] == L':') && chr[1] == 0) ||
2552 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2553 {
2554 this->interval.start = start;
2555 return true;
2556 }
2557 }
2558 this->interval.invalidate();
2559 return false;
2560 }
2561 };
2562
2566 template <class T>
2568 {
2569 public:
2571 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2572 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2573 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2574 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2575 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2576 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2577 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2578 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2579 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2580 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2581 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2582 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2583 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2584 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2585 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2586 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2587 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2588 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2589 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2590 _In_ const std::locale& locale = std::locale()) :
2591 basic_parser<T>(locale),
2592 m_digit_0(digit_0),
2593 m_digit_1(digit_1),
2594 m_digit_2(digit_2),
2595 m_digit_3(digit_3),
2596 m_digit_4(digit_4),
2597 m_digit_5(digit_5),
2598 m_digit_6(digit_6),
2599 m_digit_7(digit_7),
2600 m_digit_8(digit_8),
2601 m_digit_9(digit_9),
2602 m_digit_10(digit_10),
2603 m_digit_11(digit_11),
2604 m_digit_12(digit_12),
2605 m_digit_13(digit_13),
2606 m_digit_14(digit_14),
2607 m_digit_15(digit_15),
2608 m_separator(separator),
2609 m_scope_id_separator(scope_id_separator),
2611 {
2612 memset(&value, 0, sizeof(value));
2613 }
2614
2615 virtual bool match(
2616 _In_reads_or_z_(end) const T* text,
2617 _In_ size_t start = 0,
2618 _In_ size_t end = SIZE_MAX,
2619 _In_ int flags = match_default)
2620 {
2621 _Assume_(text || start >= end);
2622 this->interval.end = start;
2623 memset(&value, 0, sizeof(value));
2624
2625 size_t i, compaction_i = SIZE_MAX, compaction_start = start;
2626 for (i = 0; i < 8; i++) {
2627 bool is_empty = true;
2628
2629 if (m_separator->match(text, this->interval.end, end, flags)) {
2630 if (m_separator->match(text, m_separator->interval.end, end, flags)) {
2631 // :: found
2632 if (compaction_i == SIZE_MAX) {
2633 // Zero compaction start
2634 compaction_i = i;
2635 compaction_start = m_separator->interval.start;
2636 this->interval.end = m_separator->interval.end;
2637 }
2638 else {
2639 // More than one zero compaction
2640 break;
2641 }
2642 }
2643 else if (i) {
2644 // Inner : found
2645 this->interval.end = m_separator->interval.end;
2646 }
2647 else {
2648 // Leading : found
2649 goto error;
2650 }
2651 }
2652 else if (i) {
2653 // : missing
2654 break;
2655 }
2656
2657 components[i].start = this->interval.end;
2658 size_t x;
2659 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2660 size_t dig, digit_end;
2661 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2662 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2663 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2664 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2665 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2666 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2667 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2668 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2669 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2670 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2671 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2672 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2673 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2674 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2675 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2676 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2677 else break;
2678 size_t x_n = x * 16 + dig;
2679 if (x_n <= 0xffff) {
2680 x = x_n;
2681 this->interval.end = digit_end;
2682 is_empty = false;
2683 }
2684 else
2685 break;
2686 }
2687 if (is_empty) {
2688 if (compaction_i != SIZE_MAX) {
2689 // Zero compaction active: no sweat.
2690 break;
2691 }
2692 goto error;
2693 }
2694 components[i].end = this->interval.end;
2695 this->value.s6_words[i] = (uint16_t)x;
2696 }
2697
2698 if (compaction_i != SIZE_MAX) {
2699 // Align components right due to zero compaction.
2700 size_t j, k;
2701 for (j = 8, k = i; k > compaction_i;) {
2702 this->value.s6_words[--j] = this->value.s6_words[--k];
2704 }
2705 for (; j > compaction_i;) {
2706 this->value.s6_words[--j] = 0;
2707 components[j].start =
2709 }
2710 }
2711 else if (i < 8)
2712 goto error;
2713
2714 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2715 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2716 this->interval.end = scope_id->interval.end;
2717 else if (scope_id)
2718 scope_id->invalidate();
2719
2720 this->interval.start = start;
2721 return true;
2722
2723 error:
2724 components[0].start = 1;
2725 components[0].end = 0;
2726 components[1].start = 1;
2727 components[1].end = 0;
2728 components[2].start = 1;
2729 components[2].end = 0;
2730 components[3].start = 1;
2731 components[3].end = 0;
2732 components[4].start = 1;
2733 components[4].end = 0;
2734 components[5].start = 1;
2735 components[5].end = 0;
2736 components[6].start = 1;
2737 components[6].end = 0;
2738 components[7].start = 1;
2739 components[7].end = 0;
2740 memset(&value, 0, sizeof(value));
2741 if (scope_id) scope_id->invalidate();
2742 this->interval.invalidate();
2743 return false;
2744 }
2745
2746 virtual void invalidate()
2747 {
2748 components[0].start = 1;
2749 components[0].end = 0;
2750 components[1].start = 1;
2751 components[1].end = 0;
2752 components[2].start = 1;
2753 components[2].end = 0;
2754 components[3].start = 1;
2755 components[3].end = 0;
2756 components[4].start = 1;
2757 components[4].end = 0;
2758 components[5].start = 1;
2759 components[5].end = 0;
2760 components[6].start = 1;
2761 components[6].end = 0;
2762 components[7].start = 1;
2763 components[7].end = 0;
2764 memset(&value, 0, sizeof(value));
2765 if (scope_id) scope_id->invalidate();
2767 }
2768
2769 public:
2772 std::shared_ptr<basic_parser<T>> scope_id;
2773
2774 protected:
2775 std::shared_ptr<basic_parser<T>>
2776 m_digit_0,
2777 m_digit_1,
2778 m_digit_2,
2779 m_digit_3,
2780 m_digit_4,
2781 m_digit_5,
2782 m_digit_6,
2783 m_digit_7,
2784 m_digit_8,
2785 m_digit_9,
2786 m_digit_10,
2787 m_digit_11,
2788 m_digit_12,
2789 m_digit_13,
2790 m_digit_14,
2791 m_digit_15;
2792 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2793 };
2794
2797#ifdef _UNICODE
2799#else
2801#endif
2803
2807 template <class T>
2809 {
2810 public:
2812 _In_ bool allow_idn,
2813 _In_ const std::locale& locale = std::locale()) :
2814 basic_parser<T>(locale),
2815 m_allow_idn(allow_idn),
2816 allow_on_edge(true)
2817 {}
2818
2819 virtual bool match(
2820 _In_reads_or_z_(end) const T* text,
2821 _In_ size_t start = 0,
2822 _In_ size_t end = SIZE_MAX,
2823 _In_ int flags = match_default)
2824 {
2825 _Assume_(text || start >= end);
2826 if (start < end && text[start]) {
2827 if (('A' <= text[start] && text[start] <= 'Z') ||
2828 ('a' <= text[start] && text[start] <= 'z') ||
2829 ('0' <= text[start] && text[start] <= '9'))
2830 allow_on_edge = true;
2831 else if (text[start] == '-')
2832 allow_on_edge = false;
2833 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2834 allow_on_edge = true;
2835 else {
2836 this->interval.invalidate();
2837 return false;
2838 }
2839 this->interval.end = (this->interval.start = start) + 1;
2840 return true;
2841 }
2842 this->interval.invalidate();
2843 return false;
2844 }
2845
2846 public:
2848
2849 protected:
2850 bool m_allow_idn;
2851 };
2852
2855#ifdef _UNICODE
2857#else
2859#endif
2860
2865 {
2866 public:
2868 _In_ bool allow_idn,
2869 _In_ const std::locale& locale = std::locale()) :
2871 {}
2872
2873 virtual bool match(
2874 _In_reads_or_z_(end) const char* text,
2875 _In_ size_t start = 0,
2876 _In_ size_t end = SIZE_MAX,
2877 _In_ int flags = match_default)
2878 {
2879 _Assume_(text || start >= end);
2880 if (start < end && text[start]) {
2881 wchar_t buf[3];
2882 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2883 const wchar_t* chr_end = chr + stdex::strlen(chr);
2884 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2885 ('a' <= chr[0] && chr[0] <= 'z') ||
2886 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2887 allow_on_edge = true;
2888 else if (chr[0] == '-' && chr[1] == 0)
2889 allow_on_edge = false;
2890 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2891 allow_on_edge = true;
2892 else {
2893 this->interval.invalidate();
2894 return false;
2895 }
2896 this->interval.start = start;
2897 return true;
2898 }
2899 this->interval.invalidate();
2900 return false;
2901 }
2902 };
2903
2907 template <class T>
2909 {
2910 public:
2912 _In_ bool allow_absolute,
2913 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2914 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2915 _In_ const std::locale& locale = std::locale()) :
2916 basic_parser<T>(locale),
2918 m_domain_char(domain_char),
2919 m_separator(separator)
2920 {}
2921
2922 virtual bool match(
2923 _In_reads_or_z_(end) const T* text,
2924 _In_ size_t start = 0,
2925 _In_ size_t end = SIZE_MAX,
2926 _In_ int flags = match_default)
2927 {
2928 _Assume_(text || start >= end);
2929 size_t i = start, count;
2930 for (count = 0; i < end && text[i] && count < 127; count++) {
2931 if (m_domain_char->match(text, i, end, flags) &&
2932 m_domain_char->allow_on_edge)
2933 {
2934 // Domain start
2935 this->interval.end = i = m_domain_char->interval.end;
2936 while (i < end && text[i]) {
2937 if (m_domain_char->allow_on_edge &&
2938 m_separator->match(text, i, end, flags))
2939 {
2940 // Domain end
2941 if (m_allow_absolute)
2942 this->interval.end = i = m_separator->interval.end;
2943 else {
2944 this->interval.end = i;
2945 i = m_separator->interval.end;
2946 }
2947 break;
2948 }
2949 if (m_domain_char->match(text, i, end, flags)) {
2950 if (m_domain_char->allow_on_edge)
2951 this->interval.end = i = m_domain_char->interval.end;
2952 else
2953 i = m_domain_char->interval.end;
2954 }
2955 else {
2956 this->interval.start = start;
2957 return true;
2958 }
2959 }
2960 }
2961 else
2962 break;
2963 }
2964 if (count) {
2965 this->interval.start = start;
2966 return true;
2967 }
2968 this->interval.invalidate();
2969 return false;
2970 }
2971
2972 protected:
2974 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2975 std::shared_ptr<basic_parser<T>> m_separator;
2976 };
2977
2980#ifdef _UNICODE
2981 using tdns_name = wdns_name;
2982#else
2983 using tdns_name = dns_name;
2984#endif
2986
2990 template <class T>
2992 {
2993 public:
2994 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2995
2996 virtual bool match(
2997 _In_reads_or_z_(end) const T* text,
2998 _In_ size_t start = 0,
2999 _In_ size_t end = SIZE_MAX,
3000 _In_ int flags = match_default)
3001 {
3002 _Assume_(text || start >= end);
3003 if (start < end && text[start]) {
3004 if (text[start] == '-' ||
3005 text[start] == '.' ||
3006 text[start] == '_' ||
3007 text[start] == '~' ||
3008 text[start] == '%' ||
3009 text[start] == '!' ||
3010 text[start] == '$' ||
3011 text[start] == '&' ||
3012 text[start] == '\'' ||
3013 //text[start] == '(' ||
3014 //text[start] == ')' ||
3015 text[start] == '*' ||
3016 text[start] == '+' ||
3017 text[start] == ',' ||
3018 text[start] == ';' ||
3019 text[start] == '=' ||
3020 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3021 {
3022 this->interval.end = (this->interval.start = start) + 1;
3023 return true;
3024 }
3025 }
3026 this->interval.invalidate();
3027 return false;
3028 }
3029 };
3030
3033#ifdef _UNICODE
3035#else
3037#endif
3038
3043 {
3044 public:
3045 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3046
3047 virtual bool match(
3048 _In_reads_or_z_(end) const char* text,
3049 _In_ size_t start = 0,
3050 _In_ size_t end = SIZE_MAX,
3051 _In_ int flags = match_default)
3052 {
3053 _Assume_(text || start >= end);
3054 if (start < end && text[start]) {
3055 wchar_t buf[3];
3056 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3057 const wchar_t* chr_end = chr + stdex::strlen(chr);
3058 if (((chr[0] == L'-' ||
3059 chr[0] == L'.' ||
3060 chr[0] == L'_' ||
3061 chr[0] == L'~' ||
3062 chr[0] == L'%' ||
3063 chr[0] == L'!' ||
3064 chr[0] == L'$' ||
3065 chr[0] == L'&' ||
3066 chr[0] == L'\'' ||
3067 //chr[0] == L'(' ||
3068 //chr[0] == L')' ||
3069 chr[0] == L'*' ||
3070 chr[0] == L'+' ||
3071 chr[0] == L',' ||
3072 chr[0] == L';' ||
3073 chr[0] == L'=') && chr[1] == 0) ||
3074 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3075 {
3076 this->interval.start = start;
3077 return true;
3078 }
3079 }
3080
3081 this->interval.invalidate();
3082 return false;
3083 }
3084 };
3085
3089 template <class T>
3091 {
3092 public:
3093 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3094
3095 virtual bool match(
3096 _In_reads_or_z_(end) const T* text,
3097 _In_ size_t start = 0,
3098 _In_ size_t end = SIZE_MAX,
3099 _In_ int flags = match_default)
3100 {
3101 _Assume_(text || start >= end);
3102 if (start < end && text[start]) {
3103 if (text[start] == '-' ||
3104 text[start] == '.' ||
3105 text[start] == '_' ||
3106 text[start] == '~' ||
3107 text[start] == '%' ||
3108 text[start] == '!' ||
3109 text[start] == '$' ||
3110 text[start] == '&' ||
3111 text[start] == '\'' ||
3112 text[start] == '(' ||
3113 text[start] == ')' ||
3114 text[start] == '*' ||
3115 text[start] == '+' ||
3116 text[start] == ',' ||
3117 text[start] == ';' ||
3118 text[start] == '=' ||
3119 text[start] == ':' ||
3120 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3121 {
3122 this->interval.end = (this->interval.start = start) + 1;
3123 return true;
3124 }
3125 }
3126 this->interval.invalidate();
3127 return false;
3128 }
3129 };
3130
3133#ifdef _UNICODE
3135#else
3137#endif
3138
3143 {
3144 public:
3145 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3146
3147 virtual bool match(
3148 _In_reads_or_z_(end) const char* text,
3149 _In_ size_t start = 0,
3150 _In_ size_t end = SIZE_MAX,
3151 _In_ int flags = match_default)
3152 {
3153 _Assume_(text || start >= end);
3154 if (start < end && text[start]) {
3155 wchar_t buf[3];
3156 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3157 const wchar_t* chr_end = chr + stdex::strlen(chr);
3158 if (((chr[0] == L'-' ||
3159 chr[0] == L'.' ||
3160 chr[0] == L'_' ||
3161 chr[0] == L'~' ||
3162 chr[0] == L'%' ||
3163 chr[0] == L'!' ||
3164 chr[0] == L'$' ||
3165 chr[0] == L'&' ||
3166 chr[0] == L'\'' ||
3167 chr[0] == L'(' ||
3168 chr[0] == L')' ||
3169 chr[0] == L'*' ||
3170 chr[0] == L'+' ||
3171 chr[0] == L',' ||
3172 chr[0] == L';' ||
3173 chr[0] == L'=' ||
3174 chr[0] == L':') && chr[1] == 0) ||
3175 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3176 {
3177 this->interval.start = start;
3178 return true;
3179 }
3180 }
3181 this->interval.invalidate();
3182 return false;
3183 }
3184 };
3185
3189 template <class T>
3191 {
3192 public:
3193 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3194
3195 virtual bool match(
3196 _In_reads_or_z_(end) const T* text,
3197 _In_ size_t start = 0,
3198 _In_ size_t end = SIZE_MAX,
3199 _In_ int flags = match_default)
3200 {
3201 _Assume_(text || start >= end);
3202 if (start < end && text[start]) {
3203 if (text[start] == '/' ||
3204 text[start] == '-' ||
3205 text[start] == '.' ||
3206 text[start] == '_' ||
3207 text[start] == '~' ||
3208 text[start] == '%' ||
3209 text[start] == '!' ||
3210 text[start] == '$' ||
3211 text[start] == '&' ||
3212 text[start] == '\'' ||
3213 text[start] == '(' ||
3214 text[start] == ')' ||
3215 text[start] == '*' ||
3216 text[start] == '+' ||
3217 text[start] == ',' ||
3218 text[start] == ';' ||
3219 text[start] == '=' ||
3220 text[start] == ':' ||
3221 text[start] == '@' ||
3222 text[start] == '?' ||
3223 text[start] == '#' ||
3224 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3225 {
3226 this->interval.end = (this->interval.start = start) + 1;
3227 return true;
3228 }
3229 }
3230 this->interval.invalidate();
3231 return false;
3232 }
3233 };
3234
3237#ifdef _UNICODE
3239#else
3241#endif
3242
3247 {
3248 public:
3249 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3250
3251 virtual bool match(
3252 _In_reads_or_z_(end) const char* text,
3253 _In_ size_t start = 0,
3254 _In_ size_t end = SIZE_MAX,
3255 _In_ int flags = match_default)
3256 {
3257 _Assume_(text || start >= end);
3258 if (start < end && text[start]) {
3259 wchar_t buf[3];
3260 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3261 const wchar_t* chr_end = chr + stdex::strlen(chr);
3262 if (((chr[0] == L'/' ||
3263 chr[0] == L'-' ||
3264 chr[0] == L'.' ||
3265 chr[0] == L'_' ||
3266 chr[0] == L'~' ||
3267 chr[0] == L'%' ||
3268 chr[0] == L'!' ||
3269 chr[0] == L'$' ||
3270 chr[0] == L'&' ||
3271 chr[0] == L'\'' ||
3272 chr[0] == L'(' ||
3273 chr[0] == L')' ||
3274 chr[0] == L'*' ||
3275 chr[0] == L'+' ||
3276 chr[0] == L',' ||
3277 chr[0] == L';' ||
3278 chr[0] == L'=' ||
3279 chr[0] == L':' ||
3280 chr[0] == L'@' ||
3281 chr[0] == L'?' ||
3282 chr[0] == L'#') && chr[1] == 0) ||
3283 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3284 {
3285 this->interval.start = start;
3286 return true;
3287 }
3288 }
3289 this->interval.invalidate();
3290 return false;
3291 }
3292 };
3293
3297 template <class T>
3299 {
3300 public:
3302 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3303 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3304 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3305 _In_ const std::locale& locale = std::locale()) :
3306 basic_parser<T>(locale),
3307 m_path_char(path_char),
3308 m_query_start(query_start),
3309 m_bookmark_start(bookmark_start)
3310 {}
3311
3312 virtual bool match(
3313 _In_reads_or_z_(end) const T* text,
3314 _In_ size_t start = 0,
3315 _In_ size_t end = SIZE_MAX,
3316 _In_ int flags = match_default)
3317 {
3318 _Assume_(text || start >= end);
3319
3320 this->interval.end = start;
3321 path.start = start;
3322 query.start = 1;
3323 query.end = 0;
3324 bookmark.start = 1;
3325 bookmark.end = 0;
3326
3327 for (;;) {
3328 if (this->interval.end >= end || !text[this->interval.end])
3329 break;
3330 if (m_query_start->match(text, this->interval.end, end, flags)) {
3331 path.end = this->interval.end;
3332 query.start = this->interval.end = m_query_start->interval.end;
3333 for (;;) {
3334 if (this->interval.end >= end || !text[this->interval.end]) {
3335 query.end = this->interval.end;
3336 break;
3337 }
3338 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3339 query.end = this->interval.end;
3340 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3341 for (;;) {
3342 if (this->interval.end >= end || !text[this->interval.end]) {
3343 bookmark.end = this->interval.end;
3344 break;
3345 }
3346 if (m_path_char->match(text, this->interval.end, end, flags))
3347 this->interval.end = m_path_char->interval.end;
3348 else {
3349 bookmark.end = this->interval.end;
3350 break;
3351 }
3352 }
3353 this->interval.start = start;
3354 return true;
3355 }
3356 if (m_path_char->match(text, this->interval.end, end, flags))
3357 this->interval.end = m_path_char->interval.end;
3358 else {
3359 query.end = this->interval.end;
3360 break;
3361 }
3362 }
3363 this->interval.start = start;
3364 return true;
3365 }
3366 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3367 path.end = this->interval.end;
3368 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3369 for (;;) {
3370 if (this->interval.end >= end || !text[this->interval.end]) {
3371 bookmark.end = this->interval.end;
3372 break;
3373 }
3374 if (m_path_char->match(text, this->interval.end, end, flags))
3375 this->interval.end = m_path_char->interval.end;
3376 else {
3377 bookmark.end = this->interval.end;
3378 break;
3379 }
3380 }
3381 this->interval.start = start;
3382 return true;
3383 }
3384 if (m_path_char->match(text, this->interval.end, end, flags))
3385 this->interval.end = m_path_char->interval.end;
3386 else
3387 break;
3388 }
3389
3391 path.end = this->interval.end;
3392 this->interval.start = start;
3393 return true;
3394 }
3395
3396 path.start = 1;
3397 path.end = 0;
3398 bookmark.start = 1;
3399 bookmark.end = 0;
3400 this->interval.invalidate();
3401 return false;
3402 }
3403
3404 virtual void invalidate()
3405 {
3406 path.start = 1;
3407 path.end = 0;
3408 query.start = 1;
3409 query.end = 0;
3410 bookmark.start = 1;
3411 bookmark.end = 0;
3413 }
3414
3415 public:
3418 stdex::interval<size_t> bookmark;
3419
3420 protected:
3421 std::shared_ptr<basic_parser<T>> m_path_char;
3422 std::shared_ptr<basic_parser<T>> m_query_start;
3423 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3424 };
3425
3428#ifdef _UNICODE
3429 using turl_path = wurl_path;
3430#else
3431 using turl_path = url_path;
3432#endif
3434
3438 template <class T>
3439 class basic_url : public basic_parser<T>
3440 {
3441 public:
3442 basic_url(
3443 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3444 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3445 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3446 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3447 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3448 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3449 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3450 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3451 _In_ const std::shared_ptr<basic_parser<T>>& at,
3452 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3453 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3454 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3455 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3456 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3457 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3458 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3459 _In_ const std::locale& locale = std::locale()) :
3460 basic_parser<T>(locale),
3461 http_scheme(_http_scheme),
3462 ftp_scheme(_ftp_scheme),
3463 mailto_scheme(_mailto_scheme),
3464 file_scheme(_file_scheme),
3465 m_colon(colon),
3466 m_slash(slash),
3467 username(_username),
3468 password(_password),
3469 m_at(at),
3470 m_ip_lbracket(ip_lbracket),
3471 m_ip_rbracket(ip_rbracket),
3472 ipv4_host(_ipv4_host),
3473 ipv6_host(_ipv6_host),
3474 dns_host(_dns_host),
3475 port(_port),
3476 path(_path)
3477 {}
3478
3479 virtual bool match(
3480 _In_reads_or_z_(end) const T* text,
3481 _In_ size_t start = 0,
3482 _In_ size_t end = SIZE_MAX,
3483 _In_ int flags = match_default)
3484 {
3485 _Assume_(text || start >= end);
3486
3487 this->interval.end = start;
3488
3489 if (http_scheme->match(text, this->interval.end, end, flags) &&
3490 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3491 m_slash->match(text, m_colon->interval.end, end, flags) &&
3492 m_slash->match(text, m_slash->interval.end, end, flags))
3493 {
3494 // http://
3495 this->interval.end = m_slash->interval.end;
3496 ftp_scheme->invalidate();
3497 mailto_scheme->invalidate();
3498 file_scheme->invalidate();
3499 }
3500 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3501 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3502 m_slash->match(text, m_colon->interval.end, end, flags) &&
3503 m_slash->match(text, m_slash->interval.end, end, flags))
3504 {
3505 // ftp://
3506 this->interval.end = m_slash->interval.end;
3507 http_scheme->invalidate();
3508 mailto_scheme->invalidate();
3509 file_scheme->invalidate();
3510 }
3511 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3512 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3513 {
3514 // mailto:
3515 this->interval.end = m_colon->interval.end;
3516 http_scheme->invalidate();
3517 ftp_scheme->invalidate();
3518 file_scheme->invalidate();
3519 }
3520 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3521 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3522 m_slash->match(text, m_colon->interval.end, end, flags) &&
3523 m_slash->match(text, m_slash->interval.end, end, flags))
3524 {
3525 // file://
3526 this->interval.end = m_slash->interval.end;
3527 http_scheme->invalidate();
3528 ftp_scheme->invalidate();
3529 mailto_scheme->invalidate();
3530 }
3531 else {
3532 // Default to http:
3533 http_scheme->invalidate();
3534 ftp_scheme->invalidate();
3535 mailto_scheme->invalidate();
3536 file_scheme->invalidate();
3537 }
3538
3539 if (ftp_scheme->interval) {
3540 if (username->match(text, this->interval.end, end, flags)) {
3541 if (m_colon->match(text, username->interval.end, end, flags) &&
3542 password->match(text, m_colon->interval.end, end, flags) &&
3543 m_at->match(text, password->interval.end, end, flags))
3544 {
3545 // Username and password
3546 this->interval.end = m_at->interval.end;
3547 }
3548 else if (m_at->match(text, this->interval.end, end, flags)) {
3549 // Username only
3550 this->interval.end = m_at->interval.end;
3551 password->invalidate();
3552 }
3553 else {
3554 username->invalidate();
3555 password->invalidate();
3556 }
3557 }
3558 else {
3559 username->invalidate();
3560 password->invalidate();
3561 }
3562
3563 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3564 // Host is IPv4
3565 this->interval.end = ipv4_host->interval.end;
3566 ipv6_host->invalidate();
3567 dns_host->invalidate();
3568 }
3569 else if (
3570 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3571 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3572 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3573 {
3574 // Host is IPv6
3575 this->interval.end = m_ip_rbracket->interval.end;
3576 ipv4_host->invalidate();
3577 dns_host->invalidate();
3578 }
3579 else if (dns_host->match(text, this->interval.end, end, flags)) {
3580 // Host is hostname
3581 this->interval.end = dns_host->interval.end;
3582 ipv4_host->invalidate();
3583 ipv6_host->invalidate();
3584 }
3585 else {
3586 invalidate();
3587 return false;
3588 }
3589
3590 if (m_colon->match(text, this->interval.end, end, flags) &&
3591 port->match(text, m_colon->interval.end, end, flags))
3592 {
3593 // Port
3594 this->interval.end = port->interval.end;
3595 }
3596 else
3597 port->invalidate();
3598
3599 if (path->match(text, this->interval.end, end, flags)) {
3600 // Path
3601 this->interval.end = path->interval.end;
3602 }
3603
3604 this->interval.start = start;
3605 return true;
3606 }
3607
3608 if (mailto_scheme->interval) {
3609 if (username->match(text, this->interval.end, end, flags) &&
3610 m_at->match(text, username->interval.end, end, flags))
3611 {
3612 // Username
3613 this->interval.end = m_at->interval.end;
3614 }
3615 else {
3616 invalidate();
3617 return false;
3618 }
3619
3620 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3621 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3622 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3623 {
3624 // Host is IPv4
3625 this->interval.end = m_ip_rbracket->interval.end;
3626 ipv6_host->invalidate();
3627 dns_host->invalidate();
3628 }
3629 else if (
3630 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3631 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3632 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3633 {
3634 // Host is IPv6
3635 this->interval.end = m_ip_rbracket->interval.end;
3636 ipv4_host->invalidate();
3637 dns_host->invalidate();
3638 }
3639 else if (dns_host->match(text, this->interval.end, end, flags)) {
3640 // Host is hostname
3641 this->interval.end = dns_host->interval.end;
3642 ipv4_host->invalidate();
3643 ipv6_host->invalidate();
3644 }
3645 else {
3646 invalidate();
3647 return false;
3648 }
3649
3650 password->invalidate();
3651 port->invalidate();
3652 path->invalidate();
3653 this->interval.start = start;
3654 return true;
3655 }
3656
3657 if (file_scheme->interval) {
3658 if (path->match(text, this->interval.end, end, flags)) {
3659 // Path
3660 this->interval.end = path->interval.end;
3661 }
3662
3663 username->invalidate();
3664 password->invalidate();
3665 ipv4_host->invalidate();
3666 ipv6_host->invalidate();
3667 dns_host->invalidate();
3668 port->invalidate();
3669 this->interval.start = start;
3670 return true;
3671 }
3672
3673 // "http://" found or defaulted to
3674
3675 // If "http://" explicit, test for username&password.
3676 if (http_scheme->interval &&
3677 username->match(text, this->interval.end, end, flags))
3678 {
3679 if (m_colon->match(text, username->interval.end, end, flags) &&
3680 password->match(text, m_colon->interval.end, end, flags) &&
3681 m_at->match(text, password->interval.end, end, flags))
3682 {
3683 // Username and password
3684 this->interval.end = m_at->interval.end;
3685 }
3686 else if (m_at->match(text, username->interval.end, end, flags)) {
3687 // Username only
3688 this->interval.end = m_at->interval.end;
3689 password->invalidate();
3690 }
3691 else {
3692 username->invalidate();
3693 password->invalidate();
3694 }
3695 }
3696 else {
3697 username->invalidate();
3698 password->invalidate();
3699 }
3700
3701 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3702 // Host is IPv4
3703 this->interval.end = ipv4_host->interval.end;
3704 ipv6_host->invalidate();
3705 dns_host->invalidate();
3706 }
3707 else if (
3708 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3709 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3710 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3711 {
3712 // Host is IPv6
3713 this->interval.end = m_ip_rbracket->interval.end;
3714 ipv4_host->invalidate();
3715 dns_host->invalidate();
3716 }
3717 else if (dns_host->match(text, this->interval.end, end, flags)) {
3718 // Host is hostname
3719 this->interval.end = dns_host->interval.end;
3720 ipv4_host->invalidate();
3721 ipv6_host->invalidate();
3722 }
3723 else {
3724 invalidate();
3725 return false;
3726 }
3727
3728 if (m_colon->match(text, this->interval.end, end, flags) &&
3729 port->match(text, m_colon->interval.end, end, flags))
3730 {
3731 // Port
3732 this->interval.end = port->interval.end;
3733 }
3734 else
3735 port->invalidate();
3736
3737 if (path->match(text, this->interval.end, end, flags)) {
3738 // Path
3739 this->interval.end = path->interval.end;
3740 }
3741
3742 this->interval.start = start;
3743 return true;
3744 }
3745
3746 virtual void invalidate()
3747 {
3748 http_scheme->invalidate();
3749 ftp_scheme->invalidate();
3750 mailto_scheme->invalidate();
3751 file_scheme->invalidate();
3752 username->invalidate();
3753 password->invalidate();
3754 ipv4_host->invalidate();
3755 ipv6_host->invalidate();
3756 dns_host->invalidate();
3757 port->invalidate();
3758 path->invalidate();
3760 }
3761
// Sub-parsers for the URL parts. After match() each one is either valid (the part is present) or invalidated.
3762 public:
3763 std::shared_ptr<basic_parser<T>> http_scheme; ///< Scheme name tried before "://" for HTTP; invalid when the scheme was defaulted
3764 std::shared_ptr<basic_parser<T>> ftp_scheme; ///< Scheme name tried before "://" for FTP
3765 std::shared_ptr<basic_parser<T>> mailto_scheme; ///< Scheme name tried before ":" for mailto
3766 std::shared_ptr<basic_parser<T>> file_scheme; ///< Scheme name tried before "://" for file
3767 std::shared_ptr<basic_parser<T>> username; ///< Username preceding "@" (or ":" and the password)
3768 std::shared_ptr<basic_parser<T>> password; ///< Password between ":" and "@"
3769 std::shared_ptr<basic_parser<T>> ipv4_host; ///< Host given as an IPv4 address
3770 std::shared_ptr<basic_parser<T>> ipv6_host; ///< Host given as a bracketed IPv6 address
3771 std::shared_ptr<basic_parser<T>> dns_host; ///< Host given as a hostname
3772 std::shared_ptr<basic_parser<T>> port; ///< Port following the ":" after the host
3773 std::shared_ptr<basic_parser<T>> path; ///< Path following the host (and port)
3774
// Delimiter parsers used internally by match(); their state is not part of the result.
3775 protected:
3776 std::shared_ptr<basic_parser<T>> m_colon; ///< Matched after a scheme, between username and password, and before the port
3777 std::shared_ptr<basic_parser<T>> m_slash; ///< Matched twice after the colon of http, ftp and file schemes
3778 std::shared_ptr<basic_parser<T>> m_at; ///< Matched after the credentials
3779 std::shared_ptr<basic_parser<T>> m_ip_lbracket; ///< Opening delimiter of a bracketed IP address
3780 std::shared_ptr<basic_parser<T>> m_ip_rbracket; ///< Closing delimiter of a bracketed IP address
3781 };
3782
// URL parser for char text.
3783 using url = basic_url<char>;
// URL parser for wchar_t text.
3784 using wurl = basic_url<wchar_t>;
// turl follows the build's character set: wide when _UNICODE is defined, narrow otherwise.
3785#ifdef _UNICODE
3786 using turl = wurl;
3787#else
3788 using turl = url;
3789#endif
// URL parser for SGML text; it is the same char instantiation as url.
3790 using sgml_url = basic_url<char>;
3791
3795 template <class T>
3797 {
3798 public:
3800 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3801 _In_ const std::shared_ptr<basic_parser<T>>& at,
3802 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3803 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3804 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3805 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3806 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3807 _In_ const std::locale& locale = std::locale()) :
3808 basic_parser<T>(locale),
3809 username(_username),
3810 m_at(at),
3811 m_ip_lbracket(ip_lbracket),
3812 m_ip_rbracket(ip_rbracket),
3813 ipv4_host(_ipv4_host),
3814 ipv6_host(_ipv6_host),
3815 dns_host(_dns_host)
3816 {}
3817
3818 virtual bool match(
3819 _In_reads_or_z_(end) const T* text,
3820 _In_ size_t start = 0,
3821 _In_ size_t end = SIZE_MAX,
3822 _In_ int flags = match_default)
3823 {
3824 _Assume_(text || start >= end);
3825
3826 if (username->match(text, start, end, flags) &&
3827 m_at->match(text, username->interval.end, end, flags))
3828 {
3829 // Username@
3830 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3831 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3832 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3833 {
3834 // Host is IPv4
3835 this->interval.end = m_ip_rbracket->interval.end;
3836 ipv6_host->invalidate();
3837 dns_host->invalidate();
3838 }
3839 else if (
3840 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3841 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3842 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3843 {
3844 // Host is IPv6
3845 this->interval.end = m_ip_rbracket->interval.end;
3846 ipv4_host->invalidate();
3847 dns_host->invalidate();
3848 }
3849 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3850 // Host is hostname
3851 this->interval.end = dns_host->interval.end;
3852 ipv4_host->invalidate();
3853 ipv6_host->invalidate();
3854 }
3855 else
3856 goto error;
3857 this->interval.start = start;
3858 return true;
3859 }
3860
3861 error:
3862 username->invalidate();
3863 ipv4_host->invalidate();
3864 ipv6_host->invalidate();
3865 dns_host->invalidate();
3866 this->interval.invalidate();
3867 return false;
3868 }
3869
3870 virtual void invalidate()
3871 {
3872 username->invalidate();
3873 ipv4_host->invalidate();
3874 ipv6_host->invalidate();
3875 dns_host->invalidate();
3877 }
3878
// Sub-parsers for the address parts. After a successful match() the username and exactly one host parser are valid.
3879 public:
3880 std::shared_ptr<basic_parser<T>> username; ///< Part preceding "@"
3881 std::shared_ptr<basic_parser<T>> ipv4_host; ///< Host given as a bracketed IPv4 address
3882 std::shared_ptr<basic_parser<T>> ipv6_host; ///< Host given as a bracketed IPv6 address
3883 std::shared_ptr<basic_parser<T>> dns_host; ///< Host given as a hostname
3884
// Delimiter parsers used internally by match().
3885 protected:
3886 std::shared_ptr<basic_parser<T>> m_at; ///< Matched between username and host
3887 std::shared_ptr<basic_parser<T>> m_ip_lbracket; ///< Opening delimiter of a bracketed IP address
3888 std::shared_ptr<basic_parser<T>> m_ip_rbracket; ///< Closing delimiter of a bracketed IP address
3889 };
3890
3893#ifdef _UNICODE
3895#else
3897#endif
3899
3903 template <class T>
3905 {
3906 public:
3908 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3909 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3910 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3911 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3912 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3913 _In_ const std::locale& locale = std::locale()) :
3914 basic_parser<T>(locale),
3916 apex(_apex),
3917 eyes(_eyes),
3918 nose(_nose),
3919 mouth(_mouth)
3920 {}
3921
3922 virtual bool match(
3923 _In_reads_or_z_(end) const T* text,
3924 _In_ size_t start = 0,
3925 _In_ size_t end = SIZE_MAX,
3926 _In_ int flags = match_default)
3927 {
3928 _Assume_(text || start >= end);
3929
3930 if (emoticon && emoticon->match(text, start, end, flags)) {
3931 if (apex) apex->invalidate();
3932 eyes->invalidate();
3933 if (nose) nose->invalidate();
3934 mouth->invalidate();
3935 this->interval.start = start;
3936 this->interval.end = emoticon->interval.end;
3937 return true;
3938 }
3939
3940 this->interval.end = start;
3941
3942 if (apex && apex->match(text, this->interval.end, end, flags))
3943 this->interval.end = apex->interval.end;
3944
3945 if (eyes->match(text, this->interval.end, end, flags)) {
3946 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3947 mouth->match(text, nose->interval.end, end, flags))
3948 {
3949 size_t
3951 hit_offset = mouth->hit_offset;
3952 // Mouth may repeat :-)))))))
3953 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3954 mouth->interval.start = start_mouth;
3955 mouth->interval.end = this->interval.end;
3956 this->interval.start = start;
3957 return true;
3958 }
3959 if (mouth->match(text, eyes->interval.end, end, flags)) {
3960 size_t
3962 hit_offset = mouth->hit_offset;
3963 // Mouth may repeat :-)))))))
3964 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3965 if (nose) nose->invalidate();
3966 mouth->interval.start = start_mouth;
3967 mouth->interval.end = this->interval.end;
3968 this->interval.start = start;
3969 return true;
3970 }
3971 }
3972
3973 if (emoticon) emoticon->invalidate();
3974 if (apex) apex->invalidate();
3975 eyes->invalidate();
3976 if (nose) nose->invalidate();
3977 mouth->invalidate();
3978 this->interval.invalidate();
3979 return false;
3980 }
3981
3982 virtual void invalidate()
3983 {
3984 if (emoticon) emoticon->invalidate();
3985 if (apex) apex->invalidate();
3986 eyes->invalidate();
3987 if (nose) nose->invalidate();
3988 mouth->invalidate();
3990 }
3991
// Sub-parsers for the emoticon parts. emoticon, apex and nose are null-checked by match() and may be omitted.
3992 public:
3993 std::shared_ptr<basic_parser<T>> emoticon; ///< Optional parser for a complete emoticon; tried before the composed form
3994 std::shared_ptr<basic_parser<T>> apex; ///< Optional part preceding the eyes
3995 std::shared_ptr<basic_parser<T>> eyes; ///< Eyes; required for the composed form
3996 std::shared_ptr<basic_parser<T>> nose; ///< Optional part between eyes and mouth
3997 std::shared_ptr<basic_set<T>> mouth; ///< Mouth; may repeat as long as the same set element is hit
3998 };
3999
// temoticon follows the build's character set: wemoticon when _UNICODE is defined, emoticon otherwise.
// NOTE(review): the declarations of emoticon and wemoticon are not visible in this part of the file - confirm they precede this block.
4002#ifdef _UNICODE
4003 using temoticon = wemoticon;
4004#else
4005 using temoticon = emoticon;
4006#endif
4008
// Date component orders. Each non-zero value is a distinct bit, so several can be
// combined into the format mask that basic_date tests with bitwise AND.
4012 enum date_format_t {
4013 date_format_none = 0, // No format; basic_date::format holds this when no date is matched
4014 date_format_dmy = 0x1, // day, month, year
4015 date_format_mdy = 0x2, // month, day, year
4016 date_format_ymd = 0x4, // year, month, day
4017 date_format_ym = 0x8, // year, month
4018 date_format_my = 0x10, // month, year
4019 date_format_dm = 0x20, // day, month
4020 date_format_md = 0x40, // month, day
4021 };
4022
4026 template <class T>
4027 class basic_date : public basic_parser<T>
4028 {
4029 public:
4030 basic_date(
4031 _In_ int format_mask,
4032 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4033 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4034 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4035 _In_ const std::shared_ptr<basic_set<T>>& separator,
4036 _In_ const std::shared_ptr<basic_parser<T>>& space,
4037 _In_ const std::locale& locale = std::locale()) :
4038 basic_parser<T>(locale),
4039 format(date_format_none),
4040 m_format_mask(format_mask),
4041 day(_day),
4042 month(_month),
4043 year(_year),
4044 m_separator(separator),
4045 m_space(space)
4046 {}
4047
4048 virtual bool match(
4049 _In_reads_or_z_(end) const T* text,
4050 _In_ size_t start = 0,
4051 _In_ size_t end = SIZE_MAX,
4052 _In_ int flags = match_default)
4053 {
4054 _Assume_(text || start >= end);
4055
4056 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4057 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4058 if (day->match(text, start, end, flags)) {
4059 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4060 if (m_separator->match(text, this->interval.end, end, flags)) {
4061 size_t hit_offset = m_separator->hit_offset;
4062 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4063 if (month->match(text, this->interval.end, end, flags)) {
4064 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4065 if (m_separator->match(text, this->interval.end, end, flags) &&
4066 m_separator->hit_offset == hit_offset) // Both separators must match.
4067 {
4068 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4069 if (year->match(text, this->interval.end, end, flags) &&
4070 is_valid(day->value, month->value))
4071 {
4072 this->interval.start = start;
4073 this->interval.end = year->interval.end;
4074 format = date_format_dmy;
4075 return true;
4076 }
4077 }
4078 }
4079 }
4080 }
4081 }
4082
4083 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4084 if (month->match(text, start, end, flags)) {
4085 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4086 if (m_separator->match(text, this->interval.end, end, flags)) {
4087 size_t hit_offset = m_separator->hit_offset;
4088 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4089 if (day->match(text, this->interval.end, end, flags)) {
4090 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4091 if (m_separator->match(text, this->interval.end, end, flags) &&
4092 m_separator->hit_offset == hit_offset) // Both separators must match.
4093 {
4094 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4095 if (year->match(text, this->interval.end, end, flags) &&
4096 is_valid(day->value, month->value))
4097 {
4098 this->interval.start = start;
4099 this->interval.end = year->interval.end;
4100 format = date_format_mdy;
4101 return true;
4102 }
4103 }
4104 }
4105 }
4106 }
4107 }
4108
4109 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4110 if (year->match(text, start, end, flags)) {
4111 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4112 if (m_separator->match(text, this->interval.end, end, flags)) {
4113 size_t hit_offset = m_separator->hit_offset;
4114 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4115 if (month->match(text, this->interval.end, end, flags)) {
4116 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4117 if (m_separator->match(text, this->interval.end, end, flags) &&
4118 m_separator->hit_offset == hit_offset) // Both separators must match.
4119 {
4120 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4121 if (day->match(text, this->interval.end, end, flags) &&
4122 is_valid(day->value, month->value))
4123 {
4124 this->interval.start = start;
4125 this->interval.end = day->interval.end;
4126 format = date_format_ymd;
4127 return true;
4128 }
4129 }
4130 }
4131 }
4132 }
4133 }
4134
4135 if ((m_format_mask & date_format_ym) == date_format_ym) {
4136 if (year->match(text, start, end, flags)) {
4137 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4138 if (m_separator->match(text, this->interval.end, end, flags)) {
4139 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4140 if (month->match(text, this->interval.end, end, flags) &&
4141 is_valid(SIZE_MAX, month->value))
4142 {
4143 if (day) day->invalidate();
4144 this->interval.start = start;
4145 this->interval.end = month->interval.end;
4146 format = date_format_ym;
4147 return true;
4148 }
4149 }
4150 }
4151 }
4152
4153 if ((m_format_mask & date_format_my) == date_format_my) {
4154 if (month->match(text, start, end, flags)) {
4155 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4156 if (m_separator->match(text, this->interval.end, end, flags)) {
4157 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4158 if (year->match(text, this->interval.end, end, flags) &&
4159 is_valid(SIZE_MAX, month->value))
4160 {
4161 if (day) day->invalidate();
4162 this->interval.start = start;
4163 this->interval.end = year->interval.end;
4164 format = date_format_my;
4165 return true;
4166 }
4167 }
4168 }
4169 }
4170
4171 if ((m_format_mask & date_format_dm) == date_format_dm) {
4172 if (day->match(text, start, end, flags)) {
4173 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4174 if (m_separator->match(text, this->interval.end, end, flags)) {
4175 size_t hit_offset = m_separator->hit_offset;
4176 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4177 if (month->match(text, this->interval.end, end, flags) &&
4178 is_valid(day->value, month->value))
4179 {
4180 if (year) year->invalidate();
4181 this->interval.start = start;
4182 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4183 if (m_separator->match(text, this->interval.end, end, flags) &&
4184 m_separator->hit_offset == hit_offset) // Both separators must match.
4185 this->interval.end = m_separator->interval.end;
4186 else
4187 this->interval.end = month->interval.end;
4188 format = date_format_dm;
4189 return true;
4190 }
4191 }
4192 }
4193 }
4194
4195 if ((m_format_mask & date_format_md) == date_format_md) {
4196 if (month->match(text, start, end, flags)) {
4197 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4198 if (m_separator->match(text, this->interval.end, end, flags)) {
4199 size_t hit_offset = m_separator->hit_offset;
4200 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4201 if (day->match(text, this->interval.end, end, flags) &&
4202 is_valid(day->value, month->value))
4203 {
4204 if (year) year->invalidate();
4205 this->interval.start = start;
4206 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4207 if (m_separator->match(text, this->interval.end, end, flags) &&
4208 m_separator->hit_offset == hit_offset) // Both separators must match.
4209 this->interval.end = m_separator->interval.end;
4210 else
4211 this->interval.end = day->interval.end;
4212 format = date_format_md;
4213 return true;
4214 }
4215 }
4216 }
4217 }
4218
4219 if (day) day->invalidate();
4220 if (month) month->invalidate();
4221 if (year) year->invalidate();
4222 format = date_format_none;
4223 this->interval.invalidate();
4224 return false;
4225 }
4226
4227 virtual void invalidate()
4228 {
4229 if (day) day->invalidate();
4230 if (month) month->invalidate();
4231 if (year) year->invalidate();
4232 format = date_format_none;
4234 }
4235
4236 protected:
4237 static inline bool is_valid(size_t day, size_t month)
4238 {
4239 if (month == SIZE_MAX) {
4240 // Default to January. This allows validating day only, as January has all 31 days.
4241 month = 1;
4242 }
4243 if (day == SIZE_MAX) {
4244 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4245 day = 1;
4246 }
4247
4248 switch (month) {
4249 case 1:
4250 case 3:
4251 case 5:
4252 case 7:
4253 case 8:
4254 case 10:
4255 case 12:
4256 return 1 <= day && day <= 31;
4257 case 2:
4258 return 1 <= day && day <= 29;
4259 case 4:
4260 case 6:
4261 case 9:
4262 case 11:
4263 return 1 <= day && day <= 30;
4264 default:
4265 return false;
4266 }
4267 }
4268
4269 public:
4270 date_format_t format;
4271 std::shared_ptr<basic_integer<T>> day;
4272 std::shared_ptr<basic_integer<T>> month;
4273 std::shared_ptr<basic_integer<T>> year;
4274
4275 protected:
4276 int m_format_mask;
4277 std::shared_ptr<basic_set<T>> m_separator;
4278 std::shared_ptr<basic_parser<T>> m_space;
4279 };
4280
// Date parser for char text.
4281 using date = basic_date<char>;
// Date parser for wchar_t text.
4282 using wdate = basic_date<wchar_t>;
// tdate follows the build's character set: wide when _UNICODE is defined, narrow otherwise.
4283#ifdef _UNICODE
4284 using tdate = wdate;
4285#else
4286 using tdate = date;
4287#endif
4289
4293 template <class T>
4294 class basic_time : public basic_parser<T>
4295 {
4296 public:
4297 basic_time(
4298 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4299 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4300 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4301 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4302 _In_ const std::shared_ptr<basic_set<T>>& separator,
4303 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4304 _In_ const std::locale& locale = std::locale()) :
4305 basic_parser<T>(locale),
4306 hour(_hour),
4307 minute(_minute),
4308 second(_second),
4309 millisecond(_millisecond),
4310 m_separator(separator),
4311 m_millisecond_separator(millisecond_separator)
4312 {}
4313
4314 virtual bool match(
4315 _In_reads_or_z_(end) const T* text,
4316 _In_ size_t start = 0,
4317 _In_ size_t end = SIZE_MAX,
4318 _In_ int flags = match_default)
4319 {
4320 _Assume_(text || start >= end);
4321
4322 if (hour->match(text, start, end, flags) &&
4323 m_separator->match(text, hour->interval.end, end, flags) &&
4324 minute->match(text, m_separator->interval.end, end, flags) &&
4325 minute->value < 60)
4326 {
4327 // hh::mm
4328 size_t hit_offset = m_separator->hit_offset;
4329 if (m_separator->match(text, minute->interval.end, end, flags) &&
4330 m_separator->hit_offset == hit_offset && // Both separators must match.
4331 second && second->match(text, m_separator->interval.end, end, flags) &&
4332 second->value < 60)
4333 {
4334 // hh::mm:ss
4335 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4336 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4337 millisecond->value < 1000)
4338 {
4339 // hh::mm:ss.mmmm
4340 this->interval.end = millisecond->interval.end;
4341 }
4342 else {
4343 if (millisecond) millisecond->invalidate();
4344 this->interval.end = second->interval.end;
4345 }
4346 }
4347 else {
4348 if (second) second->invalidate();
4349 if (millisecond) millisecond->invalidate();
4350 this->interval.end = minute->interval.end;
4351 }
4352 this->interval.start = start;
4353 return true;
4354 }
4355
4356 hour->invalidate();
4357 minute->invalidate();
4358 if (second) second->invalidate();
4359 if (millisecond) millisecond->invalidate();
4360 this->interval.invalidate();
4361 return false;
4362 }
4363
4364 virtual void invalidate()
4365 {
4366 hour->invalidate();
4367 minute->invalidate();
4368 if (second) second->invalidate();
4369 if (millisecond) millisecond->invalidate();
4371 }
4372
4373 public:
4374 std::shared_ptr<basic_integer10<T>> hour;
4375 std::shared_ptr<basic_integer10<T>> minute;
4376 std::shared_ptr<basic_integer10<T>> second;
4377 std::shared_ptr<basic_integer10<T>> millisecond;
4378
4379 protected:
4380 std::shared_ptr<basic_set<T>> m_separator;
4381 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4382 };
4383
// Time parser for char text.
4384 using time = basic_time<char>;
// Time parser for wchar_t text.
4385 using wtime = basic_time<wchar_t>;
// ttime follows the build's character set: wide when _UNICODE is defined, narrow otherwise.
4386#ifdef _UNICODE
4387 using ttime = wtime;
4388#else
4389 using ttime = time;
4390#endif
4392
4396 template <class T>
4397 class basic_angle : public basic_parser<T>
4398 {
4399 public:
4401 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4402 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4403 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4404 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4405 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4406 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4407 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4408 _In_ const std::locale& locale = std::locale()) :
4409 basic_parser<T>(locale),
4410 degree(_degree),
4411 degree_separator(_degree_separator),
4412 minute(_minute),
4413 minute_separator(_minute_separator),
4414 second(_second),
4415 second_separator(_second_separator),
4416 decimal(_decimal)
4417 {}
4418
4419 virtual bool match(
4420 _In_reads_or_z_(end) const T* text,
4421 _In_ size_t start = 0,
4422 _In_ size_t end = SIZE_MAX,
4423 _In_ int flags = match_default)
4424 {
4425 _Assume_(text || start >= end);
4426
4427 this->interval.end = start;
4428
4429 if (degree->match(text, this->interval.end, end, flags) &&
4430 degree_separator->match(text, degree->interval.end, end, flags))
4431 {
4432 // Degrees
4433 this->interval.end = degree_separator->interval.end;
4434 }
4435 else {
4436 degree->invalidate();
4437 degree_separator->invalidate();
4438 }
4439
4440 if (minute->match(text, this->interval.end, end, flags) &&
4441 minute->value < 60 &&
4442 minute_separator->match(text, minute->interval.end, end, flags))
4443 {
4444 // Minutes
4445 this->interval.end = minute_separator->interval.end;
4446 }
4447 else {
4448 minute->invalidate();
4449 minute_separator->invalidate();
4450 }
4451
4452 if (second && second->match(text, this->interval.end, end, flags) &&
4453 second->value < 60)
4454 {
4455 // Seconds
4456 this->interval.end = second->interval.end;
4457 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4458 this->interval.end = second_separator->interval.end;
4459 else
4460 if (second_separator) second_separator->invalidate();
4461 }
4462 else {
4463 if (second) second->invalidate();
4464 if (second_separator) second_separator->invalidate();
4465 }
4466
4467 if (degree->interval.start < degree->interval.end ||
4468 minute->interval.start < minute->interval.end ||
4469 (second && second->interval.start < second->interval.end))
4470 {
4471 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4472 // Decimals
4473 this->interval.end = decimal->interval.end;
4474 }
4475 else if (decimal)
4476 decimal->invalidate();
4477 this->interval.start = start;
4478 return true;
4479 }
4480 if (decimal) decimal->invalidate();
4481 this->interval.invalidate();
4482 return false;
4483 }
4484
4485 virtual void invalidate()
4486 {
4487 degree->invalidate();
4488 degree_separator->invalidate();
4489 minute->invalidate();
4490 minute_separator->invalidate();
4491 if (second) second->invalidate();
4492 if (second_separator) second_separator->invalidate();
4493 if (decimal) decimal->invalidate();
4495 }
4496
4497 public:
4498 std::shared_ptr<basic_integer10<T>> degree;
4499 std::shared_ptr<basic_parser<T>> degree_separator;
4500 std::shared_ptr<basic_integer10<T>> minute;
4501 std::shared_ptr<basic_parser<T>> minute_separator;
4502 std::shared_ptr<basic_integer10<T>> second;
4503 std::shared_ptr<basic_parser<T>> second_separator;
4504 std::shared_ptr<basic_parser<T>> decimal;
4505 };
4506
4507 using angle = basic_angle<char>;
4509#ifdef _UNICODE
4510 using RRegElKot = wangle;
4511#else
4512 using RRegElKot = angle;
4513#endif
4515
4519 template <class T>
4521 {
4522 public:
4524 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4525 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4526 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4527 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4528 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4529 _In_ const std::shared_ptr<basic_parser<T>>& space,
4530 _In_ const std::locale& locale = std::locale()) :
4531 basic_parser<T>(locale),
4532 m_digit(digit),
4533 m_plus_sign(plus_sign),
4534 m_lparenthesis(lparenthesis),
4535 m_rparenthesis(rparenthesis),
4536 m_separator(separator),
4537 m_space(space)
4538 {}
4539
4540 virtual bool match(
4541 _In_reads_or_z_(end) const T* text,
4542 _In_ size_t start = 0,
4543 _In_ size_t end = SIZE_MAX,
4544 _In_ int flags = match_default)
4545 {
4546 _Assume_(text || start >= end);
4547
4548 size_t safe_digit_end = start, safe_value_size = 0;
4549 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4550 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4551
4552 this->interval.end = start;
4553 value.clear();
4554 m_lparenthesis->invalidate();
4555 m_rparenthesis->invalidate();
4556
4557 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4558 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4559 safe_value_size = value.size();
4560 this->interval.end = m_plus_sign->interval.end;
4561 }
4562
4563 for (;;) {
4564 _Assume_(text || this->interval.end >= end);
4565 if (this->interval.end >= end || !text[this->interval.end])
4566 break;
4567 if (m_digit->match(text, this->interval.end, end, flags)) {
4568 // Digit
4569 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4570 this->interval.end = m_digit->interval.end;
4571 if (!in_parentheses) {
4572 safe_digit_end = this->interval.end;
4573 safe_value_size = value.size();
4574 has_digits = true;
4575 }
4576 after_digit = true;
4577 after_parentheses = false;
4578 }
4579 else if (
4580 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4581 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4582 m_lparenthesis->match(text, this->interval.end, end, flags))
4583 {
4584 // Left parenthesis
4585 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4586 this->interval.end = m_lparenthesis->interval.end;
4587 in_parentheses = true;
4588 after_digit = false;
4589 after_parentheses = false;
4590 }
4591 else if (
4592 in_parentheses && // After left parenthesis
4593 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4594 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4595 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4596 {
4597 // Right parenthesis
4598 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4599 this->interval.end = m_rparenthesis->interval.end;
4600 safe_digit_end = this->interval.end;
4601 safe_value_size = value.size();
4602 in_parentheses = false;
4603 after_digit = false;
4604 after_parentheses = true;
4605 }
4606 else if (
4607 after_digit &&
4608 !in_parentheses && // No separators inside parentheses
4609 !after_parentheses && // No separators following right parenthesis
4610 m_separator && m_separator->match(text, this->interval.end, end, flags))
4611 {
4612 // Separator
4613 this->interval.end = m_separator->interval.end;
4614 after_digit = false;
4615 after_parentheses = false;
4616 }
4617 else if (
4619 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4620 {
4621 // Space
4622 this->interval.end = m_space->interval.end;
4623 after_digit = false;
4624 after_parentheses = false;
4625 }
4626 else
4627 break;
4628 }
4629 if (has_digits) {
4630 value.erase(safe_value_size);
4631 this->interval.start = start;
4632 this->interval.end = safe_digit_end;
4633 return true;
4634 }
4635 value.clear();
4636 this->interval.invalidate();
4637 return false;
4638 }
4639
4640 virtual void invalidate()
4641 {
4642 value.clear();
4644 }
4645
4646 public:
4647 std::basic_string<T> value;
4648
4649 protected:
4650 std::shared_ptr<basic_parser<T>> m_digit;
4651 std::shared_ptr<basic_parser<T>> m_plus_sign;
4652 std::shared_ptr<basic_set<T>> m_lparenthesis;
4653 std::shared_ptr<basic_set<T>> m_rparenthesis;
4654 std::shared_ptr<basic_parser<T>> m_separator;
4655 std::shared_ptr<basic_parser<T>> m_space;
4656 };
4657
4660#ifdef _UNICODE
4662#else
4664#endif
4666
4672 template <class T>
4673 class basic_iban : public basic_parser<T>
4674 {
4675 public:
4676 basic_iban(
4677 _In_ const std::shared_ptr<basic_parser<T>>& space,
4678 _In_ const std::locale& locale = std::locale()) :
4679 basic_parser<T>(locale),
4680 m_space(space)
4681 {
4682 this->country[0] = 0;
4683 this->check_digits[0] = 0;
4684 this->bban[0] = 0;
4685 this->is_valid = false;
4686 }
4687
4688 virtual bool match(
4689 _In_reads_or_z_(end) const T* text,
4690 _In_ size_t start = 0,
4691 _In_ size_t end = SIZE_MAX,
4692 _In_ int flags = match_default)
4693 {
4694 _Assume_(text || start >= end);
4695 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4696 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4697 struct country_t {
4698 T country[2];
4699 T check_digits[2];
4700 size_t length;
4701 };
4702 static const country_t s_countries[] = {
4703 { { 'A', 'D' }, {}, 24 }, // Andorra
4704 { { 'A', 'E' }, {}, 23 }, // United Arab Emirates
4705 { { 'A', 'L' }, {}, 28 }, // Albania
4706 { { 'A', 'O' }, {}, 25 }, // Angola
4707 { { 'A', 'T' }, {}, 20 }, // Austria
4708 { { 'A', 'Z' }, {}, 28 }, // Azerbaijan
4709 { { 'B', 'A' }, { '3', '9' }, 20}, // Bosnia and Herzegovina
4710 { { 'B', 'E' }, {}, 16 }, // Belgium
4711 { { 'B', 'F' }, {}, 28 }, // Burkina Faso
4712 { { 'B', 'G' }, {}, 22 }, // Bulgaria
4713 { { 'B', 'H' }, {}, 22 }, // Bahrain
4714 { { 'B', 'I' }, {}, 27 }, // Burundi
4715 { { 'B', 'J' }, {}, 28 }, // Benin
4716 { { 'B', 'R' }, {}, 29 }, // Brazil
4717 { { 'B', 'Y' }, {}, 28 }, // Belarus
4718 { { 'C', 'F' }, {}, 27 }, // Central African Republic
4719 { { 'C', 'G' }, {}, 27 }, // Congo, Republic of the
4720 { { 'C', 'H' }, {}, 21 }, // Switzerland
4721 { { 'C', 'I' }, {}, 28 }, // Côte d'Ivoire
4722 { { 'C', 'M' }, {}, 27 }, // Cameroon
4723 { { 'C', 'R' }, {}, 22 }, // Costa Rica
4724 { { 'C', 'V' }, {}, 25 }, // Cabo Verde
4725 { { 'C', 'Y' }, {}, 28 }, // Cyprus
4726 { { 'C', 'Z' }, {}, 24 }, // Czech Republic
4727 { { 'D', 'E' }, {}, 22 }, // Germany
4728 { { 'D', 'J' }, {}, 27 }, // Djibouti
4729 { { 'D', 'K' }, {}, 18 }, // Denmark
4730 { { 'D', 'O' }, {}, 28 }, // Dominican Republic
4731 { { 'D', 'Z' }, {}, 26 }, // Algeria
4732 { { 'E', 'E' }, {}, 20 }, // Estonia
4733 { { 'E', 'G' }, {}, 29 }, // Egypt
4734 { { 'E', 'S' }, {}, 24 }, // Spain
4735 { { 'F', 'I' }, {}, 18 }, // Finland
4736 { { 'F', 'O' }, {}, 18 }, // Faroe Islands
4737 { { 'F', 'R' }, {}, 27 }, // France
4738 { { 'G', 'A' }, {}, 27 }, // Gabon
4739 { { 'G', 'B' }, {}, 22 }, // United Kingdom
4740 { { 'G', 'E' }, {}, 22 }, // Georgia
4741 { { 'G', 'I' }, {}, 23 }, // Gibraltar
4742 { { 'G', 'L' }, {}, 18 }, // Greenland
4743 { { 'G', 'Q' }, {}, 27 }, // Equatorial Guinea
4744 { { 'G', 'R' }, {}, 27 }, // Greece
4745 { { 'G', 'T' }, {}, 28 }, // Guatemala
4746 { { 'G', 'W' }, {}, 25 }, // Guinea-Bissau
4747 { { 'H', 'N' }, {}, 28 }, // Honduras
4748 { { 'H', 'R' }, {}, 21 }, // Croatia
4749 { { 'H', 'U' }, {}, 28 }, // Hungary
4750 { { 'I', 'E' }, {}, 22 }, // Ireland
4751 { { 'I', 'L' }, {}, 23 }, // Israel
4752 { { 'I', 'Q' }, {}, 23 }, // Iraq
4753 { { 'I', 'R' }, {}, 26 }, // Iran
4754 { { 'I', 'S' }, {}, 26 }, // Iceland
4755 { { 'I', 'T' }, {}, 27 }, // Italy
4756 { { 'J', 'O' }, {}, 30 }, // Jordan
4757 { { 'K', 'M' }, {}, 27 }, // Comoros
4758 { { 'K', 'W' }, {}, 30 }, // Kuwait
4759 { { 'K', 'Z' }, {}, 20 }, // Kazakhstan
4760 { { 'L', 'B' }, {}, 28 }, // Lebanon
4761 { { 'L', 'C' }, {}, 32 }, // Saint Lucia
4762 { { 'L', 'I' }, {}, 21 }, // Liechtenstein
4763 { { 'L', 'T' }, {}, 20 }, // Lithuania
4764 { { 'L', 'U' }, {}, 20 }, // Luxembourg
4765 { { 'L', 'V' }, {}, 21 }, // Latvia
4766 { { 'L', 'Y' }, {}, 25 }, // Libya
4767 { { 'M', 'A' }, {}, 28 }, // Morocco
4768 { { 'M', 'C' }, {}, 27 }, // Monaco
4769 { { 'M', 'D' }, {}, 24 }, // Moldova
4770 { { 'M', 'E' }, { '2', '5' }, 22 }, // Montenegro
4771 { { 'M', 'G' }, {}, 27 }, // Madagascar
4772 { { 'M', 'K' }, { '0', '7' }, 19 }, // North Macedonia
4773 { { 'M', 'L' }, {}, 28 }, // Mali
4774 { { 'M', 'R' }, { '1', '3' }, 27}, // Mauritania
4775 { { 'M', 'T' }, {}, 31 }, // Malta
4776 { { 'M', 'U' }, {}, 30 }, // Mauritius
4777 { { 'M', 'Z' }, {}, 25 }, // Mozambique
4778 { { 'N', 'E' }, {}, 28 }, // Niger
4779 { { 'N', 'I' }, {}, 32 }, // Nicaragua
4780 { { 'N', 'L' }, {}, 18 }, // Netherlands
4781 { { 'N', 'O' }, {}, 15 }, // Norway
4782 { { 'P', 'K' }, {}, 24 }, // Pakistan
4783 { { 'P', 'L' }, {}, 28 }, // Poland
4784 { { 'P', 'S' }, {}, 29 }, // Palestinian territories
4785 { { 'P', 'T' }, { '5', '0' }, 25 }, // Portugal
4786 { { 'Q', 'A' }, {}, 29 }, // Qatar
4787 { { 'R', 'O' }, {}, 24 }, // Romania
4788 { { 'R', 'S' }, { '3', '5' }, 22 }, // Serbia
4789 { { 'R', 'U' }, {}, 33 }, // Russia
4790 { { 'S', 'A' }, {}, 24 }, // Saudi Arabia
4791 { { 'S', 'C' }, {}, 31 }, // Seychelles
4792 { { 'S', 'D' }, {}, 18 }, // Sudan
4793 { { 'S', 'E' }, {}, 24 }, // Sweden
4794 { { 'S', 'I' }, { '5', '6' }, 19 }, // Slovenia
4795 { { 'S', 'K' }, {}, 24 }, // Slovakia
4796 { { 'S', 'M' }, {}, 27 }, // San Marino
4797 { { 'S', 'N' }, {}, 28 }, // Senegal
4798 { { 'S', 'T' }, {}, 25 }, // São Tomé and Príncipe
4799 { { 'S', 'V' }, {}, 28 }, // El Salvador
4800 { { 'T', 'D' }, {}, 27 }, // Chad
4801 { { 'T', 'G' }, {}, 28 }, // Togo
4802 { { 'T', 'L' }, { '3', '8' }, 23}, // East Timor
4803 { { 'T', 'N' }, { '5', '9' }, 24 }, // Tunisia
4804 { { 'T', 'R' }, {}, 26 }, // Turkey
4805 { { 'U', 'A' }, {}, 29 }, // Ukraine
4806 { { 'V', 'A' }, {}, 22 }, // Vatican City
4807 { { 'V', 'G' }, {}, 24 }, // Virgin Islands, British
4808 { { 'X', 'K' }, {}, 20 }, // Kosovo
4809 };
4810 const country_t* country_desc = nullptr;
4811 size_t n, available, next, bban_length;
4813
4814 this->interval.end = start;
4815 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4816 if (this->interval.end >= end || !text[this->interval.end])
4817 goto error; // incomplete country code
4818 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4819 if (chr < 'A' || 'Z' < chr)
4820 goto error; // invalid country code
4821 this->country[i] = chr;
4822 }
4823 for (size_t l = 0, r = _countof(s_countries);;) {
4824 if (l >= r)
4825 goto error; // unknown country
4826 size_t m = (l + r) / 2;
4827 const country_t& c = s_countries[m];
4828 if (c.country[0] < this->country[0] || (c.country[0] == this->country[0] && c.country[1] < this->country[1]))
4829 l = m + 1;
4830 else if (this->country[0] < c.country[0] || (this->country[0] == c.country[0] && this->country[1] < c.country[1]))
4831 r = m;
4832 else {
4833 country_desc = &c;
4834 break;
4835 }
4836 }
4837 this->country[2] = 0;
4838
4839 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4840 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4841 goto error; // incomplete or invalid check digits
4842 this->check_digits[i] = text[this->interval.end];
4843 }
4844 this->check_digits[2] = 0;
4845
4846 if ((country_desc->check_digits[0] && this->check_digits[0] != country_desc->check_digits[0]) ||
4847 (country_desc->check_digits[1] && this->check_digits[1] != country_desc->check_digits[1]))
4848 goto error; // unexpected check digits
4849
4850 bban_length = country_desc->length - 4;
4851 for (n = 0; n < bban_length;) {
4852 if (this->interval.end >= end || !text[this->interval.end])
4853 goto error; // bban too short
4854 if (m_space && m_space->match(text, this->interval.end, end, flags)) {
4855 this->interval.end = m_space->interval.end;
4856 continue;
4857 }
4858 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4859 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4860 this->bban[n++] = chr;
4861 this->interval.end++;
4862 }
4863 else
4864 goto error; // invalid bban
4865 }
4866 this->bban[n] = 0;
4867
4868 // Normalize IBAN.
4869 T normalized[69];
4870 available = 0;
4871 for (size_t i = 0; ; ++i) {
4872 if (!this->bban[i]) {
4873 for (i = 0; i < 2; ++i) {
4874 if ('A' <= this->country[i] && this->country[i] <= 'J') {
4875 normalized[available++] = '1';
4876 normalized[available++] = '0' + this->country[i] - 'A';
4877 }
4878 else if ('K' <= this->country[i] && this->country[i] <= 'T') {
4879 normalized[available++] = '2';
4880 normalized[available++] = '0' + this->country[i] - 'K';
4881 }
4882 else if ('U' <= this->country[i] && this->country[i] <= 'Z') {
4883 normalized[available++] = '3';
4884 normalized[available++] = '0' + this->country[i] - 'U';
4885 }
4886 }
4887 normalized[available++] = this->check_digits[0];
4888 normalized[available++] = this->check_digits[1];
4889 normalized[available] = 0;
4890 break;
4891 }
4892 if ('0' <= this->bban[i] && this->bban[i] <= '9')
4893 normalized[available++] = this->bban[i];
4894 else if ('A' <= this->bban[i] && this->bban[i] <= 'J') {
4895 normalized[available++] = '1';
4896 normalized[available++] = '0' + this->bban[i] - 'A';
4897 }
4898 else if ('K' <= this->bban[i] && this->bban[i] <= 'T') {
4899 normalized[available++] = '2';
4900 normalized[available++] = '0' + this->bban[i] - 'K';
4901 }
4902 else if ('U' <= this->bban[i] && this->bban[i] <= 'Z') {
4903 normalized[available++] = '3';
4904 normalized[available++] = '0' + this->bban[i] - 'U';
4905 }
4906 }
4907
4908 // Calculate modulo 97.
4909 nominator = stdex::strtou32(normalized, 9, &next, 10);
4910 for (;;) {
4911 nominator %= 97;
4912 if (!normalized[next]) {
4913 this->is_valid = nominator == 1;
4914 break;
4915 }
4916 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
4917 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
4918 nominator = nominator * 10 + (normalized[next] - '0');
4919 }
4920
4921 this->interval.start = start;
4922 return true;
4923
4924 error:
4925 this->country[0] = 0;
4926 this->check_digits[0] = 0;
4927 this->bban[0] = 0;
4928 this->is_valid = false;
4929 this->interval.invalidate();
4930 return false;
4931 }
4932
4933 virtual void invalidate()
4934 {
4935 this->country[0] = 0;
4936 this->check_digits[0] = 0;
4937 this->bban[0] = 0;
4938 this->is_valid = false;
4940 }
4941
4942 public:
4943 T country[3];
4945 T bban[31];
4947
4948 protected:
4949 std::shared_ptr<basic_parser<T>> m_space;
4950 };
4951
4952 using iban = basic_iban<char>;
4953 using wiban = basic_iban<wchar_t>;
4954#ifdef _UNICODE
4955 using tiban = wiban;
4956#else
4957 using tiban = iban;
4958#endif
4960
4966 template <class T>
4968 {
4969 public:
4971 _In_ const std::shared_ptr<basic_parser<T>>& space,
4972 _In_ const std::locale& locale = std::locale()) :
4973 basic_parser<T>(locale),
4974 m_space(space)
4975 {
4976 this->check_digits[0] = 0;
4977 this->reference[0] = 0;
4978 this->is_valid = false;
4979 }
4980
4981 virtual bool match(
4982 _In_reads_or_z_(end) const T* text,
4983 _In_ size_t start = 0,
4984 _In_ size_t end = SIZE_MAX,
4985 _In_ int flags = match_default)
4986 {
4987 _Assume_(text || start >= end);
4988 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4989 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4990 size_t n, available, next;
4992
4993 this->interval.end = start;
4994 if (this->interval.end + 1 >= end ||
4995 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'R' ||
4996 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'F')
4997 goto error; // incomplete or wrong reference ID
4998 this->interval.end += 2;
4999
5000 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5001 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5002 goto error; // incomplete or invalid check digits
5003 this->check_digits[i] = text[this->interval.end];
5004 }
5005 this->check_digits[2] = 0;
5006
5007 for (n = 0;;) {
5008 if (m_space && m_space->match(text, this->interval.end, end, flags))
5009 this->interval.end = m_space->interval.end;
5010 for (size_t j = 0; j < 4; ++j) {
5011 if (this->interval.end >= end || !text[this->interval.end])
5012 goto out;
5013 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
5014 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
5015 if (n >= _countof(reference) - 1)
5016 goto error; // reference overflow
5017 this->reference[n++] = chr;
5018 this->interval.end++;
5019 }
5020 else
5021 goto out;
5022 }
5023 }
5024 out:
5025 if (!n)
5026 goto error; // reference too short
5027 this->reference[_countof(this->reference) - 1] = 0;
5028 for (size_t i = n, j = _countof(this->reference) - 1; i;)
5029 this->reference[--j] = this->reference[--i];
5030 for (size_t j = _countof(this->reference) - 1 - n; j;)
5031 this->reference[--j] = '0';
5032
5033 // Normalize creditor reference.
5034 T normalized[47];
5035 available = 0;
5036 for (size_t i = 0; ; ++i) {
5037 if (!this->reference[i]) {
5038 normalized[available++] = '2'; // R
5039 normalized[available++] = '7';
5040 normalized[available++] = '1'; // F
5041 normalized[available++] = '5';
5042 normalized[available++] = this->check_digits[0];
5043 normalized[available++] = this->check_digits[1];
5044 normalized[available] = 0;
5045 break;
5046 }
5047 if ('0' <= this->reference[i] && this->reference[i] <= '9')
5048 normalized[available++] = this->reference[i];
5049 else if ('A' <= this->reference[i] && this->reference[i] <= 'J') {
5050 normalized[available++] = '1';
5051 normalized[available++] = '0' + this->reference[i] - 'A';
5052 }
5053 else if ('K' <= this->reference[i] && this->reference[i] <= 'T') {
5054 normalized[available++] = '2';
5055 normalized[available++] = '0' + this->reference[i] - 'K';
5056 }
5057 else if ('U' <= this->reference[i] && this->reference[i] <= 'Z') {
5058 normalized[available++] = '3';
5059 normalized[available++] = '0' + this->reference[i] - 'U';
5060 }
5061 }
5062
5063 // Calculate modulo 97.
5064 nominator = stdex::strtou32(normalized, 9, &next, 10);
5065 for (;;) {
5066 nominator %= 97;
5067 if (!normalized[next]) {
5068 this->is_valid = nominator == 1;
5069 break;
5070 }
5071 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
5072 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
5073 nominator = nominator * 10 + (normalized[next] - '0');
5074 }
5075
5076 this->interval.start = start;
5077 return true;
5078
5079 error:
5080 this->check_digits[0] = 0;
5081 this->reference[0] = 0;
5082 this->is_valid = false;
5083 this->interval.invalidate();
5084 return false;
5085 }
5086
5087 virtual void invalidate()
5088 {
5089 this->check_digits[0] = 0;
5090 this->reference[0] = 0;
5091 this->is_valid = false;
5093 }
5094
5095 public:
5099
5100 protected:
5101 std::shared_ptr<basic_parser<T>> m_space;
5102 };
5103
5106#ifdef _UNICODE
5108#else
5110#endif
5112
5118 template <class T>
5120 {
5121 public:
5122 basic_si_reference_part(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5123
5124 virtual bool match(
5125 _In_reads_or_z_(end) const T* text,
5126 _In_ size_t start = 0,
5127 _In_ size_t end = SIZE_MAX,
5128 _In_ int flags = match_default)
5129 {
5130 _Assume_(text || start >= end);
5131 this->interval.end = start;
5132 for (;;) {
5133 if (this->interval.end >= end || !text[this->interval.end])
5134 break;
5135 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9')
5136 this->interval.end++;
5137 else
5138 break;
5139 }
5141 this->interval.start = start;
5142 return true;
5143 }
5144 this->interval.invalidate();
5145 return false;
5146 }
5147 };
5148
5151#ifdef _UNICODE
5153#else
5155#endif
5157
5163 template <class T>
5165 {
5166 public:
5167 basic_si_reference_delimiter(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5168
5169 virtual bool match(
5170 _In_reads_or_z_(end) const T* text,
5171 _In_ size_t start = 0,
5172 _In_ size_t end = SIZE_MAX,
5173 _In_ int flags = match_default)
5174 {
5175 _Assume_(text || start >= end);
5176 if (start < end && text[start] == '-') {
5177 this->interval.end = (this->interval.start = start) + 1;
5178 return true;
5179 }
5180 this->interval.invalidate();
5181 return false;
5182 }
5183 };
5184
5187#ifdef _UNICODE
5189#else
5191#endif
5193
5201 template <class T>
5203 {
5204 public:
5206 _In_ const std::shared_ptr<basic_parser<T>>& space,
5207 _In_ const std::locale& locale = std::locale()) :
5208 basic_parser<T>(locale),
5209 part1(locale),
5210 part2(locale),
5211 part3(locale),
5212 is_valid(false),
5213 m_space(space),
5214 m_delimiter(locale)
5215 {
5216 this->model[0] = 0;
5217 }
5218
5219 virtual bool match(
5220 _In_reads_or_z_(end) const T* text,
5221 _In_ size_t start = 0,
5222 _In_ size_t end = SIZE_MAX,
5223 _In_ int flags = match_default)
5224 {
5225 _Assume_(text || start >= end);
5226 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
5227 const bool case_insensitive = flags & match_case_insensitive ? true : false;
5228
5229 this->interval.end = start;
5230 if (this->interval.end + 1 >= end ||
5231 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'S' ||
5232 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'I')
5233 goto error; // incomplete or wrong reference ID
5234 this->interval.end += 2;
5235
5236 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5237 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5238 goto error; // incomplete or invalid model
5239 this->model[i] = text[this->interval.end];
5240 }
5241 this->model[2] = 0;
5242
5243 this->part1.invalidate();
5244 this->part2.invalidate();
5245 this->part3.invalidate();
5246 if (this->model[0] == '9' && this->model[1] == '9') {
5247 is_valid = true;
5248 this->interval.start = start;
5249 return true;
5250 }
5251
5252 if (m_space && m_space->match(text, this->interval.end, end, flags))
5253 this->interval.end = m_space->interval.end;
5254
5255 this->part1.match(text, this->interval.end, end, flags) &&
5256 this->m_delimiter.match(text, this->part1.interval.end, end, flags) &&
5257 this->part2.match(text, this->m_delimiter.interval.end, end, flags) &&
5258 this->m_delimiter.match(text, this->part2.interval.end, end, flags) &&
5259 this->part3.match(text, this->m_delimiter.interval.end, end, flags);
5260
5261 this->interval.start = start;
5262 if (this->part3.interval)
5263 this->interval.end = this->part3.interval.end;
5264 else if (this->part2.interval)
5265 this->interval.end = this->part2.interval.end;
5266 else if (this->part1.interval)
5267 this->interval.end = this->part1.interval.end;
5268 else
5269 this->interval.end = start + 4;
5270
5271 if (this->model[0] == '0' && this->model[1] == '0')
5272 is_valid =
5273 this->part3.interval ?
5274 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5275 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 :
5276 this->part2.interval ?
5277 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5278 this->part1.interval.size() + this->part2.interval.size() <= 20 :
5279 this->part1.interval ?
5280 this->part1.interval.size() <= 12 :
5281 false;
5282 else if (this->model[0] == '0' && this->model[1] == '1')
5283 is_valid =
5284 this->part3.interval ?
5285 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5286 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5287 check11(
5288 text + this->part1.interval.start, this->part1.interval.size(),
5289 text + this->part2.interval.start, this->part2.interval.size(),
5290 text + this->part3.interval.start, this->part3.interval.size()) :
5291 this->part2.interval ?
5292 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5293 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5294 check11(
5295 text + this->part1.interval.start, this->part1.interval.size(),
5296 text + this->part2.interval.start, this->part2.interval.size()) :
5297 this->part1.interval ?
5298 this->part1.interval.size() <= 12 &&
5299 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5300 false;
5301 else if (this->model[0] == '0' && this->model[1] == '2')
5302 is_valid =
5303 this->part3.interval ?
5304 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5305 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5306 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5307 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5308 false;
5309 else if (this->model[0] == '0' && this->model[1] == '3')
5310 is_valid =
5311 this->part3.interval ?
5312 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5313 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5314 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5315 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5316 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5317 false;
5318 else if (this->model[0] == '0' && this->model[1] == '4')
5319 is_valid =
5320 this->part3.interval ?
5321 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5322 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5323 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5324 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5325 false;
5326 else if ((this->model[0] == '0' || this->model[0] == '5') && this->model[1] == '5')
5327 is_valid =
5328 this->part3.interval ?
5329 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5330 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5331 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5332 this->part2.interval ?
5333 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5334 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5335 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5336 this->part1.interval ?
5337 this->part1.interval.size() <= 12 &&
5338 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5339 false;
5340 else if (this->model[0] == '0' && this->model[1] == '6')
5341 is_valid =
5342 this->part3.interval ?
5343 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5344 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5345 check11(
5346 text + this->part2.interval.start, this->part2.interval.size(),
5347 text + this->part3.interval.start, this->part3.interval.size()) :
5348 this->part2.interval ?
5349 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5350 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5351 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5352 false;
5353 else if (this->model[0] == '0' && this->model[1] == '7')
5354 is_valid =
5355 this->part3.interval ?
5356 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5357 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5358 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5359 this->part2.interval ?
5360 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5361 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5362 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5363 false;
5364 else if (this->model[0] == '0' && this->model[1] == '8')
5365 is_valid =
5366 this->part3.interval ?
5367 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5368 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5369 check11(
5370 text + this->part1.interval.start, this->part1.interval.size(),
5371 text + this->part2.interval.start, this->part2.interval.size()) &&
5372 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5373 false;
5374 else if (this->model[0] == '0' && this->model[1] == '9')
5375 is_valid =
5376 this->part3.interval ?
5377 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5378 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5379 check11(
5380 text + this->part1.interval.start, this->part1.interval.size(),
5381 text + this->part2.interval.start, this->part2.interval.size()) :
5382 this->part2.interval ?
5383 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5384 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5385 check11(
5386 text + this->part1.interval.start, this->part1.interval.size(),
5387 text + this->part2.interval.start, this->part2.interval.size()) :
5388 this->part1.interval ?
5389 this->part1.interval.size() <= 12 &&
5390 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5391 false;
5392 else if (this->model[0] == '1' && this->model[1] == '0')
5393 is_valid =
5394 this->part3.interval ?
5395 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5396 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5397 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5398 check11(
5399 text + this->part2.interval.start, this->part2.interval.size(),
5400 text + this->part3.interval.start, this->part3.interval.size()) :
5401 this->part2.interval ?
5402 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5403 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5404 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5405 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5406 false;
5407 else if (
5408 (this->model[0] == '1' && (this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5409 ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '8') ||
5410 (this->model[0] == '4' && (this->model[1] == '0' || this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5411 (this->model[0] == '5' && (this->model[1] == '1' || this->model[1] == '8')))
5412 is_valid =
5413 this->part3.interval ?
5414 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5415 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5416 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5417 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5418 this->part2.interval ?
5419 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5420 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5421 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5422 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5423 false;
5424 else if (this->model[0] == '1' && this->model[1] == '2')
5425 is_valid =
5426 this->part3.interval ? false :
5427 this->part2.interval ? false :
5428 this->part1.interval ?
5429 this->part1.interval.size() <= 13 &&
5430 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5431 false;
5432 else if ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '1')
5433 is_valid =
5434 this->part3.interval ? false :
5435 this->part2.interval ?
5436 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5437 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5438 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5439 false;
5440 else
5441 is_valid = true; // Assume models we don't handle as valid
5442 return true;
5443
5444 error:
5445 this->model[0] = 0;
5446 this->part1.interval.start = (this->part1.interval.end = start) + 1;
5447 this->part2.interval.start = (this->part2.interval.end = start) + 1;
5448 this->part3.interval.start = (this->part3.interval.end = start) + 1;
5449 this->is_valid = false;
5450 this->interval.invalidate();
5451 return false;
5452 }
5453
5454 virtual void invalidate()
5455 {
5456 this->model[0] = 0;
5457 this->part1.invalidate();
5458 this->part2.invalidate();
5459 this->part3.invalidate();
5460 this->is_valid = false;
5462 }
5463
5464 protected:
5465 static bool check11(
5466 _In_count_(num_part1) const T* part1, _In_ size_t num_part1)
5467 {
5468 _Assume_(part1 && num_part1 >= 1);
5469 uint32_t nominator = 0, ponder = 2;
5470 for (size_t i = num_part1 - 1; i--; ++ponder)
5471 nominator += (part1[i] - '0') * ponder;
5472 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5473 if (control >= 10)
5474 control = 0;
5475 return control == part1[num_part1 - 1] - '0';
5476 }
5477
5478 static bool check11(
5479 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5480 _In_count_(num_part2) const T* part2, _In_ size_t num_part2)
5481 {
5482 _Assume_(part1 || !num_part1);
5483 _Assume_(part2 && num_part2 >= 1);
5484 uint32_t nominator = 0, ponder = 2;
5485 for (size_t i = num_part2 - 1; i--; ++ponder)
5486 nominator += (part2[i] - '0') * ponder;
5487 for (size_t i = num_part1; i--; ++ponder)
5488 nominator += (part1[i] - '0') * ponder;
5489 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5490 if (control == 10)
5491 control = 0;
5492 return control == part2[num_part2 - 1] - '0';
5493 }
5494
5495 static bool check11(
5496 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5497 _In_count_(num_part2) const T* part2, _In_ size_t num_part2,
5498 _In_count_(num_part3) const T* part3, _In_ size_t num_part3)
5499 {
5500 _Assume_(part1 || !num_part1);
5501 _Assume_(part2 || !num_part2);
5502 _Assume_(part3 && num_part3 >= 1);
5503 uint32_t nominator = 0, ponder = 2;
5504 for (size_t i = num_part3 - 1; i--; ++ponder)
5505 nominator += (part3[i] - '0') * ponder;
5506 for (size_t i = num_part2; i--; ++ponder)
5507 nominator += (part2[i] - '0') * ponder;
5508 for (size_t i = num_part1; i--; ++ponder)
5509 nominator += (part1[i] - '0') * ponder;
5510 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5511 if (control == 10)
5512 control = 0;
5513 return control == part2[num_part3 - 1] - '0';
5514 }
5515
5516 public:
5517 T model[3];
5522
5523 protected:
5524 std::shared_ptr<basic_parser<T>> m_space;
5526 };
5527
5530#ifdef _UNICODE
5532#else
5534#endif
5536
5540 template <class T>
5542 {
5543 public:
5545 _In_ const std::shared_ptr<basic_parser<T>>& element,
5546 _In_ const std::shared_ptr<basic_parser<T>>& digit,
5547 _In_ const std::shared_ptr<basic_parser<T>>& sign,
5548 _In_ const std::locale& locale = std::locale()) :
5549 basic_parser<T>(locale),
5550 m_element(element),
5551 m_digit(digit),
5552 m_sign(sign),
5553 has_digits(false),
5554 has_charge(false)
5555 {}
5556
5557 virtual bool match(
5558 _In_reads_or_z_(end) const T* text,
5559 _In_ size_t start = 0,
5560 _In_ size_t end = SIZE_MAX,
5561 _In_ int flags = match_default)
5562 {
5563 _Assume_(text || start >= end);
5564
5565 has_digits = false;
5566 has_charge = false;
5567 this->interval.end = start;
5568
5569 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
5570 for (;;) {
5571 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
5572 this->interval.end = m_element->interval.end;
5573 while (m_digit->match(text, this->interval.end, end, flags)) {
5574 this->interval.end = m_digit->interval.end;
5575 has_digits = true;
5576 }
5577 }
5578 else if (start < this->interval.end) {
5579 if (m_sign->match(text, this->interval.end, end, flags)) {
5580 this->interval.end = m_sign->interval.end;
5581 has_charge = true;
5582 }
5583 this->interval.start = start;
5584 return true;
5585 }
5586 else {
5587 this->interval.invalidate();
5588 return false;
5589 }
5590 }
5591 }
5592
5593 virtual void invalidate()
5594 {
5595 has_digits = false;
5596 has_charge = false;
5598 }
5599
5600 public:
5601 bool has_digits;
5602 bool has_charge;
5603
5604 protected:
5605 std::shared_ptr<basic_parser<T>> m_element;
5606 std::shared_ptr<basic_parser<T>> m_digit;
5607 std::shared_ptr<basic_parser<T>> m_sign;
5608 };
5609
5612#ifdef _UNICODE
5614#else
5616#endif
5618
5623 {
5624 public:
5625 virtual bool match(
5626 _In_reads_or_z_(end) const char* text,
5627 _In_ size_t start = 0,
5628 _In_ size_t end = SIZE_MAX,
5629 _In_ int flags = match_default)
5630 {
5631 _Assume_(text || start >= end);
5632 this->interval.end = start;
5633
5634 _Assume_(text || this->interval.end >= end);
5635 if (this->interval.end < end && text[this->interval.end]) {
5636 if (text[this->interval.end] == '\r') {
5637 this->interval.end++;
5638 if (this->interval.end < end && text[this->interval.end] == '\n') {
5639 this->interval.start = start;
5640 this->interval.end++;
5641 return true;
5642 }
5643 }
5644 else if (text[this->interval.end] == '\n') {
5645 this->interval.start = start;
5646 this->interval.end++;
5647 return true;
5648 }
5649 }
5650 this->interval.invalidate();
5651 return false;
5652 }
5653 };
5654
5658 class http_space : public parser
5659 {
5660 public:
5661 virtual bool match(
5662 _In_reads_or_z_(end) const char* text,
5663 _In_ size_t start = 0,
5664 _In_ size_t end = SIZE_MAX,
5665 _In_ int flags = match_default)
5666 {
5667 _Assume_(text || start >= end);
5668 this->interval.end = start;
5669 if (m_line_break.match(text, this->interval.end, end, flags)) {
5670 this->interval.end = m_line_break.interval.end;
5671 if (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) {
5672 this->interval.start = start;
5673 this->interval.end++;
5674 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
5675 return true;
5676 }
5677 }
5678 else if (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) {
5679 this->interval.start = start;
5680 this->interval.end++;
5681 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
5682 return true;
5683 }
5684 this->interval.invalidate();
5685 return false;
5686 }
5687
5688 protected:
5689 http_line_break m_line_break;
5690 };
5691
5695 class http_text_char : public parser
5696 {
5697 public:
5698 virtual bool match(
5699 _In_reads_or_z_(end) const char* text,
5700 _In_ size_t start = 0,
5701 _In_ size_t end = SIZE_MAX,
5702 _In_ int flags = match_default)
5703 {
5704 _Assume_(text || start >= end);
5705 this->interval.end = start;
5706
5707 _Assume_(text || this->interval.end >= end);
5708 if (m_space.match(text, this->interval.end, end, flags)) {
5709 this->interval.start = start;
5710 this->interval.end = m_space.interval.end;
5711 return true;
5712 }
5713 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
5714 this->interval.start = start;
5715 this->interval.end++;
5716 return true;
5717 }
5718 this->interval.invalidate();
5719 return false;
5720 }
5721
5722 protected:
5723 http_space m_space;
5724 };
5725
5729 class http_token : public parser
5730 {
5731 public:
5732 virtual bool match(
5733 _In_reads_or_z_(end) const char* text,
5734 _In_ size_t start = 0,
5735 _In_ size_t end = SIZE_MAX,
5736 _In_ int flags = match_default)
5737 {
5738 _Assume_(text || start >= end);
5739 this->interval.end = start;
5740 for (;;) {
5741 if (this->interval.end < end && text[this->interval.end]) {
5742 if ((unsigned int)text[this->interval.end] < 0x20 ||
5743 (unsigned int)text[this->interval.end] == 0x7f ||
5744 text[this->interval.end] == '(' ||
5745 text[this->interval.end] == ')' ||
5746 text[this->interval.end] == '<' ||
5747 text[this->interval.end] == '>' ||
5748 text[this->interval.end] == '@' ||
5749 text[this->interval.end] == ',' ||
5750 text[this->interval.end] == ';' ||
5751 text[this->interval.end] == ':' ||
5752 text[this->interval.end] == '\\' ||
5753 text[this->interval.end] == '\"' ||
5754 text[this->interval.end] == '/' ||
5755 text[this->interval.end] == '[' ||
5756 text[this->interval.end] == ']' ||
5757 text[this->interval.end] == '?' ||
5758 text[this->interval.end] == '=' ||
5759 text[this->interval.end] == '{' ||
5760 text[this->interval.end] == '}' ||
5761 isspace(text[this->interval.end]))
5762 break;
5763 else
5764 this->interval.end++;
5765 }
5766 else
5767 break;
5768 }
5770 this->interval.start = start;
5771 return true;
5772 }
5773 else {
5774 this->interval.invalidate();
5775 return false;
5776 }
5777 }
5778 };
5779
5784 {
5785 public:
5786 virtual bool match(
5787 _In_reads_or_z_(end) const char* text,
5788 _In_ size_t start = 0,
5789 _In_ size_t end = SIZE_MAX,
5790 _In_ int flags = match_default)
5791 {
5792 _Assume_(text || start >= end);
5793 this->interval.end = start;
5794 if (this->interval.end < end && text[this->interval.end] != '"')
5795 goto error;
5796 this->interval.end++;
5797 content.start = this->interval.end;
5798 for (;;) {
5799 _Assume_(text || this->interval.end >= end);
5800 if (this->interval.end < end && text[this->interval.end]) {
5801 if (text[this->interval.end] == '"') {
5802 content.end = this->interval.end;
5803 this->interval.end++;
5804 break;
5805 }
5806 else if (text[this->interval.end] == '\\') {
5807 this->interval.end++;
5808 if (this->interval.end < end && text[this->interval.end]) {
5809 this->interval.end++;
5810 }
5811 else
5812 goto error;
5813 }
5814 else if (m_chr.match(text, this->interval.end, end, flags))
5815 this->interval.end++;
5816 else
5817 goto error;
5818 }
5819 else
5820 goto error;
5821 }
5822 this->interval.start = start;
5823 return true;
5824
5825 error:
5826 content.start = 1;
5827 content.end = 0;
5828 this->interval.invalidate();
5829 return false;
5830 }
5831
5832 virtual void invalidate()
5833 {
5834 content.start = 1;
5835 content.end = 0;
5836 parser::invalidate();
5837 }
5838
5839 public:
5841
5842 protected:
5843 http_text_char m_chr;
5844 };
5845
5849 class http_value : public parser
5850 {
5851 public:
5852 virtual bool match(
5853 _In_reads_or_z_(end) const char* text,
5854 _In_ size_t start = 0,
5855 _In_ size_t end = SIZE_MAX,
5856 _In_ int flags = match_default)
5857 {
5858 _Assume_(text || start >= end);
5859 this->interval.end = start;
5860 if (string.match(text, this->interval.end, end, flags)) {
5861 token.invalidate();
5862 this->interval.end = string.interval.end;
5863 this->interval.start = start;
5864 return true;
5865 }
5866 else if (token.match(text, this->interval.end, end, flags)) {
5867 string.invalidate();
5868 this->interval.end = token.interval.end;
5869 this->interval.start = start;
5870 return true;
5871 }
5872 else {
5873 this->interval.invalidate();
5874 return false;
5875 }
5876 }
5877
5878 virtual void invalidate()
5879 {
5880 string.invalidate();
5881 token.invalidate();
5882 parser::invalidate();
5883 }
5884
5885 public:
5888 };
5889
5893 class http_parameter : public parser
5894 {
5895 public:
5896 virtual bool match(
5897 _In_reads_or_z_(end) const char* text,
5898 _In_ size_t start = 0,
5899 _In_ size_t end = SIZE_MAX,
5900 _In_ int flags = match_default)
5901 {
5902 _Assume_(text || start >= end);
5903 this->interval.end = start;
5904 if (name.match(text, this->interval.end, end, flags))
5905 this->interval.end = name.interval.end;
5906 else
5907 goto error;
5908 while (m_space.match(text, this->interval.end, end, flags))
5909 this->interval.end = m_space.interval.end;
5910 _Assume_(text || this->interval.end >= end);
5911 if (this->interval.end < end && text[this->interval.end] == '=')
5912 this->interval.end++;
5913 else
5914 while (m_space.match(text, this->interval.end, end, flags))
5915 this->interval.end = m_space.interval.end;
5916 if (value.match(text, this->interval.end, end, flags))
5917 this->interval.end = value.interval.end;
5918 else
5919 goto error;
5920 this->interval.start = start;
5921 return true;
5922
5923 error:
5924 name.invalidate();
5925 value.invalidate();
5926 this->interval.invalidate();
5927 return false;
5928 }
5929
5930 virtual void invalidate()
5931 {
5932 name.invalidate();
5933 value.invalidate();
5934 parser::invalidate();
5935 }
5936
5937 public:
5940
5941 protected:
5942 http_space m_space;
5943 };
5944
5948 class http_any_type : public parser
5949 {
5950 public:
5951 virtual bool match(
5952 _In_reads_or_z_(end) const char* text,
5953 _In_ size_t start = 0,
5954 _In_ size_t end = SIZE_MAX,
5955 _In_ int flags = match_default)
5956 {
5957 _Assume_(text || start >= end);
5958 if (start + 2 < end &&
5959 text[start] == '*' &&
5960 text[start + 1] == '/' &&
5961 text[start + 2] == '*')
5962 {
5963 this->interval.end = (this->interval.start = start) + 3;
5964 return true;
5965 }
5966 else if (start < end && text[start] == '*') {
5967 this->interval.end = (this->interval.start = start) + 1;
5968 return true;
5969 }
5970 else {
5971 this->interval.invalidate();
5972 return false;
5973 }
5974 }
5975 };
5976
5981 {
5982 public:
5983 virtual bool match(
5984 _In_reads_or_z_(end) const char* text,
5985 _In_ size_t start = 0,
5986 _In_ size_t end = SIZE_MAX,
5987 _In_ int flags = match_default)
5988 {
5989 _Assume_(text || start >= end);
5990 this->interval.end = start;
5991 if (type.match(text, this->interval.end, end, flags))
5992 this->interval.end = type.interval.end;
5993 else
5994 goto error;
5995 while (m_space.match(text, this->interval.end, end, flags))
5996 this->interval.end = m_space.interval.end;
5997 if (this->interval.end < end && text[this->interval.end] == '/')
5998 this->interval.end++;
5999 else
6000 goto error;
6001 while (m_space.match(text, this->interval.end, end, flags))
6002 this->interval.end = m_space.interval.end;
6003 if (subtype.match(text, this->interval.end, end, flags))
6004 this->interval.end = subtype.interval.end;
6005 else
6006 goto error;
6007 this->interval.start = start;
6008 return true;
6009
6010 error:
6011 type.invalidate();
6012 subtype.invalidate();
6013 this->interval.invalidate();
6014 return false;
6015 }
6016
6017 virtual void invalidate()
6018 {
6019 type.invalidate();
6020 subtype.invalidate();
6021 parser::invalidate();
6022 }
6023
6024 public:
6025 http_token type;
6026 http_token subtype;
6027
6028 protected:
6029 http_space m_space;
6030 };
6031
6036 {
6037 public:
6038 virtual bool match(
6039 _In_reads_or_z_(end) const char* text,
6040 _In_ size_t start = 0,
6041 _In_ size_t end = SIZE_MAX,
6042 _In_ int flags = match_default)
6043 {
6044 _Assume_(text || start >= end);
6045 if (!http_media_range::match(text, start, end, flags))
6046 goto error;
6047 params.clear();
6048 for (;;) {
6049 if (this->interval.end < end && text[this->interval.end]) {
6050 if (m_space.match(text, this->interval.end, end, flags))
6051 this->interval.end = m_space.interval.end;
6052 else if (text[this->interval.end] == ';') {
6053 this->interval.end++;
6054 while (m_space.match(text, this->interval.end, end, flags))
6055 this->interval.end = m_space.interval.end;
6056 http_parameter param;
6057 if (param.match(text, this->interval.end, end, flags)) {
6058 this->interval.end = param.interval.end;
6059 params.push_back(std::move(param));
6060 }
6061 else
6062 break;
6063 }
6064 else
6065 break;
6066 }
6067 else
6068 break;
6069 }
6070 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
6071 return true;
6072
6073 error:
6074 http_media_range::invalidate();
6075 params.clear();
6076 this->interval.invalidate();
6077 return false;
6078 }
6079
6080 virtual void invalidate()
6081 {
6082 params.clear();
6083 http_media_range::invalidate();
6084 }
6085
6086 public:
6087 std::list<http_parameter> params;
6088 };
6089
6094 {
6095 public:
6096 virtual bool match(
6097 _In_reads_or_z_(end) const char* text,
6098 _In_ size_t start = 0,
6099 _In_ size_t end = SIZE_MAX,
6100 _In_ int flags = match_default)
6101 {
6102 _Assume_(text || start >= end);
6103 this->interval.end = start;
6104 for (;;) {
6105 if (this->interval.end < end && text[this->interval.end]) {
6106 if ((unsigned int)text[this->interval.end] < 0x20 ||
6107 (unsigned int)text[this->interval.end] == 0x7f ||
6108 text[this->interval.end] == ':' ||
6109 text[this->interval.end] == '/' ||
6110 isspace(text[this->interval.end]))
6111 break;
6112 else
6113 this->interval.end++;
6114 }
6115 else
6116 break;
6117 }
6119 this->interval.start = start;
6120 return true;
6121 }
6122 this->interval.invalidate();
6123 return false;
6124 }
6125 };
6126
6130 class http_url_port : public parser
6131 {
6132 public:
6133 http_url_port(_In_ const std::locale& locale = std::locale()) :
6134 parser(locale),
6135 value(0)
6136 {}
6137
6138 virtual bool match(
6139 _In_reads_or_z_(end) const char* text,
6140 _In_ size_t start = 0,
6141 _In_ size_t end = SIZE_MAX,
6142 _In_ int flags = match_default)
6143 {
6144 _Assume_(text || start >= end);
6145 value = 0;
6146 this->interval.end = start;
6147 for (;;) {
6148 if (this->interval.end < end && text[this->interval.end]) {
6149 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6150 size_t _value = (size_t)value * 10 + text[this->interval.end] - '0';
6151 if (_value > (uint16_t)-1) {
6152 value = 0;
6153 this->interval.invalidate();
6154 return false;
6155 }
6156 value = (uint16_t)_value;
6157 this->interval.end++;
6158 }
6159 else
6160 break;
6161 }
6162 else
6163 break;
6164 }
6166 this->interval.start = start;
6167 return true;
6168 }
6169 this->interval.invalidate();
6170 return false;
6171 }
6172
6173 virtual void invalidate()
6174 {
6175 value = 0;
6176 parser::invalidate();
6177 }
6178
6179 public:
6180 uint16_t value;
6181 };
6182
6187 {
6188 public:
6189 virtual bool match(
6190 _In_reads_or_z_(end) const char* text,
6191 _In_ size_t start = 0,
6192 _In_ size_t end = SIZE_MAX,
6193 _In_ int flags = match_default)
6194 {
6195 _Assume_(text || start >= end);
6196 this->interval.end = start;
6197 for (;;) {
6198 if (this->interval.end < end && text[this->interval.end]) {
6199 if ((unsigned int)text[this->interval.end] < 0x20 ||
6200 (unsigned int)text[this->interval.end] == 0x7f ||
6201 text[this->interval.end] == '?' ||
6202 text[this->interval.end] == '/' ||
6203 isspace(text[this->interval.end]))
6204 break;
6205 else
6206 this->interval.end++;
6207 }
6208 else
6209 break;
6210 }
6211 this->interval.start = start;
6212 return true;
6213 }
6214 };
6215
6219 class http_url_path : public parser
6220 {
6221 public:
6222 virtual bool match(
6223 _In_reads_or_z_(end) const char* text,
6224 _In_ size_t start = 0,
6225 _In_ size_t end = SIZE_MAX,
6226 _In_ int flags = match_default)
6227 {
6228 _Assume_(text || start >= end);
6230 this->interval.end = start;
6231 segments.clear();
6232 _Assume_(text || this->interval.end >= end);
6233 if (this->interval.end < end && text[this->interval.end] != '/')
6234 goto error;
6235 this->interval.end++;
6236 s.match(text, this->interval.end, end, flags);
6237 segments.push_back(s);
6238 this->interval.end = s.interval.end;
6239 for (;;) {
6240 if (this->interval.end < end && text[this->interval.end]) {
6241 if (text[this->interval.end] == '/') {
6242 this->interval.end++;
6243 s.match(text, this->interval.end, end, flags);
6244 segments.push_back(s);
6245 this->interval.end = s.interval.end;
6246 }
6247 else
6248 break;
6249 }
6250 else
6251 break;
6252 }
6253 this->interval.start = start;
6254 return true;
6255
6256 error:
6257 segments.clear();
6258 this->interval.invalidate();
6259 return false;
6260 }
6261
6262 virtual void invalidate()
6263 {
6264 segments.clear();
6265 parser::invalidate();
6266 }
6267
6268 public:
6269 std::vector<http_url_path_segment> segments;
6270 };
6271
6276 {
6277 public:
6278 virtual bool match(
6279 _In_reads_or_z_(end) const char* text,
6280 _In_ size_t start = 0,
6281 _In_ size_t end = SIZE_MAX,
6282 _In_ int flags = match_default)
6283 {
6284 _Assume_(text || start >= end);
6285 this->interval.end = start;
6286 name.start = this->interval.end;
6287 for (;;) {
6288 if (this->interval.end < end && text[this->interval.end]) {
6289 if ((unsigned int)text[this->interval.end] < 0x20 ||
6290 (unsigned int)text[this->interval.end] == 0x7f ||
6291 text[this->interval.end] == '&' ||
6292 text[this->interval.end] == '=' ||
6293 isspace(text[this->interval.end]))
6294 break;
6295 else
6296 this->interval.end++;
6297 }
6298 else
6299 break;
6300 }
6302 name.end = this->interval.end;
6303 else
6304 goto error;
6305 if (text[this->interval.end] == '=') {
6306 this->interval.end++;
6307 value.start = this->interval.end;
6308 for (;;) {
6309 if (this->interval.end < end && text[this->interval.end]) {
6310 if ((unsigned int)text[this->interval.end] < 0x20 ||
6311 (unsigned int)text[this->interval.end] == 0x7f ||
6312 text[this->interval.end] == '&' ||
6313 isspace(text[this->interval.end]))
6314 break;
6315 else
6316 this->interval.end++;
6317 }
6318 else
6319 break;
6320 }
6321 value.end = this->interval.end;
6322 }
6323 else {
6324 value.start = 1;
6325 value.end = 0;
6326 }
6327 this->interval.start = start;
6328 return true;
6329
6330 error:
6331 name.start = 1;
6332 name.end = 0;
6333 value.start = 1;
6334 value.end = 0;
6335 this->interval.invalidate();
6336 return false;
6337 }
6338
6339 virtual void invalidate()
6340 {
6341 name.start = 1;
6342 name.end = 0;
6343 value.start = 1;
6344 value.end = 0;
6345 parser::invalidate();
6346 }
6347
6348 public:
6351 };
6352
6356 class http_url : public parser
6357 {
6358 public:
6359 http_url(_In_ const std::locale& locale = std::locale()) :
6360 parser(locale),
6361 port(locale)
6362 {}
6363
6364 virtual bool match(
6365 _In_reads_or_z_(end) const char* text,
6366 _In_ size_t start = 0,
6367 _In_ size_t end = SIZE_MAX,
6368 _In_ int flags = match_default)
6369 {
6370 _Assume_(text || start >= end);
6371 this->interval.end = start;
6372
6373 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", SIZE_MAX, m_locale) == 0) {
6374 this->interval.end += 7;
6375 if (server.match(text, this->interval.end, end, flags))
6376 this->interval.end = server.interval.end;
6377 else
6378 goto error;
6379 if (this->interval.end < end && text[this->interval.end] == ':') {
6380 this->interval.end++;
6381 if (port.match(text, this->interval.end, end, flags))
6382 this->interval.end = port.interval.end;
6383 }
6384 else {
6385 port.invalidate();
6386 port.value = 80;
6387 }
6388 }
6389 else {
6390 server.invalidate();
6391 port.invalidate();
6392 port.value = 80;
6393 }
6394
6395 if (path.match(text, this->interval.end, end, flags))
6396 this->interval.end = path.interval.end;
6397 else
6398 goto error;
6399
6400 params.clear();
6401
6402 if (this->interval.end < end && text[this->interval.end] == '?') {
6403 this->interval.end++;
6404 for (;;) {
6405 if (this->interval.end < end && text[this->interval.end]) {
6406 if ((unsigned int)text[this->interval.end] < 0x20 ||
6407 (unsigned int)text[this->interval.end] == 0x7f ||
6408 isspace(text[this->interval.end]))
6409 break;
6410 else if (text[this->interval.end] == '&')
6411 this->interval.end++;
6412 else {
6413 http_url_parameter param;
6414 if (param.match(text, this->interval.end, end, flags)) {
6415 this->interval.end = param.interval.end;
6416 params.push_back(std::move(param));
6417 }
6418 else
6419 break;
6420 }
6421 }
6422 else
6423 break;
6424 }
6425 }
6426
6427 this->interval.start = start;
6428 return true;
6429
6430 error:
6431 server.invalidate();
6432 port.invalidate();
6433 path.invalidate();
6434 params.clear();
6435 this->interval.invalidate();
6436 return false;
6437 }
6438
6439 virtual void invalidate()
6440 {
6441 server.invalidate();
6442 port.invalidate();
6443 path.invalidate();
6444 params.clear();
6445 parser::invalidate();
6446 }
6447
6448 public:
6449 http_url_server server;
6450 http_url_port port;
6451 http_url_path path;
6452 std::list<http_url_parameter> params;
6453 };
6454
6458 class http_language : public parser
6459 {
6460 public:
6461 virtual bool match(
6462 _In_reads_or_z_(end) const char* text,
6463 _In_ size_t start = 0,
6464 _In_ size_t end = SIZE_MAX,
6465 _In_ int flags = match_default)
6466 {
6467 _Assume_(text || start >= end);
6468 this->interval.end = start;
6469 components.clear();
6470 for (;;) {
6471 if (this->interval.end < end && text[this->interval.end]) {
6473 k.end = this->interval.end;
6474 for (;;) {
6475 if (k.end < end && text[k.end]) {
6476 if (isalpha(text[k.end]))
6477 k.end++;
6478 else
6479 break;
6480 }
6481 else
6482 break;
6483 }
6484 if (this->interval.end < k.end) {
6485 k.start = this->interval.end;
6486 this->interval.end = k.end;
6487 components.push_back(k);
6488 }
6489 else
6490 break;
6491 if (this->interval.end < end && text[this->interval.end] == '-')
6492 this->interval.end++;
6493 else
6494 break;
6495 }
6496 else
6497 break;
6498 }
6499 if (!components.empty()) {
6500 this->interval.start = start;
6501 this->interval.end = components.back().end;
6502 return true;
6503 }
6504 this->interval.invalidate();
6505 return false;
6506 }
6507
6508 virtual void invalidate()
6509 {
6510 components.clear();
6511 parser::invalidate();
6512 }
6513
6514 public:
6515 std::vector<stdex::interval<size_t>> components;
6516 };
6517
6521 class http_weight : public parser
6522 {
6523 public:
6524 http_weight(_In_ const std::locale& locale = std::locale()) :
6525 parser(locale),
6526 value(1.0f)
6527 {}
6528
6529 virtual bool match(
6530 _In_reads_or_z_(end) const char* text,
6531 _In_ size_t start = 0,
6532 _In_ size_t end = SIZE_MAX,
6533 _In_ int flags = match_default)
6534 {
6535 _Assume_(text || start >= end);
6536 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
6537 this->interval.end = start;
6538 for (;;) {
6539 if (this->interval.end < end && text[this->interval.end]) {
6540 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6541 celi_del = celi_del * 10 + text[this->interval.end] - '0';
6542 this->interval.end++;
6543 }
6544 else if (text[this->interval.end] == '.') {
6545 this->interval.end++;
6546 for (;;) {
6547 if (this->interval.end < end && text[this->interval.end]) {
6548 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6549 decimalni_del = decimalni_del * 10 + text[this->interval.end] - '0';
6550 decimalni_del_n *= 10;
6551 this->interval.end++;
6552 }
6553 else
6554 break;
6555 }
6556 else
6557 break;
6558 }
6559 break;
6560 }
6561 else
6562 break;
6563 }
6564 else
6565 break;
6566 }
6569 this->interval.start = start;
6570 return true;
6571 }
6572 value = 1.0f;
6573 this->interval.invalidate();
6574 return false;
6575 }
6576
6577 virtual void invalidate()
6578 {
6579 value = 1.0f;
6580 parser::invalidate();
6581 }
6582
6583 public:
6584 float value;
6585 };
6586
6590 class http_asterisk : public parser
6591 {
6592 public:
6593 virtual bool match(
6594 _In_reads_or_z_(end) const char* text,
6595 _In_ size_t start = 0,
6596 _In_ size_t end = SIZE_MAX,
6597 _In_ int flags = match_default)
6598 {
6599 _Assume_(text || end <= start);
6600 if (start < end && text[start] == '*') {
6601 this->interval.end = (this->interval.start = start) + 1;
6602 return true;
6603 }
6604 this->interval.invalidate();
6605 return false;
6606 }
6607 };
6608
6612 template <class T, class T_asterisk = http_asterisk>
6614 {
6615 public:
6616 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
6617 parser(locale),
6618 factor(locale)
6619 {}
6620
6621 virtual bool match(
6622 _In_reads_or_z_(end) const char* text,
6623 _In_ size_t start = 0,
6624 _In_ size_t end = SIZE_MAX,
6625 _In_ int flags = match_default)
6626 {
6627 _Assume_(text || start >= end);
6628 size_t konec_vrednosti;
6629 this->interval.end = start;
6630 if (asterisk.match(text, this->interval.end, end, flags)) {
6631 this->interval.end = konec_vrednosti = asterisk.interval.end;
6632 value.invalidate();
6633 }
6634 else if (value.match(text, this->interval.end, end, flags)) {
6635 this->interval.end = konec_vrednosti = value.interval.end;
6636 asterisk.invalidate();
6637 }
6638 else {
6639 asterisk.invalidate();
6640 value.invalidate();
6641 this->interval.invalidate();
6642 return false;
6643 }
6644
6645 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
6646 if (this->interval.end < end && text[this->interval.end] == ';') {
6647 this->interval.end++;
6648 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
6649 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
6650 this->interval.end++;
6651 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
6652 if (this->interval.end < end && text[this->interval.end] == '=') {
6653 this->interval.end++;
6654 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
6655 if (factor.match(text, this->interval.end, end, flags))
6656 this->interval.end = factor.interval.end;
6657 }
6658 }
6659 }
6660 if (!factor.interval) {
6661 factor.invalidate();
6663 }
6664 this->interval.start = start;
6665 return true;
6666 }
6667
6668 virtual void invalidate()
6669 {
6670 asterisk.invalidate();
6671 value.invalidate();
6672 factor.invalidate();
6673 parser::invalidate();
6674 }
6675
6676 public:
6677 T_asterisk asterisk;
6678 T value;
6679 http_weight factor;
6680 };
6681
6686 {
6687 public:
6688 virtual bool match(
6689 _In_reads_or_z_(end) const char* text,
6690 _In_ size_t start = 0,
6691 _In_ size_t end = SIZE_MAX,
6692 _In_ int flags = match_default)
6693 {
6694 _Assume_(text || start >= end);
6695 this->interval.end = start;
6696 if (this->interval.end < end && text[this->interval.end] == '$')
6697 this->interval.end++;
6698 else
6699 goto error;
6700 if (name.match(text, this->interval.end, end, flags))
6701 this->interval.end = name.interval.end;
6702 else
6703 goto error;
6704 while (m_space.match(text, this->interval.end, end, flags))
6705 this->interval.end = m_space.interval.end;
6706 if (this->interval.end < end && text[this->interval.end] == '=')
6707 this->interval.end++;
6708 else
6709 goto error;
6710 while (m_space.match(text, this->interval.end, end, flags))
6711 this->interval.end = m_space.interval.end;
6712 if (value.match(text, this->interval.end, end, flags))
6713 this->interval.end = value.interval.end;
6714 else
6715 goto error;
6716 this->interval.start = start;
6717 return true;
6718
6719 error:
6720 name.invalidate();
6721 value.invalidate();
6722 this->interval.invalidate();
6723 return false;
6724 }
6725
6726 virtual void invalidate()
6727 {
6728 name.invalidate();
6729 value.invalidate();
6730 parser::invalidate();
6731 }
6732
6733 public:
6734 http_token name;
6735 http_value value;
6736
6737 protected:
6738 http_space m_space;
6739 };
6740
6744 class http_cookie : public parser
6745 {
6746 public:
// Matches an HTTP cookie: `name = value` followed by any number of
// `; parameter` items, which are collected in `params`.
//
// text  - input buffer
// start - index at which matching begins
// end   - index one past the last character that may be examined
// flags - forwarded unchanged to every sub-parser
//
// Returns true on success; this->interval then runs from start to the end of
// the last parameter, or to the end of the value when there are none.
// On failure name and value are invalidated, params is cleared,
// this->interval is invalidated and false is returned.
6747 virtual bool match(
6748 _In_reads_or_z_(end) const char* text,
6749 _In_ size_t start = 0,
6750 _In_ size_t end = SIZE_MAX,
6751 _In_ int flags = match_default)
6752 {
6753 _Assume_(text || start >= end);
6754 this->interval.end = start;
// Mandatory part: name, optional spaces, '=', optional spaces, value.
6755 if (name.match(text, this->interval.end, end, flags))
6756 this->interval.end = name.interval.end;
6757 else
6758 goto error;
6759 while (m_space.match(text, this->interval.end, end, flags))
6760 this->interval.end = m_space.interval.end;
6761 if (this->interval.end < end && text[this->interval.end] == '=')
6762 this->interval.end++;
6763 else
6764 goto error;
6765 while (m_space.match(text, this->interval.end, end, flags))
6766 this->interval.end = m_space.interval.end;
6767 if (value.match(text, this->interval.end, end, flags))
6768 this->interval.end = value.interval.end;
6769 else
6770 goto error;
// Optional part: parameters separated by ';'. The loop ends at the end of the
// input, at a zero character, at anything that is neither space nor ';', or
// when a ';' is not followed by a parameter.
6771 params.clear();
6772 for (;;) {
6773 if (this->interval.end < end && text[this->interval.end]) {
6774 if (m_space.match(text, this->interval.end, end, flags))
6775 this->interval.end = m_space.interval.end;
6776 else if (text[this->interval.end] == ';') {
6777 this->interval.end++;
6778 while (m_space.match(text, this->interval.end, end, flags))
6779 this->interval.end = m_space.interval.end;
// NOTE(review): the embedded listing numbering jumps from 6779 to 6781; the
// declaration of `param` is presumably on the missing line - check the
// original file.
6781 if (param.match(text, this->interval.end, end, flags)) {
6782 this->interval.end = param.interval.end;
6783 params.push_back(std::move(param));
6784 }
6785 else
6786 break;
6787 }
6788 else
6789 break;
6790 }
6791 else
6792 break;
6793 }
6794 this->interval.start = start;
// Trailing spaces and a dangling ';' consumed by the loop above are excluded
// again: the match ends at the last successfully parsed item.
6795 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
6796 return true;
6797
6798 error:
6799 name.invalidate();
6800 value.invalidate();
6801 params.clear();
6802 this->interval.invalidate();
6803 return false;
6804 }
6805
6806 virtual void invalidate()
6807 {
6808 name.invalidate();
6809 value.invalidate();
6810 params.clear();
6811 parser::invalidate();
6812 }
6813
6814 public:
6817 std::list<http_cookie_parameter> params;
6818
6819 protected:
6820 http_space m_space;
6821 };
6822
6826 class http_agent : public parser
6827 {
6828 public:
// Matches an HTTP agent token written as `type` or `type/version`.
//
// `type` runs up to the first '/', whitespace, zero character or `end`.
// If it was ended by '/', `version` runs from there up to the next whitespace,
// zero character or `end`. The terminating whitespace is not consumed.
//
// text  - input buffer
// start - index at which matching begins
// end   - index one past the last character that may be examined
// flags - not used by this function
6829 virtual bool match(
6830 _In_reads_or_z_(end) const char* text,
6831 _In_ size_t start = 0,
6832 _In_ size_t end = SIZE_MAX,
6833 _In_ int flags = match_default)
6834 {
6835 _Assume_(text || start >= end);
6836 this->interval.end = start;
6837 type.start = this->interval.end;
6838 for (;;) {
6839 if (this->interval.end < end && text[this->interval.end]) {
6840 if (text[this->interval.end] == '/') {
// '/' closes the type and opens the version.
6841 type.end = this->interval.end;
6842 this->interval.end++;
6843 version.start = this->interval.end;
6844 for (;;) {
6845 if (this->interval.end < end && text[this->interval.end]) {
6846 if (isspace(text[this->interval.end])) {
6847 version.end = this->interval.end;
6848 break;
6849 }
6850 else
6851 this->interval.end++;
6852 }
6853 else {
6854 version.end = this->interval.end;
6855 break;
6856 }
6857 }
6858 break;
6859 }
6860 else if (isspace(text[this->interval.end])) {
// Type without a version; `version` is left untouched on this path.
6861 type.end = this->interval.end;
6862 break;
6863 }
6864 else
6865 this->interval.end++;
6866 }
6867 else {
6868 type.end = this->interval.end;
6869 break;
6870 }
6871 }
// NOTE(review): the embedded listing numbering jumps from 6871 to 6873; the
// condition opening this success branch is on the missing line - check the
// original file.
6873 this->interval.start = start;
6874 return true;
6875 }
// Failure: mark type and version as unset (start 1, end 0) and invalidate the
// match interval.
6876 type.start = 1;
6877 type.end = 0;
6878 version.start = 1;
6879 version.end = 0;
6880 this->interval.invalidate();
6881 return false;
6882 }
6883
6884 virtual void invalidate()
6885 {
6886 type.start = 1;
6887 type.end = 0;
6888 version.start = 1;
6889 version.end = 0;
6890 parser::invalidate();
6891 }
6892
6893 public:
6896 };
6897
6901 class http_protocol : public parser
6902 {
6903 public:
6904 http_protocol(_In_ const std::locale& locale = std::locale()) :
6905 parser(locale),
6906 version(0x009)
6907 {}
6908
6909 virtual bool match(
6910 _In_reads_or_z_(end) const char* text,
6911 _In_ size_t start = 0,
6912 _In_ size_t end = SIZE_MAX,
6913 _In_ int flags = match_default)
6914 {
6915 _Assume_(text || start >= end);
6916 this->interval.end = start;
6917 type.start = this->interval.end;
6918 for (;;) {
6919 if (this->interval.end < end && text[this->interval.end]) {
6920 if (text[this->interval.end] == '/') {
6921 type.end = this->interval.end;
6922 this->interval.end++;
6923 break;
6924 }
6925 else if (isspace(text[this->interval.end]))
6926 goto error;
6927 else
6928 this->interval.end++;
6929 }
6930 else {
6931 type.end = this->interval.end;
6932 goto error;
6933 }
6934 }
6935 version_maj.start = this->interval.end;
6936 for (;;) {
6937 if (this->interval.end < end && text[this->interval.end]) {
6938 if (text[this->interval.end] == '.') {
6939 version_maj.end = this->interval.end;
6940 this->interval.end++;
6941 version_min.start = this->interval.end;
6942 for (;;) {
6943 if (this->interval.end < end && text[this->interval.end]) {
6944 if (isspace(text[this->interval.end])) {
6945 version_min.end = this->interval.end;
6946 version =
6947 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6948 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6949 break;
6950 }
6951 else
6952 this->interval.end++;
6953 }
6954 else
6955 goto error;
6956 }
6957 break;
6958 }
6959 else if (isspace(text[this->interval.end])) {
6960 version_maj.end = this->interval.end;
6961 version_min.start = 1;
6962 version_min.end = 0;
6963 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6964 break;
6965 }
6966 else
6967 this->interval.end++;
6968 }
6969 else
6970 goto error;
6971 }
6972 this->interval.start = start;
6973 return true;
6974
6975 error:
6976 type.start = 1;
6977 type.end = 0;
6978 version_maj.start = 1;
6979 version_maj.end = 0;
6980 version_min.start = 1;
6981 version_min.end = 0;
6982 version = 0x009;
6983 this->interval.invalidate();
6984 return false;
6985 }
6986
6987 virtual void invalidate()
6988 {
6989 type.start = 1;
6990 type.end = 0;
6991 version_maj.start = 1;
6992 version_maj.end = 0;
6993 version_min.start = 1;
6994 version_min.end = 0;
6995 version = 0x009;
6996 parser::invalidate();
6997 }
6998
6999 public:
7001 stdex::interval<size_t> version_maj;
7002 stdex::interval<size_t> version_min;
7004 };
7005
7009 class http_request : public parser
7010 {
7011 public:
7012 http_request(_In_ const std::locale& locale = std::locale()) :
7013 parser(locale),
7014 url(locale),
7015 protocol(locale)
7016 {}
7017
7018 virtual bool match(
7019 _In_reads_or_z_(end) const char* text,
7020 _In_ size_t start = 0,
7021 _In_ size_t end = SIZE_MAX,
7022 _In_ int flags = match_default)
7023 {
7024 _Assume_(text || start >= end);
7025 this->interval.end = start;
7026
7027 for (;;) {
7028 if (m_line_break.match(text, this->interval.end, end, flags))
7029 goto error;
7030 else if (this->interval.end < end && text[this->interval.end]) {
7031 if (isspace(text[this->interval.end]))
7032 this->interval.end++;
7033 else
7034 break;
7035 }
7036 else
7037 goto error;
7038 }
7039 verb.start = this->interval.end;
7040 for (;;) {
7041 if (m_line_break.match(text, this->interval.end, end, flags))
7042 goto error;
7043 else if (this->interval.end < end && text[this->interval.end]) {
7044 if (isspace(text[this->interval.end])) {
7045 verb.end = this->interval.end;
7046 this->interval.end++;
7047 break;
7048 }
7049 else
7050 this->interval.end++;
7051 }
7052 else
7053 goto error;
7054 }
7055
7056 for (;;) {
7057 if (m_line_break.match(text, this->interval.end, end, flags))
7058 goto error;
7059 else if (this->interval.end < end && text[this->interval.end]) {
7060 if (isspace(text[this->interval.end]))
7061 this->interval.end++;
7062 else
7063 break;
7064 }
7065 else
7066 goto error;
7067 }
7068 if (url.match(text, this->interval.end, end, flags))
7069 this->interval.end = url.interval.end;
7070 else
7071 goto error;
7072
7073 protocol.invalidate();
7074 for (;;) {
7075 if (m_line_break.match(text, this->interval.end, end, flags)) {
7076 this->interval.end = m_line_break.interval.end;
7077 goto end;
7078 }
7079 else if (this->interval.end < end && text[this->interval.end]) {
7080 if (isspace(text[this->interval.end]))
7081 this->interval.end++;
7082 else
7083 break;
7084 }
7085 else
7086 goto end;
7087 }
7088 for (;;) {
7089 if (m_line_break.match(text, this->interval.end, end, flags)) {
7090 this->interval.end = m_line_break.interval.end;
7091 goto end;
7092 }
7093 else if (protocol.match(text, this->interval.end, end, flags)) {
7094 this->interval.end = protocol.interval.end;
7095 break;
7096 }
7097 else
7098 goto end;
7099 }
7100
7101 for (;;) {
7102 if (m_line_break.match(text, this->interval.end, end, flags)) {
7103 this->interval.end = m_line_break.interval.end;
7104 break;
7105 }
7106 else if (this->interval.end < end && text[this->interval.end])
7107 this->interval.end++;
7108 else
7109 goto end;
7110 }
7111
7112 end:
7113 this->interval.start = start;
7114 return true;
7115
7116 error:
7117 verb.start = 1;
7118 verb.end = 0;
7119 url.invalidate();
7120 protocol.invalidate();
7121 this->interval.invalidate();
7122 return false;
7123 }
7124
7125 virtual void invalidate()
7126 {
7127 verb.start = 1;
7128 verb.end = 0;
7129 url.invalidate();
7130 protocol.invalidate();
7131 parser::invalidate();
7132 }
7133
7134 public:
7136 http_url url;
7137 http_protocol protocol;
7138
7139 protected:
7140 http_line_break m_line_break;
7141 };
7142
7146 class http_header : public parser
7147 {
7148 public:
7149 virtual bool match(
7150 _In_reads_or_z_(end) const char* text,
7151 _In_ size_t start = 0,
7152 _In_ size_t end = SIZE_MAX,
7153 _In_ int flags = match_default)
7154 {
7155 _Assume_(text || start >= end);
7156 this->interval.end = start;
7157
7158 if (m_line_break.match(text, this->interval.end, end, flags) ||
7159 (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])))
7160 goto error;
7161 name.start = this->interval.end;
7162 for (;;) {
7163 if (m_line_break.match(text, this->interval.end, end, flags))
7164 goto error;
7165 else if (this->interval.end < end && text[this->interval.end]) {
7166 if (isspace(text[this->interval.end])) {
7167 name.end = this->interval.end;
7168 this->interval.end++;
7169 for (;;) {
7170 if (m_line_break.match(text, this->interval.end, end, flags))
7171 goto error;
7172 else if (this->interval.end < end && text[this->interval.end]) {
7173 if (isspace(text[this->interval.end]))
7174 this->interval.end++;
7175 else
7176 break;
7177 }
7178 else
7179 goto error;
7180 }
7181 if (this->interval.end < end && text[this->interval.end] == ':') {
7182 this->interval.end++;
7183 break;
7184 }
7185 else
7186 goto error;
7187 break;
7188 }
7189 else if (text[this->interval.end] == ':') {
7190 name.end = this->interval.end;
7191 this->interval.end++;
7192 break;
7193 }
7194 else
7195 this->interval.end++;
7196 }
7197 else
7198 goto error;
7199 }
7200 value.start = SIZE_MAX;
7201 value.end = 0;
7202 for (;;) {
7203 if (m_line_break.match(text, this->interval.end, end, flags)) {
7204 this->interval.end = m_line_break.interval.end;
7205 if (!m_line_break.match(text, this->interval.end, end, flags) &&
7206 this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end]))
7207 this->interval.end++;
7208 else
7209 break;
7210 }
7211 else if (this->interval.end < end && text[this->interval.end]) {
7212 if (isspace(text[this->interval.end]))
7213 this->interval.end++;
7214 else {
7215 if (value.start == SIZE_MAX) value.start = this->interval.end;
7216 value.end = ++this->interval.end;
7217 }
7218 }
7219 else
7220 break;
7221 }
7222 this->interval.start = start;
7223 return true;
7224
7225 error:
7226 name.start = 1;
7227 name.end = 0;
7228 value.start = 1;
7229 value.end = 0;
7230 this->interval.invalidate();
7231 return false;
7232 }
7233
7234 virtual void invalidate()
7235 {
7236 name.start = 1;
7237 name.end = 0;
7238 value.start = 1;
7239 value.end = 0;
7240 parser::invalidate();
7241 }
7242
7243 public:
7246
7247 protected:
7248 http_line_break m_line_break;
7249 };
7250
7254 template <class _Key, class T>
7255 class http_value_collection : public T
7256 {
7257 public:
7258 void insert(
7259 _In_reads_or_z_(end) const char* text,
7260 _In_ size_t start = 0,
7261 _In_ size_t end = SIZE_MAX,
7262 _In_ int flags = match_default)
7263 {
7264 while (start < end) {
7265 while (start < end && text[start] && isspace(text[start])) start++;
7266 if (start < end && text[start] == ',') {
7267 start++;
7268 while (start < end&& text[start] && isspace(text[start])) start++;
7269 }
7270 _Key el;
7271 if (el.match(text, start, end, flags)) {
7272 start = el.interval.end;
7273 T::insert(std::move(el));
7274 }
7275 else
7276 break;
7277 }
7278 }
7279 };
7280
7281 template <class T>
7283 constexpr bool operator()(const T& a, const T& b) const noexcept
7284 {
7285 return a.factor.value > b.factor.value;
7286 }
7287 };
7288
7292 template <class T, class _Alloc = std::allocator<T>>
7294
7298 template <class T>
7300 {
7301 public:
7303 _In_ const std::shared_ptr<basic_parser<T>>& quote,
7304 _In_ const std::shared_ptr<basic_parser<T>>& chr,
7305 _In_ const std::shared_ptr<basic_parser<T>>& escape,
7306 _In_ const std::shared_ptr<basic_parser<T>>& sol,
7307 _In_ const std::shared_ptr<basic_parser<T>>& bs,
7308 _In_ const std::shared_ptr<basic_parser<T>>& ff,
7309 _In_ const std::shared_ptr<basic_parser<T>>& lf,
7310 _In_ const std::shared_ptr<basic_parser<T>>& cr,
7311 _In_ const std::shared_ptr<basic_parser<T>>& htab,
7312 _In_ const std::shared_ptr<basic_parser<T>>& uni,
7313 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
7314 _In_ const std::locale& locale = std::locale()) :
7315 basic_parser<T>(locale),
7316 m_quote(quote),
7317 m_chr(chr),
7318 m_escape(escape),
7319 m_sol(sol),
7320 m_bs(bs),
7321 m_ff(ff),
7322 m_lf(lf),
7323 m_cr(cr),
7324 m_htab(htab),
7325 m_uni(uni),
7326 m_hex(hex)
7327 {}
7328
7329 virtual bool match(
7330 _In_reads_or_z_(end) const T* text,
7331 _In_ size_t start = 0,
7332 _In_ size_t end = SIZE_MAX,
7333 _In_ int flags = match_default)
7334 {
7335 _Assume_(text || start >= end);
7336 this->interval.end = start;
7337 if (m_quote->match(text, this->interval.end, end, flags)) {
7338 this->interval.end = m_quote->interval.end;
7339 value.clear();
7340 for (;;) {
7341 if (m_quote->match(text, this->interval.end, end, flags)) {
7342 this->interval.start = start;
7343 this->interval.end = m_quote->interval.end;
7344 return true;
7345 }
7346 if (m_escape->match(text, this->interval.end, end, flags)) {
7347 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
7348 value += '"'; this->interval.end = m_quote->interval.end;
7349 continue;
7350 }
7351 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
7352 value += '/'; this->interval.end = m_sol->interval.end;
7353 continue;
7354 }
7355 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
7356 value += '\b'; this->interval.end = m_bs->interval.end;
7357 continue;
7358 }
7359 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
7360 value += '\f'; this->interval.end = m_ff->interval.end;
7361 continue;
7362 }
7363 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
7364 value += '\n'; this->interval.end = m_lf->interval.end;
7365 continue;
7366 }
7367 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
7368 value += '\r'; this->interval.end = m_cr->interval.end;
7369 continue;
7370 }
7371 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
7372 value += '\t'; this->interval.end = m_htab->interval.end;
7373 continue;
7374 }
7375 if (
7376 m_uni->match(text, m_escape->interval.end, end, flags) &&
7377 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
7378 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
7379 {
7380 _Assume_(m_hex->value <= 0xffff);
7381 if (sizeof(T) == 1) {
7382 if (m_hex->value > 0x7ff) {
7383 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
7384 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
7385 value += (T)(0x80 | (m_hex->value & 0x3f));
7386 }
7387 else if (m_hex->value > 0x7f) {
7388 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
7389 value += (T)(0x80 | (m_hex->value & 0x3f));
7390 }
7391 else
7392 value += (T)(m_hex->value & 0x7f);
7393 }
7394 else
7395 value += (T)m_hex->value;
7396 this->interval.end = m_hex->interval.end;
7397 continue;
7398 }
7399 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
7400 value += '\\'; this->interval.end = m_escape->interval.end;
7401 continue;
7402 }
7403 }
7404 if (m_chr->match(text, this->interval.end, end, flags)) {
7405 value.append(text + m_chr->interval.start, m_chr->interval.size());
7406 this->interval.end = m_chr->interval.end;
7407 continue;
7408 }
7409 break;
7410 }
7411 }
7412 value.clear();
7413 this->interval.invalidate();
7414 return false;
7415 }
7416
// Drops any previous match by clearing the decoded string `value`.
7417 virtual void invalidate()
7418 {
7419 value.clear();
// NOTE(review): the embedded listing numbering jumps from 7419 to 7421; the
// call that resets the base parser is presumably on the missing line - check
// the original file.
7421 }
7422
7423 public:
7424 std::basic_string<T> value;
7425
7426 protected:
7427 std::shared_ptr<basic_parser<T>> m_quote;
7428 std::shared_ptr<basic_parser<T>> m_chr;
7429 std::shared_ptr<basic_parser<T>> m_escape;
7430 std::shared_ptr<basic_parser<T>> m_sol;
7431 std::shared_ptr<basic_parser<T>> m_bs;
7432 std::shared_ptr<basic_parser<T>> m_ff;
7433 std::shared_ptr<basic_parser<T>> m_lf;
7434 std::shared_ptr<basic_parser<T>> m_cr;
7435 std::shared_ptr<basic_parser<T>> m_htab;
7436 std::shared_ptr<basic_parser<T>> m_uni;
7437 std::shared_ptr<basic_integer16<T>> m_hex;
7438 };
7439
7442#ifdef _UNICODE
7443 using tjson_string = wjson_string;
7444#else
7445 using tjson_string = json_string;
7446#endif
7447
7451 template <class T>
7453 {
7454 public:
7455 virtual bool match(
7456 _In_reads_or_z_opt_(end) const T* text,
7457 _In_ size_t start = 0,
7458 _In_ size_t end = SIZE_MAX,
7459 _In_ int flags = match_multiline)
7460 {
7461 _Unreferenced_(flags);
7462 _Assume_(text || start + 1 >= end);
7463 if (start + 1 < end &&
7464 text[start] == '/' &&
7465 text[start + 1] == '*')
7466 {
7467 // /*
7468 this->content.start = this->interval.end = start + 2;
7469 for (;;) {
7470 if (this->interval.end >= end || !text[this->interval.end])
7471 break;
7472 if (this->interval.end + 1 < end &&
7473 text[this->interval.end] == '*' &&
7474 text[this->interval.end + 1] == '/')
7475 {
7476 // /*...*/
7477 this->content.end = this->interval.end;
7478 this->interval.start = start;
7479 this->interval.end = this->interval.end + 2;
7480 return true;
7481 }
7482 this->interval.end++;
7483 }
7484 }
7485 this->content.invalidate();
7486 this->interval.invalidate();
7487 return false;
7488 }
7489
7490 virtual void invalidate()
7491 {
7492 this->content.invalidate();
7493 basic_parser::invalidate();
7494 }
7495
7496 public:
7498 };
7499
7502#ifdef _UNICODE
7503 using tcss_comment = wcss_comment;
7504#else
7505 using tcss_comment = css_comment;
7506#endif
7507
7511 template <class T>
7512 class basic_css_cdo : public basic_parser<T>
7513 {
7514 public:
7515 virtual bool match(
7516 _In_reads_or_z_opt_(end) const T* text,
7517 _In_ size_t start = 0,
7518 _In_ size_t end = SIZE_MAX,
7519 _In_ int flags = match_multiline)
7520 {
7521 _Unreferenced_(flags);
7522 _Assume_(text || start + 3 >= end);
7523 if (start + 3 < end &&
7524 text[start] == '<' &&
7525 text[start + 1] == '!' &&
7526 text[start + 2] == '-' &&
7527 text[start + 3] == '-')
7528 {
7529 this->interval.start = start;
7530 this->interval.end = start + 4;
7531 return true;
7532 }
7533 this->interval.invalidate();
7534 return false;
7535 }
7536 };
7537
7540#ifdef _UNICODE
7541 using tcss_cdo = wcss_cdo;
7542#else
7543 using tcss_cdo = css_cdo;
7544#endif
7545
7549 template <class T>
7550 class basic_css_cdc : public basic_parser<T>
7551 {
7552 public:
7553 virtual bool match(
7554 _In_reads_or_z_opt_(end) const T* text,
7555 _In_ size_t start = 0,
7556 _In_ size_t end = SIZE_MAX,
7557 _In_ int flags = match_multiline)
7558 {
7559 _Unreferenced_(flags);
7560 _Assume_(text || start + 2 >= end);
7561 if (start + 2 < end &&
7562 text[start] == '-' &&
7563 text[start + 1] == '-' &&
7564 text[start + 2] == '>')
7565 {
7566 this->interval.start = start;
7567 this->interval.end = start + 3;
7568 return true;
7569 }
7570 this->interval.invalidate();
7571 return false;
7572 }
7573 };
7574
7577#ifdef _UNICODE
7578 using tcss_cdc = wcss_cdc;
7579#else
7580 using tcss_cdc = css_cdc;
7581#endif
7582
7586 template <class T>
7588 {
7589 public:
7590 virtual bool match(
7591 _In_reads_or_z_opt_(end) const T* text,
7592 _In_ size_t start = 0,
7593 _In_ size_t end = SIZE_MAX,
7594 _In_ int flags = match_multiline)
7595 {
7596 _Unreferenced_(flags);
7597 this->interval.end = start;
7598 _Assume_(text || this->interval.end >= end);
7599 if (this->interval.end < end &&
7600 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7601 {
7602 // "Quoted...
7603 T quote = text[this->interval.end];
7604 this->content.start = ++this->interval.end;
7605 for (;;) {
7606 if (this->interval.end >= end || !text[this->interval.end])
7607 break;
7608 if (text[this->interval.end] == quote) {
7609 // End quote"
7610 this->content.end = this->interval.end;
7611 this->interval.start = start;
7612 this->interval.end++;
7613 return true;
7614 }
7615 if (this->interval.end + 1 < end &&
7616 text[this->interval.end] == '\\' &&
7617 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7618 {
7619 // Escaped quote
7620 this->interval.end = this->interval.end + 2;
7621 }
7622 else
7623 this->interval.end++;
7624 }
7625 }
7626
7627 this->content.invalidate();
7628 this->interval.invalidate();
7629 return false;
7630 }
7631
7632 virtual void invalidate()
7633 {
7634 this->content.invalidate();
7635 basic_parser::invalidate();
7636 }
7637
7638 public:
7640 };
7641
7644#ifdef _UNICODE
7645 using tcss_string = wcss_string;
7646#else
7647 using tcss_string = css_string;
7648#endif
7649
7653 template <class T>
7654 class basic_css_uri : public basic_parser<T>
7655 {
7656 public:
7657 virtual bool match(
7658 _In_reads_or_z_opt_(end) const T* text,
7659 _In_ size_t start = 0,
7660 _In_ size_t end = SIZE_MAX,
7661 _In_ int flags = match_multiline)
7662 {
7663 _Unreferenced_(flags);
7664 this->interval.end = start;
7665 _Assume_(text || this->interval.end + 3 >= end);
7666 if (this->interval.end + 3 < end &&
7667 (text[this->interval.end] == 'u' || text[this->interval.end] == 'U') &&
7668 (text[this->interval.end + 1] == 'r' || text[this->interval.end + 1] == 'R') &&
7669 (text[this->interval.end + 2] == 'l' || text[this->interval.end + 2] == 'L') &&
7670 text[this->interval.end + 3] == '(')
7671 {
7672 // url(
7673 this->interval.end = this->interval.end + 4;
7674
7675 // Skip whitespace.
7676 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7677 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7678
7679 if (this->interval.end < end &&
7680 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7681 {
7682 // url("Quoted...
7683 T quote = text[this->interval.end];
7684 this->content.start = ++this->interval.end;
7685 for (;;) {
7686 if (this->interval.end >= end || !text[this->interval.end])
7687 goto error;
7688 if (text[this->interval.end] == quote) {
7689 // End quote"
7690 this->content.end = this->interval.end;
7691 this->interval.end++;
7692 break;
7693 }
7694 if (this->interval.end + 1 < end &&
7695 text[this->interval.end] == '\\' &&
7696 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7697 {
7698 // Escaped quote
7699 this->interval.end = this->interval.end + 2;
7700 }
7701 else
7702 this->interval.end++;
7703 }
7704
7705 // Skip whitespace.
7706 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7707
7708 if (this->interval.end < end &&
7709 text[this->interval.end] == ')')
7710 {
7711 // url("...")
7712 this->interval.start = start;
7713 this->interval.end++;
7714 return true;
7715 }
7716 }
7717 else {
7718 // url(...
7719 this->content.start = content.end = this->interval.end;
7720 for (;;) {
7721 if (this->interval.end >= end || !text[this->interval.end])
7722 goto error;
7723 if (text[this->interval.end] == ')') {
7724 // url(...)
7725 this->interval.start = start;
7726 this->interval.end++;
7727 return true;
7728 }
7729 if (ctype.is(ctype.space, text[this->interval.end]))
7730 this->interval.end++;
7731 else
7732 this->content.end = ++this->interval.end;
7733 }
7734 }
7735 }
7736
7737 error:
7738 this->content.invalidate();
7739 this->interval.invalidate();
7740 return false;
7741 }
7742
7743 virtual void invalidate()
7744 {
7745 this->content.invalidate();
7746 basic_parser::invalidate();
7747 }
7748
7749 public:
7751 };
7752
7755#ifdef _UNICODE
7756 using tcss_uri = wcss_uri;
7757#else
7758 using tcss_uri = css_uri;
7759#endif
7760
7764 template <class T>
7766 {
7767 public:
7768 virtual bool match(
7769 _In_reads_or_z_opt_(end) const T* text,
7770 _In_ size_t start = 0,
7771 _In_ size_t end = SIZE_MAX,
7772 _In_ int flags = match_multiline)
7773 {
7774 _Unreferenced_(flags);
7775 this->interval.end = start;
7776 _Assume_(text || this->interval.end + 6 >= end);
7777 if (this->interval.end + 6 < end &&
7778 text[this->interval.end] == '@' &&
7779 (text[this->interval.end + 1] == 'i' || text[this->interval.end + 1] == 'I') &&
7780 (text[this->interval.end + 2] == 'm' || text[this->interval.end + 2] == 'M') &&
7781 (text[this->interval.end + 3] == 'p' || text[this->interval.end + 3] == 'P') &&
7782 (text[this->interval.end + 4] == 'o' || text[this->interval.end + 4] == 'O') &&
7783 (text[this->interval.end + 5] == 'r' || text[this->interval.end + 5] == 'R') &&
7784 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T'))
7785 {
7786 // @import...
7787 this->interval.end = this->interval.end + 7;
7788
7789 // Skip whitespace.
7790 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7791 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7792
7793 if (this->interval.end < end &&
7794 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7795 {
7796 // @import "Quoted
7797 T quote = text[this->interval.end];
7798 this->content.start = ++this->interval.end;
7799 for (;;) {
7800 if (this->interval.end >= end || !text[this->interval.end])
7801 goto error;
7802 if (text[this->interval.end] == quote) {
7803 // End quote"
7804 this->content.end = this->interval.end;
7805 this->interval.start = start;
7806 this->interval.end++;
7807 return true;
7808 }
7809 if (this->interval.end + 1 < end &&
7810 text[this->interval.end] == '\\' &&
7811 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7812 {
7813 // Escaped quote
7814 this->interval.end = this->interval.end + 2;
7815 }
7816 else
7817 this->interval.end++;
7818 }
7819 }
7820 }
7821
7822 error:
7823 this->content.invalidate();
7824 this->interval.invalidate();
7825 return false;
7826 }
7827
7828 virtual void invalidate()
7829 {
7830 this->content.invalidate();
7831 basic_parser::invalidate();
7832 }
7833
7834 public:
7836 };
7837
7840#ifdef _UNICODE
7841 using tcss_import = wcss_import;
7842#else
7843 using tcss_import = css_import;
7844#endif
7845
7849 template <class T>
7851 {
7852 public:
7853 virtual bool match(
7854 _In_reads_or_z_opt_(end) const T* text,
7855 _In_ size_t start = 0,
7856 _In_ size_t end = SIZE_MAX,
7857 _In_ int flags = match_multiline)
7858 {
7859 _Unreferenced_(flags);
7860 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7861
7862 this->interval.end = start;
7863 this->base_type.start = this->interval.end;
7864 for (;;) {
7865 _Assume_(text || this->interval.end >= end);
7866 if (this->interval.end >= end || !text[this->interval.end])
7867 break;
7868 if (text[this->interval.end] == '/' ||
7869 text[this->interval.end] == ';' ||
7870 ctype.is(ctype.space, text[this->interval.end]))
7871 break;
7872 this->interval.end++;
7873 }
7874 if (this->interval.end <= this->base_type.start)
7875 goto error;
7876 this->base_type.end = this->interval.end;
7877
7878 if (end <= this->interval.end || text[this->interval.end] != '/')
7879 goto error;
7880
7881 this->interval.end++;
7882 this->sub_type.start = this->interval.end;
7883 for (;;) {
7884 if (this->interval.end >= end || !text[this->interval.end])
7885 break;
7886 if (text[this->interval.end] == '/' ||
7887 text[this->interval.end] == ';' ||
7888 ctype.is(ctype.space, text[this->interval.end]))
7889 break;
7890 this->interval.end++;
7891 }
7892 if (this->interval.end <= this->sub_type.start)
7893 goto error;
7894
7895 this->sub_type.end = this->interval.end;
7896 this->charset.invalidate();
7897 if (this->interval.end < end && text[this->interval.end] == ';') {
7898 this->interval.end++;
7899
7900 // Skip whitespace.
7901 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7902
7903 if (this->interval.end + 7 < end &&
7904 (text[this->interval.end] == 'c' || text[this->interval.end] == 'C') &&
7905 (text[this->interval.end + 1] == 'h' || text[this->interval.end + 1] == 'H') &&
7906 (text[this->interval.end + 2] == 'a' || text[this->interval.end + 2] == 'A') &&
7907 (text[this->interval.end + 3] == 'r' || text[this->interval.end + 3] == 'R') &&
7908 (text[this->interval.end + 4] == 's' || text[this->interval.end + 4] == 'S') &&
7909 (text[this->interval.end + 5] == 'e' || text[this->interval.end + 5] == 'E') &&
7910 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T') &&
7911 text[this->interval.end + 7] == '=')
7912 {
7913 this->interval.end = this->interval.end + 8;
7914 if (this->interval.end < end &&
7915 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7916 {
7917 // "Quoted...
7918 T quote = text[this->interval.end];
7919 this->charset.start = ++this->interval.end;
7920 for (;;) {
7921 if (this->interval.end >= end || !text[this->interval.end]) {
7922 // No end quote!
7923 this->charset.invalidate();
7924 break;
7925 }
7926 if (text[this->interval.end] == quote) {
7927 // End quote"
7928 this->charset.end = this->interval.end;
7929 this->interval.end++;
7930 break;
7931 }
7932 this->interval.end++;
7933 }
7934 }
7935 else {
7936 // Nonquoted
7937 this->charset.start = this->interval.end;
7938 for (;;) {
7939 if (this->interval.end >= end || !text[this->interval.end] ||
7940 ctype.is(ctype.space, text[this->interval.end])) {
7941 this->charset.end = this->interval.end;
7942 break;
7943 }
7944 this->interval.end++;
7945 }
7946 }
7947 }
7948 }
7949 this->interval.start = start;
7950 return true;
7951
7952 error:
7953 this->base_type.invalidate();
7954 this->sub_type.invalidate();
7955 this->charset.invalidate();
7956 this->interval.invalidate();
7957 return false;
7958 }
7959
7960 virtual void invalidate()
7961 {
7962 this->base_type.invalidate();
7963 this->sub_type.invalidate();
7964 this->charset.invalidate();
7965 basic_parser::invalidate();
7966 }
7967
7968 public:
7972 };
7973
// tmime_type resolves to wmime_type when _UNICODE is defined and to mime_type otherwise.
7976#ifdef _UNICODE
7977 using tmime_type = wmime_type;
7978#else
7979 using tmime_type = mime_type;
7980#endif
7981
7985 template <class T>
7987 {
7988 public:
// Matches a contiguous run of code units starting at `start`.
//
// The run ends (exclusive) at the first '>', '=', "/>" pair or space character
// (as classified by the parser locale's ctype facet), or at `end` / the
// terminating zero. `flags` is not used.
//
// When a delimiter stops the scan, the function returns true with
// interval = [start, delimiter position) - this includes the empty interval
// when the very first code unit is a delimiter.
7989 virtual bool match(
7990 _In_reads_or_z_opt_(end) const T* text,
7991 _In_ size_t start = 0,
7992 _In_ size_t end = SIZE_MAX,
7993 _In_ int flags = match_default)
7994 {
7995 _Unreferenced_(flags);
7996 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7997 this->interval.end = start;
7998 for (;;) {
7999 _Assume_(text || this->interval.end >= end);
// End of input reached.
// NOTE(review): listing line 8001 is missing from this view, so the braces
// below do not balance as shown. Presumably it held a condition deciding
// between the "return true" and the "invalidate + return false" outcomes
// (e.g. whether anything was consumed) - confirm against the original header.
8000 if (this->interval.end >= end || !text[this->interval.end]) {
8002 this->interval.start = start;
8003 return true;
8004 }
8005 this->interval.invalidate();
8006 return false;
8007 }
// Delimiter found: the identifier ends just before it.
// '/' only counts as a delimiter when it is immediately followed by '>'.
8008 if (text[this->interval.end] == '>' ||
8009 text[this->interval.end] == '=' ||
8010 text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>' ||
8011 ctype.is(ctype.space, text[this->interval.end]))
8012 {
8013 this->interval.start = start;
8014 return true;
8015 }
8016 this->interval.end++;
8017 }
8018 }
8019 };
8020
// thtml_ident resolves to whtml_ident when _UNICODE is defined and to html_ident otherwise.
8023#ifdef _UNICODE
8024 using thtml_ident = whtml_ident;
8025#else
8026 using thtml_ident = html_ident;
8027#endif
8028
8032 template <class T>
8034 {
8035 public:
8036 virtual bool match(
8037 _In_reads_or_z_opt_(end) const T* text,
8038 _In_ size_t start = 0,
8039 _In_ size_t end = SIZE_MAX,
8040 _In_ int flags = match_default)
8041 {
8042 _Unreferenced_(flags);
8043 this->interval.end = start;
8044 _Assume_(text || this->interval.end >= end);
8045 if (this->interval.end < end &&
8046 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
8047 {
8048 // "Quoted...
8049 T quote = text[this->interval.end];
8050 this->content.start = ++this->interval.end;
8051 for (;;) {
8052 if (this->interval.end >= end || !text[this->interval.end]) {
8053 // No end quote!
8054 this->content.invalidate();
8055 this->interval.invalidate();
8056 return false;
8057 }
8058 if (text[this->interval.end] == quote) {
8059 // End quote"
8060 this->content.end = this->interval.end;
8061 this->interval.start = start;
8062 this->interval.end++;
8063 return true;
8064 }
8065 this->interval.end++;
8066 }
8067 }
8068
8069 // Nonquoted
8070 this->content.start = this->interval.end;
8071 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8072 for (;;) {
8073 _Assume_(text || this->interval.end >= end);
8074 if (this->interval.end >= end || !text[this->interval.end]) {
8075 this->content.end = this->interval.end;
8076 this->interval.start = start;
8077 return true;
8078 }
8079 if (text[this->interval.end] == '>' ||
8080 text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>' ||
8081 ctype.is(ctype.space, text[this->interval.end]))
8082 {
8083 this->content.end = this->interval.end;
8084 this->interval.start = start;
8085 return true;
8086 }
8087 this->interval.end++;
8088 }
8089 }
8090
8091 virtual void invalidate()
8092 {
8093 this->content.invalidate();
8094 basic_parser::invalidate();
8095 }
8096
8097 public:
8099 };
8100
// thtml_value resolves to whtml_value when _UNICODE is defined and to html_value otherwise.
8103#ifdef _UNICODE
8104 using thtml_value = whtml_value;
8105#else
8106 using thtml_value = html_value;
8107#endif
8108
/// Kind of sequence found in HTML source.
enum class html_sequence_t {
    unknown = -1,       ///< Nothing recognized (state after a failed match or invalidate())

    text = 0,
    element = 1,        ///< Start tag closed with "/>": <tag .../>
    element_start = 2,  ///< <tag ...>
    element_end = 3,    ///< </tag ...>
    declaration = 4,    ///< <!...>
    comment = 5,        ///< <!--...-->
    instruction = 6,    ///< <?...> or <?...?>
    PCDATA = 7,
    CDATA = 8,
};
8125
8133
8137 template <class T>
8139 {
8140 public:
8141 basic_html_tag(_In_ const std::locale& locale = std::locale()) :
8142 basic_parser(locale),
8143 type(html_sequence_t::unknown)
8144 {}
8145
8146 virtual bool match(
8147 _In_reads_or_z_opt_(end) const T* text,
8148 _In_ size_t start = 0,
8149 _In_ size_t end = SIZE_MAX,
8150 _In_ int flags = match_multiline)
8151 {
8152 _Assume_(text || start >= end);
8153 if (start >= end || text[start] != '<')
8154 goto error;
8155 this->interval.end = start + 1;
8156 if (this->interval.end >= end || !text[this->interval.end])
8157 goto error;
8158 if (text[this->interval.end] == '/' &&
8159 this->m_ident.match(text, this->interval.end + 1, end, flags))
8160 {
8161 // </...
8162 this->type = html_sequence_t::element_end;
8163 this->name = this->m_ident.interval;
8164 this->interval.end = this->m_ident.interval.end;
8165 }
8166 else if (text[this->interval.end] == '!') {
8167 // <!...
8168 this->interval.end++;
8169 if (this->interval.end + 1 < end &&
8170 text[this->interval.end] == '-' &&
8171 text[this->interval.end + 1] == '-')
8172 {
8173 // <!--...
8174 this->name.start = this->interval.end = this->interval.end + 2;
8175 for (;;) {
8176 if (this->interval.end >= end || !text[this->interval.end])
8177 goto error;
8178 if (this->interval.end + 2 < end &&
8179 text[this->interval.end] == '-' &&
8180 text[this->interval.end + 1] == '-' &&
8181 text[this->interval.end + 2] == '>')
8182 {
8183 // <!--...-->
8184 this->type = html_sequence_t::comment;
8185 this->name.end = this->interval.end;
8186 this->attributes.clear();
8187 this->interval.start = start;
8188 this->interval.end = this->interval.end + 3;
8189 return true;
8190 }
8191 this->interval.end++;
8192 }
8193 }
8194 this->type = html_sequence_t::declaration;
8195 this->name.start = this->name.end = this->interval.end;
8196 }
8197 else if (text[this->interval.end] == '?') {
8198 // <?...
8199 this->name.start = ++this->interval.end;
8200 for (;;) {
8201 if (this->interval.end >= end || !text[this->interval.end])
8202 goto error;
8203 if (text[this->interval.end] == '>') {
8204 // <?...>
8205 this->type = html_sequence_t::instruction;
8206 this->name.end = this->interval.end;
8207 this->attributes.clear();
8208 this->interval.start = start;
8209 this->interval.end++;
8210 return true;
8211 }
8212 if (this->interval.end + 1 < end &&
8213 text[this->interval.end] == '?' &&
8214 text[this->interval.end + 1] == '>')
8215 {
8216 // <?...?>
8217 this->type = html_sequence_t::instruction;
8218 this->name.end = this->interval.end;
8219 this->attributes.clear();
8220 this->interval.start = start;
8221 this->interval.end = this->interval.end + 2;
8222 return true;
8223 }
8224 this->interval.end++;
8225 }
8226 }
8227 else if (this->m_ident.match(text, this->interval.end, end, flags)) {
8228 // <tag...
8229 this->type = html_sequence_t::element_start;
8230 this->name = this->m_ident.interval;
8231 this->interval.end = this->m_ident.interval.end;
8232 }
8233 else
8234 goto error;
8235
8236 // Skip whitespace.
8237 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8238 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8239
8240 this->attributes.clear();
8241 for (;;) {
8242 if (this->type == html_sequence_t::element_start &&
8243 this->interval.end + 1 < end &&
8244 text[this->interval.end] == '/' &&
8245 text[this->interval.end + 1] == '>')
8246 {
8247 // <tag .../>
8248 this->type = html_sequence_t::element;
8249 this->interval.end = this->interval.end + 2;
8250 break;
8251 }
8252 if (this->interval.end < end &&
8253 text[this->interval.end] == '>')
8254 {
8255 // <tag ...>
8256 this->interval.end++;
8257 break;
8258 }
8259 if (this->type == html_sequence_t::declaration &&
8260 this->interval.end + 1 < end &&
8261 text[this->interval.end] == '!' &&
8262 text[this->interval.end + 1] == '>')
8263 {
8264 // "<!...!>".
8265 this->interval.end = this->interval.end + 2;
8266 break;
8267 }
8268 if (this->type == html_sequence_t::declaration &&
8269 this->interval.end + 1 < end &&
8270 text[this->interval.end] == '-' &&
8271 text[this->interval.end + 1] == '-')
8272 {
8273 // "<! ... --...".
8274 this->interval.end = this->interval.end + 2;
8275 for (;;) {
8276 if (this->interval.end >= end || !text[this->interval.end])
8277 goto error;
8278 if (this->interval.end + 1 < end &&
8279 text[this->interval.end] == '-' &&
8280 text[this->interval.end + 1] == '-')
8281 {
8282 // "<! ... --...--".
8283 this->interval.end = this->interval.end + 2;
8284 break;
8285 }
8286 this->interval.end++;
8287 }
8288
8289 // Skip whitespace.
8290 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8291 continue;
8292 }
8293
8294 if (this->interval.end >= end || !text[this->interval.end])
8295 goto error;
8296
8297 // Attributes follow...
8298 html_attribute* a = nullptr;
8299 if (this->m_ident.match(text, this->interval.end, end, flags)) {
8300 this->attributes.push_back(std::move(html_attribute{ this->m_ident.interval }));
8301 a = &this->attributes.back();
8302 _Assume_(a);
8303 this->interval.end = this->m_ident.interval.end;
8304 }
8305 else {
8306 // What was that?! Skip.
8307 this->interval.end++;
8308 continue;
8309 }
8310
8311 // Skip whitespace.
8312 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8313
8314 if (this->interval.end < end && text[this->interval.end] == '=') {
8315 this->interval.end++;
8316
8317 // Skip whitespace.
8318 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8319
8320 if (this->m_value.match(text, this->interval.end, end, flags)) {
8321 // This attribute has value.
8322 a->value = this->m_value.content;
8323 this->interval.end = this->m_value.interval.end;
8324
8325 // Skip whitespace.
8326 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8327 }
8328 }
8329 else {
8330 // This attribute has no value.
8331 a->value.invalidate();
8332 }
8333 }
8334
8335 this->interval.start = start;
8336 return true;
8337
8338 error:
8339 this->type = html_sequence_t::unknown;
8340 this->name.invalidate();
8341 this->attributes.clear();
8342 this->interval.invalidate();
8343 return false;
8344 }
8345
8346 virtual void invalidate()
8347 {
8348 this->type = html_sequence_t::unknown;
8349 this->name.invalidate();
8350 this->attributes.clear();
8351 basic_parser::invalidate();
8352 }
8353
8354 public:
8355 html_sequence_t type;
8357 std::vector<html_attribute> attributes;
8358
8359 protected:
8360 basic_html_ident<T> m_ident;
8361 basic_html_value<T> m_value;
8362 };
8363
// thtml_tag resolves to whtml_tag when _UNICODE is defined and to html_tag otherwise.
8366#ifdef _UNICODE
8367 using thtml_tag = whtml_tag;
8368#else
8369 using thtml_tag = html_tag;
8370#endif
8371
8375 template <class T>
8377 {
8378 public:
8379 virtual bool match(
8380 _In_reads_or_z_opt_(end) const T* text,
8381 _In_ size_t start = 0,
8382 _In_ size_t end = SIZE_MAX,
8383 _In_ int flags = match_multiline)
8384 {
8385 _Unreferenced_(flags);
8386 _Assume_(text || start + 2 >= end);
8387 if (start + 2 < end &&
8388 text[start] == '<' &&
8389 text[start + 1] == '!' &&
8390 text[start + 2] == '[')
8391 {
8392 this->interval.end = start + 3;
8393
8394 // Skip whitespace.
8395 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8396 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8397
8398 this->condition.start = this->condition.end = this->interval.end;
8399
8400 for (;;) {
8401 if (this->interval.end >= end || !text[this->interval.end])
8402 break;
8403 if (text[this->interval.end] == '[') {
8404 this->interval.start = start;
8405 this->interval.end++;
8406 return true;
8407 }
8408 if (ctype.is(ctype.space, text[this->interval.end]))
8409 this->interval.end++;
8410 else
8411 this->condition.end = ++this->interval.end;
8412 }
8413 }
8414
8415 this->condition.invalidate();
8416 this->interval.invalidate();
8417 return false;
8418 }
8419
8420 virtual void invalidate()
8421 {
8422 this->condition.invalidate();
8423 basic_parser::invalidate();
8424 }
8425
8426 public:
8427 stdex::interval<size_t> condition;
8428 };
8429
8432#ifdef _UNICODE
8434#else
8436#endif
8437
8441 template <class T>
8443 {
8444 public:
8445 virtual bool match(
8446 _In_reads_or_z_opt_(end) const T* text,
8447 _In_ size_t start = 0,
8448 _In_ size_t end = SIZE_MAX,
8449 _In_ int flags = match_multiline)
8450 {
8451 _Unreferenced_(flags);
8452 _Assume_(text || start + 2 >= end);
8453 if (start + 2 < end &&
8454 text[start] == ']' &&
8455 text[start + 1] == ']' &&
8456 text[start + 2] == '>')
8457 {
8458 this->interval.start = start;
8459 this->interval.end = start + 3;
8460 return true;
8461 }
8462 this->interval.invalidate();
8463 return false;
8464 }
8465 };
8466
8469#ifdef _UNICODE
8471#else
8473#endif
8474 }
8475}
8476
8477#undef ENUM_FLAG_OPERATOR
8478#undef ENUM_FLAGS
8479
8480#ifdef _MSC_VER
8481#pragma warning(pop)
8482#endif
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4398
Test for any code unit.
Definition parser.hpp:224
Test for beginning of line.
Definition parser.hpp:618
Test for any.
Definition parser.hpp:1060
Test for chemical formula.
Definition parser.hpp:5542
Test for Creditor Reference.
Definition parser.hpp:4968
T reference[22]
Normalized national reference number.
Definition parser.hpp:5097
T check_digits[3]
Two check digits.
Definition parser.hpp:5096
bool is_valid
Is reference valid per ISO 7064.
Definition parser.hpp:5098
Legacy CSS comment end -->
Definition parser.hpp:7551
Legacy CSS comment start <!--
Definition parser.hpp:7513
CSS comment.
Definition parser.hpp:7453
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7497
CSS import directive.
Definition parser.hpp:7766
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7835
CSS string.
Definition parser.hpp:7588
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7639
URI in CSS.
Definition parser.hpp:7655
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7750
Test for any code unit from a given string of code units.
Definition parser.hpp:723
Test for specific code unit.
Definition parser.hpp:294
Test for date.
Definition parser.hpp:4028
Test for valid DNS domain character.
Definition parser.hpp:2809
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2847
Test for DNS domain/hostname.
Definition parser.hpp:2909
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2973
Test for e-mail address.
Definition parser.hpp:3797
Test for emoticon.
Definition parser.hpp:3905
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3994
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3995
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3997
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3996
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3993
Test for end of line.
Definition parser.hpp:656
Test for fraction.
Definition parser.hpp:1689
End of condition ...]]>
Definition parser.hpp:8443
Start of condition <![condition[...
Definition parser.hpp:8377
Contiguous sequence of characters representing name of element, attribute etc.
Definition parser.hpp:7987
Tag.
Definition parser.hpp:8139
std::vector< html_attribute > attributes
tag attributes
Definition parser.hpp:8357
html_sequence_t type
tag type
Definition parser.hpp:8355
stdex::interval< size_t > name
tag name position in source
Definition parser.hpp:8356
Optionally-quoted string representing value of an attribute.
Definition parser.hpp:8034
stdex::interval< size_t > content
content position in source
Definition parser.hpp:8098
Test for International Bank Account Number.
Definition parser.hpp:4674
T bban[31]
Normalized Basic Bank Account Number.
Definition parser.hpp:4945
T country[3]
ISO 3166-1 alpha-2 country code.
Definition parser.hpp:4943
T check_digits[3]
Two check digits.
Definition parser.hpp:4944
bool is_valid
Is IBAN valid per ISO 7064.
Definition parser.hpp:4946
Test for decimal integer.
Definition parser.hpp:1298
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1383
bool has_separators
Did integer have any separators?
Definition parser.hpp:1443
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1442
Test for hexadecimal integer.
Definition parser.hpp:1464
Base class for integer testing.
Definition parser.hpp:1276
size_t value
Calculated value of the numeral.
Definition parser.hpp:1290
Test for IPv4 address.
Definition parser.hpp:2349
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2464
struct in_addr value
IPv4 address value.
Definition parser.hpp:2465
Test for IPv6 address.
Definition parser.hpp:2568
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2772
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2770
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2771
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2496
Test for repeating.
Definition parser.hpp:913
bool m_greedy
try to match as long sequence as possible
Definition parser.hpp:952
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:949
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:950
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:951
Test for JSON string.
Definition parser.hpp:7300
MIME content type.
Definition parser.hpp:7851
stdex::interval< size_t > base_type
basic type position in source
Definition parser.hpp:7969
stdex::interval< size_t > sub_type
sub-type position in source
Definition parser.hpp:7970
stdex::interval< size_t > charset
charset position in source
Definition parser.hpp:7971
Test for mixed numeral.
Definition parser.hpp:1925
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:2031
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2029
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2028
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2027
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2030
Test for monetary numeral.
Definition parser.hpp:2220
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2326
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2331
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2329
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2332
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2330
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2327
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2328
"No-op" match
Definition parser.hpp:192
Base template for all parsers.
Definition parser.hpp:74
stdex::interval< size_t > interval
Region of the last match.
Definition parser.hpp:172
Test for permutation.
Definition parser.hpp:1200
Test for phone number.
Definition parser.hpp:4521
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4647
Test for any punctuation code unit.
Definition parser.hpp:466
Test for Roman numeral.
Definition parser.hpp:1573
Test for scientific numeral.
Definition parser.hpp:2051
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2195
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2199
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2193
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2194
double value
Calculated value of the numeral.
Definition parser.hpp:2203
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2201
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2198
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2200
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2202
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2197
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2196
Test for match score.
Definition parser.hpp:1752
Test for sequence.
Definition parser.hpp:1009
Definition parser.hpp:691
Test for SI Reference delimiter.
Definition parser.hpp:5165
Test for SI Reference part.
Definition parser.hpp:5120
Test for SI Reference.
Definition parser.hpp:5203
basic_si_reference_part< T > part3
Reference data part 3 (P3)
Definition parser.hpp:5520
basic_si_reference_part< T > part1
Reference data part 1 (P1)
Definition parser.hpp:5518
bool is_valid
Is reference valid.
Definition parser.hpp:5521
T model[3]
Reference model.
Definition parser.hpp:5517
basic_si_reference_part< T > part2
Reference data part 2 (P2)
Definition parser.hpp:5519
Test for signed numeral.
Definition parser.hpp:1839
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1907
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1906
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1905
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1908
Test for any space code unit.
Definition parser.hpp:387
Test for any space or punctuation code unit.
Definition parser.hpp:540
Test for any string.
Definition parser.hpp:1128
Test for given string.
Definition parser.hpp:818
Test for time.
Definition parser.hpp:4295
Test for valid URL password character.
Definition parser.hpp:3091
Test for valid URL path character.
Definition parser.hpp:3191
Test for URL path.
Definition parser.hpp:3299
Test for valid URL username character.
Definition parser.hpp:2992
Test for URL.
Definition parser.hpp:3440
Test for HTTP agent.
Definition parser.hpp:6827
Test for HTTP any type.
Definition parser.hpp:5949
Test for HTTP asterisk.
Definition parser.hpp:6591
Test for HTTP header.
Definition parser.hpp:7147
Test for HTTP language (RFC1766)
Definition parser.hpp:6459
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:5623
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5981
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:6036
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5894
http_token name
Parameter name.
Definition parser.hpp:5938
http_value value
Parameter value.
Definition parser.hpp:5939
Test for HTTP protocol.
Definition parser.hpp:6902
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:7003
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:5784
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:5840
Test for HTTP request.
Definition parser.hpp:7010
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:5659
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:5696
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:5730
Test for HTTP URL parameter.
Definition parser.hpp:6276
Test for HTTP URL path segment.
Definition parser.hpp:6187
Test for HTTP URL path.
Definition parser.hpp:6220
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:6269
Test for HTTP URL port.
Definition parser.hpp:6131
Test for HTTP URL server.
Definition parser.hpp:6094
Test for HTTP URL.
Definition parser.hpp:6357
Collection of HTTP values.
Definition parser.hpp:7256
Test for HTTP value (RFC2616: value)
Definition parser.hpp:5850
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5886
http_token token
Value when matched as token.
Definition parser.hpp:5887
Test for HTTP weight factor.
Definition parser.hpp:6522
float value
Calculated value of the weight factor.
Definition parser.hpp:6584
Test for HTTP weighted value.
Definition parser.hpp:6614
Base template for collection-holding parsers.
Definition parser.hpp:969
Test for any SGML code point.
Definition parser.hpp:256
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:775
Test for specific SGML code point.
Definition parser.hpp:343
Test for valid DNS domain SGML character.
Definition parser.hpp:2865
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2534
Test for any SGML punctuation code point.
Definition parser.hpp:507
Test for any SGML space code point.
Definition parser.hpp:430
Test for any SGML space or punctuation code point.
Definition parser.hpp:583
Test for SGML given string.
Definition parser.hpp:865
Test for valid URL password SGML character.
Definition parser.hpp:3143
Test for valid URL path SGML character.
Definition parser.hpp:3247
Test for valid URL username SGML character.
Definition parser.hpp:3043
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
void invalidate()
Invalidates interval.
Definition interval.hpp:59
T start
interval start
Definition interval.hpp:19
Tag attribute.
Definition parser.hpp:8129
stdex::interval< size_t > name
attribute name position in source
Definition parser.hpp:8130
stdex::interval< size_t > value
attribute value position in source
Definition parser.hpp:8131
Definition parser.hpp:7282