stdex
Additional custom algorithms and algorithms not covered by Standard C++
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "interval.hpp"
10#include "memory.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include <stdarg.h>
14#include <stdint.h>
15#include <math.h>
16#if defined(_WIN32)
17#include <winsock2.h>
18#if _MSC_VER >= 1300
19#include <ws2ipdef.h>
20#endif
21#include <ws2tcpip.h>
22#else
23#include <netinet/in.h>
24#endif
25#include <limits>
26#include <list>
27#include <locale>
28#include <memory>
29#include <set>
30#include <string>
31
32#ifdef _MSC_VER
33#pragma warning(push)
34#pragma warning(disable: 4100)
35#endif
36
///
/// Defines binary operator X and compound assignment X= for scoped enum T.
///
/// Overloads are generated for (T, T), (T, underlying) and (underlying, T) operands;
/// each computes on the underlying integer type and casts the result back to T.
///
/// \param[in] T  Scoped enum type
/// \param[in] X  Binary operator token (|, ^ or &)
///
#define ENUM_FLAG_OPERATOR(T,X) \
inline T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
inline T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T& operator X= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
inline T& operator X= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }

///
/// Declares scoped enum T with the given underlying type together with its ~, |, ^ and &
/// operators. The macro ends with the opening of the enum definition, so it must be
/// followed by the enumerator list: ENUM_FLAGS(my_flags, int) { a = 1, b = 2 };
///
/// \param[in] T     Name of the scoped enum to declare
/// \param[in] type  Underlying integer type
///
#define ENUM_FLAGS(T, type) \
enum class T : type; \
inline T operator ~ (T t) { return static_cast<T>(~static_cast<std::underlying_type_t<T>>(t)); } \
ENUM_FLAG_OPERATOR(T,|) \
ENUM_FLAG_OPERATOR(T,^) \
ENUM_FLAG_OPERATOR(T,&) \
enum class T : type
50
51#if defined(_WIN32)
52#elif defined(__APPLE__)
53#define s6_words __u6_addr.__u6_addr16
54#else
55#define s6_words s6_addr16
56#endif
57
58namespace stdex
59{
60 namespace parser
61 {
65 constexpr int match_default = 0;
66 constexpr int match_case_insensitive = 0x1;
67 constexpr int match_multiline = 0x2;
68
72 template <class T>
74 {
75 public:
76 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
77 virtual ~basic_parser() {}
78
79 bool search(
80 _In_reads_or_z_(end) const T* text,
81 _In_ size_t start = 0,
82 _In_ size_t end = SIZE_MAX,
83 _In_ int flags = match_default)
84 {
85 for (size_t i = start; i < end && text[i]; i++)
86 if (match(text, i, end, flags))
87 return true;
88 return false;
89 }
90
91 virtual bool match(
92 _In_reads_or_z_(end) const T* text,
93 _In_ size_t start = 0,
94 _In_ size_t end = SIZE_MAX,
95 _In_ int flags = match_default) = 0;
96
97 template<class _Traits, class _Ax>
98 inline bool match(
99 const std::basic_string<T, _Traits, _Ax>& text,
100 _In_ size_t start = 0,
101 _In_ size_t end = SIZE_MAX,
102 _In_ int flags = match_default)
103 {
104 return match(text.c_str(), start, std::min<size_t>(end, text.size()), flags);
105 }
106
107 virtual void invalidate()
108 {
109 this->interval.invalidate();
110 }
111
112 protected:
114 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
115 {
116 if (text[start] == '&') {
117 // Potential entity start
118 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
119 for (chr_end = start + 1;; chr_end++) {
120 if (chr_end >= end || text[chr_end] == 0) {
121 // Unterminated entity
122 break;
123 }
124 if (text[chr_end] == ';') {
125 // Entity end
126 size_t n = chr_end - start - 1;
127 if (n >= 2 && text[start + 1] == '#') {
128 // Numerical entity
129 char32_t unicode;
130 if (text[start + 2] == 'x' || text[start + 2] == 'X')
131 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
132 else
133 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
134#ifdef _WIN32
135 if (unicode < 0x10000) {
136 buf[0] = (wchar_t)unicode;
137 buf[1] = 0;
138 }
139 else {
140 ucs4_to_surrogate_pair(buf, unicode);
141 buf[2] = 0;
142 }
143#else
144 buf[0] = (wchar_t)unicode;
145 buf[1] = 0;
146#endif
147 chr_end++;
148 return buf;
149 }
150 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
151 if (entity_w) {
152 chr_end++;
153 return entity_w;
154 }
155 // Unknown entity.
156 break;
157 }
158 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
159 // This char cannot possibly be a part of entity.
160 break;
161 }
162 }
163 }
164 buf[0] = text[start];
165 buf[1] = 0;
166 chr_end = start + 1;
167 return buf;
168 }
170
171 public:
173
174 protected:
175 std::locale m_locale;
176 };
177
180#ifdef _UNICODE
181 using tparser = wparser;
182#else
183 using tparser = parser;
184#endif
186
190 template <class T>
191 class basic_noop : public basic_parser<T>
192 {
193 public:
194 virtual bool match(
195 _In_reads_or_z_(end) const T* text,
196 _In_ size_t start = 0,
197 _In_ size_t end = SIZE_MAX,
198 _In_ int flags = match_default)
199 {
200 _Assume_(text || start >= end);
201 if (start < end && text[start]) {
202 this->interval.start = this->interval.end = start;
203 return true;
204 }
205 this->interval.invalidate();
206 return false;
207 }
208 };
209
210 using noop = basic_noop<char>;
212#ifdef _UNICODE
213 using tnoop = wnoop;
214#else
215 using tnoop = noop;
216#endif
218
222 template <class T>
223 class basic_any_cu : public basic_parser<T>
224 {
225 public:
226 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
227
228 virtual bool match(
229 _In_reads_or_z_(end) const T* text,
230 _In_ size_t start = 0,
231 _In_ size_t end = SIZE_MAX,
232 _In_ int flags = match_default)
233 {
234 _Assume_(text || start >= end);
235 if (start < end && text[start]) {
236 this->interval.end = (this->interval.start = start) + 1;
237 return true;
238 }
239 this->interval.invalidate();
240 return false;
241 }
242 };
243
246#ifdef _UNICODE
247 using tany_cu = wany_cu;
248#else
249 using tany_cu = any_cu;
250#endif
251
255 class sgml_any_cp : public basic_any_cu<char>
256 {
257 public:
258 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
259
260 virtual bool match(
261 _In_reads_or_z_(end) const char* text,
262 _In_ size_t start = 0,
263 _In_ size_t end = SIZE_MAX,
264 _In_ int flags = match_default)
265 {
266 _Assume_(text || start >= end);
267 if (start < end && text[start]) {
268 if (text[start] == '&') {
269 // SGML entity
270 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
271 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
272 if (text[this->interval.end] == ';') {
273 this->interval.end++;
274 this->interval.start = start;
275 return true;
276 }
277 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
278 break;
279 // Unterminated entity
280 }
281 this->interval.end = (this->interval.start = start) + 1;
282 return true;
283 }
284 this->interval.invalidate();
285 return false;
286 }
287 };
288
292 template <class T>
293 class basic_cu : public basic_parser<T>
294 {
295 public:
296 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
297 basic_parser<T>(locale),
298 m_chr(chr),
299 m_invert(invert)
300 {}
301
302 virtual bool match(
303 _In_reads_or_z_(end) const T* text,
304 _In_ size_t start = 0,
305 _In_ size_t end = SIZE_MAX,
306 _In_ int flags = match_default)
307 {
308 _Assume_(text || start >= end);
309 if (start < end && text[start]) {
310 bool r;
311 if (flags & match_case_insensitive) {
312 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
313 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
314 }
315 else
316 r = text[start] == m_chr;
317 if ((r && !m_invert) || (!r && m_invert)) {
318 this->interval.end = (this->interval.start = start) + 1;
319 return true;
320 }
321 }
322 this->interval.invalidate();
323 return false;
324 }
325
326 protected:
327 T m_chr;
328 bool m_invert;
329 };
330
331 using cu = basic_cu<char>;
332 using wcu = basic_cu<wchar_t>;
333#ifdef _UNICODE
334 using tcu = wcu;
335#else
336 using tcu = cu;
337#endif
338
342 class sgml_cp : public sgml_parser
343 {
344 public:
345 sgml_cp(const char* chr, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
346 sgml_parser(locale),
347 m_invert(invert)
348 {
349 _Assume_(chr || !count);
350 wchar_t buf[3];
351 size_t chr_end;
352 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
353 }
354
355 virtual bool match(
356 _In_reads_or_z_(end) const char* text,
357 _In_ size_t start = 0,
358 _In_ size_t end = SIZE_MAX,
359 _In_ int flags = match_default)
360 {
361 _Assume_(text || start >= end);
362 if (start < end && text[start]) {
363 wchar_t buf[3];
364 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
365 bool r = ((flags & match_case_insensitive) ?
366 stdex::strnicmp(chr, SIZE_MAX, m_chr.c_str(), m_chr.size(), m_locale) :
367 stdex::strncmp(chr, SIZE_MAX, m_chr.c_str(), m_chr.size())) == 0;
368 if ((r && !m_invert) || (!r && m_invert)) {
369 this->interval.start = start;
370 return true;
371 }
372 }
373 this->interval.invalidate();
374 return false;
375 }
376
377 protected:
378 std::wstring m_chr;
379 bool m_invert;
380 };
381
385 template <class T>
386 class basic_space_cu : public basic_parser<T>
387 {
388 public:
389 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
390 basic_parser<T>(locale),
391 m_invert(invert)
392 {}
393
394 virtual bool match(
395 _In_reads_or_z_(end) const T* text,
396 _In_ size_t start = 0,
397 _In_ size_t end = SIZE_MAX,
398 _In_ int flags = match_default)
399 {
400 _Assume_(text || start >= end);
401 if (start < end && text[start]) {
402 bool r =
403 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
404 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
405 if ((r && !m_invert) || (!r && m_invert)) {
406 this->interval.end = (this->interval.start = start) + 1;
407 return true;
408 }
409 }
410 this->interval.invalidate();
411 return false;
412 }
413
414 protected:
415 bool m_invert;
416 };
417
420#ifdef _UNICODE
421 using tspace_cu = wspace_cu;
422#else
423 using tspace_cu = space_cu;
424#endif
425
429 class sgml_space_cp : public basic_space_cu<char>
430 {
431 public:
432 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
434 {}
435
436 virtual bool match(
437 _In_reads_or_z_(end) const char* text,
438 _In_ size_t start = 0,
439 _In_ size_t end = SIZE_MAX,
440 _In_ int flags = match_default)
441 {
442 _Assume_(text || start >= end);
443 if (start < end && text[start]) {
444 wchar_t buf[3];
445 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
446 const wchar_t* chr_end = chr + stdex::strlen(chr);
447 bool r =
448 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
449 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
450 if ((r && !m_invert) || (!r && m_invert)) {
451 this->interval.start = start;
452 return true;
453 }
454 }
455
456 this->interval.invalidate();
457 return false;
458 }
459 };
460
464 template <class T>
465 class basic_punct_cu : public basic_parser<T>
466 {
467 public:
468 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
469 basic_parser<T>(locale),
470 m_invert(invert)
471 {}
472
473 virtual bool match(
474 _In_reads_or_z_(end) const T* text,
475 _In_ size_t start = 0,
476 _In_ size_t end = SIZE_MAX,
477 _In_ int flags = match_default)
478 {
479 _Assume_(text || start >= end);
480 if (start < end && text[start]) {
481 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
482 if ((r && !m_invert) || (!r && m_invert)) {
483 this->interval.end = (this->interval.start = start) + 1;
484 return true;
485 }
486 }
487 this->interval.invalidate();
488 return false;
489 }
490
491 protected:
492 bool m_invert;
493 };
494
497#ifdef _UNICODE
498 using tpunct_cu = wpunct_cu;
499#else
500 using tpunct_cu = punct_cu;
501#endif
502
506 class sgml_punct_cp : public basic_punct_cu<char>
507 {
508 public:
509 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
511 {}
512
513 virtual bool match(
514 _In_reads_or_z_(end) const char* text,
515 _In_ size_t start = 0,
516 _In_ size_t end = SIZE_MAX,
517 _In_ int flags = match_default)
518 {
519 _Assume_(text || start >= end);
520 if (start < end && text[start]) {
521 wchar_t buf[3];
522 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
523 const wchar_t* chr_end = chr + stdex::strlen(chr);
524 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
525 if ((r && !m_invert) || (!r && m_invert)) {
526 this->interval.start = start;
527 return true;
528 }
529 }
530 this->interval.invalidate();
531 return false;
532 }
533 };
534
538 template <class T>
540 {
541 public:
542 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
543 basic_parser<T>(locale),
544 m_invert(invert)
545 {}
546
547 virtual bool match(
548 _In_reads_or_z_(end) const T* text,
549 _In_ size_t start = 0,
550 _In_ size_t end = SIZE_MAX,
551 _In_ int flags = match_default)
552 {
553 _Assume_(text || start >= end);
554 if (start < end && text[start]) {
555 bool r =
556 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
557 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
558 if ((r && !m_invert) || (!r && m_invert)) {
559 this->interval.end = (this->interval.start = start) + 1;
560 return true;
561 }
562 }
563 this->interval.invalidate();
564 return false;
565 }
566
567 protected:
568 bool m_invert;
569 };
570
573#ifdef _UNICODE
575#else
577#endif
578
583 {
584 public:
585 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
587 {}
588
589 virtual bool match(
590 _In_reads_or_z_(end) const char* text,
591 _In_ size_t start = 0,
592 _In_ size_t end = SIZE_MAX,
593 _In_ int flags = match_default)
594 {
595 _Assume_(text || start >= end);
596 if (start < end && text[start]) {
597 wchar_t buf[3];
598 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
599 const wchar_t* chr_end = chr + stdex::strlen(chr);
600 bool r =
601 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
602 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
603 if ((r && !m_invert) || (!r && m_invert)) {
604 this->interval.start = start;
605 return true;
606 }
607 }
608 this->interval.invalidate();
609 return false;
610 }
611 };
612
616 template <class T>
617 class basic_bol : public basic_parser<T>
618 {
619 public:
620 basic_bol(bool invert = false) : m_invert(invert) {}
621
622 virtual bool match(
623 _In_reads_or_z_(end) const T* text,
624 _In_ size_t start = 0,
625 _In_ size_t end = SIZE_MAX,
626 _In_ int flags = match_default)
627 {
628 _Assume_(text || start >= end);
629 bool r = start == 0 || (start <= end && stdex::islbreak(text[start - 1]));
630 if ((r && !m_invert) || (!r && m_invert)) {
631 this->interval.end = this->interval.start = start;
632 return true;
633 }
634 this->interval.invalidate();
635 return false;
636 }
637
638 protected:
639 bool m_invert;
640 };
641
642 using bol = basic_bol<char>;
643 using wbol = basic_bol<wchar_t>;
644#ifdef _UNICODE
645 using tbol = wbol;
646#else
647 using tbol = bol;
648#endif
650
654 template <class T>
655 class basic_eol : public basic_parser<T>
656 {
657 public:
658 basic_eol(bool invert = false) : m_invert(invert) {}
659
660 virtual bool match(
661 _In_reads_or_z_(end) const T* text,
662 _In_ size_t start = 0,
663 _In_ size_t end = SIZE_MAX,
664 _In_ int flags = match_default)
665 {
666 _Assume_(text || start >= end);
667 bool r = start >= end || !text[start] || stdex::islbreak(text[start]);
668 if ((r && !m_invert) || (!r && m_invert)) {
669 this->interval.end = this->interval.start = start;
670 return true;
671 }
672 this->interval.invalidate();
673 return false;
674 }
675
676 protected:
677 bool m_invert;
678 };
679
680 using eol = basic_eol<char>;
681 using weol = basic_eol<wchar_t>;
682#ifdef _UNICODE
683 using teol = weol;
684#else
685 using teol = eol;
686#endif
688
689 template <class T>
690 class basic_set : public basic_parser<T>
691 {
692 public:
693 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
694 basic_parser<T>(locale),
695 hit_offset(SIZE_MAX),
696 m_invert(invert)
697 {}
698
699 virtual bool match(
700 _In_reads_or_z_(end) const T* text,
701 _In_ size_t start = 0,
702 _In_ size_t end = SIZE_MAX,
703 _In_ int flags = match_default) = 0;
704
705 virtual void invalidate()
706 {
707 hit_offset = SIZE_MAX;
709 }
710
711 public:
712 size_t hit_offset;
713
714 protected:
715 bool m_invert;
716 };
717
721 template <class T>
722 class basic_cu_set : public basic_set<T>
723 {
724 public:
726 _In_reads_or_z_(count) const T* set,
727 _In_ size_t count = SIZE_MAX,
728 _In_ bool invert = false,
729 _In_ const std::locale& locale = std::locale()) :
730 basic_set<T>(invert, locale)
731 {
732 if (set)
733 m_set.assign(set, set + stdex::strnlen(set, count));
734 }
735
736 virtual bool match(
737 _In_reads_or_z_(end) const T* text,
738 _In_ size_t start = 0,
739 _In_ size_t end = SIZE_MAX,
740 _In_ int flags = match_default)
741 {
742 _Assume_(text || start >= end);
743 if (start < end && text[start]) {
744 const T* set = m_set.c_str();
745 size_t r = (flags & match_case_insensitive) ?
746 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
747 stdex::strnchr(set, m_set.size(), text[start]);
748 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
749 this->hit_offset = r;
750 this->interval.end = (this->interval.start = start) + 1;
751 return true;
752 }
753 }
754 this->hit_offset = SIZE_MAX;
755 this->interval.invalidate();
756 return false;
757 }
758
759 protected:
760 std::basic_string<T> m_set;
761 };
762
765#ifdef _UNICODE
766 using tcu_set = wcu_set;
767#else
768 using tcu_set = cu_set;
769#endif
770
774 class sgml_cp_set : public basic_set<char>
775 {
776 public:
777 sgml_cp_set(const char* set, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
778 basic_set<char>(invert, locale)
779 {
780 if (set)
781 m_set = sgml2str(set, count);
782 }
783
784 virtual bool match(
785 _In_reads_or_z_(end) const char* text,
786 _In_ size_t start = 0,
787 _In_ size_t end = SIZE_MAX,
788 _In_ int flags = match_default)
789 {
790 _Assume_(text || start >= end);
791 if (start < end && text[start]) {
792 wchar_t buf[3];
793 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
794 const wchar_t* set = m_set.c_str();
795 size_t r = (flags & match_case_insensitive) ?
796 stdex::strnistr(set, m_set.size(), chr, m_locale) :
797 stdex::strnstr(set, m_set.size(), chr);
798 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
799 hit_offset = r;
800 this->interval.start = start;
801 return true;
802 }
803 }
804 hit_offset = SIZE_MAX;
805 this->interval.invalidate();
806 return false;
807 }
808
809 protected:
810 std::wstring m_set;
811 };
812
816 template <class T>
817 class basic_string : public basic_parser<T>
818 {
819 public:
821 _In_reads_or_z_(count) const T* str,
822 _In_ size_t count = SIZE_MAX,
823 _In_ const std::locale& locale = std::locale()) :
824 basic_parser<T>(locale),
825 m_str(str, str + stdex::strnlen(str, count))
826 {}
827
828 virtual bool match(
829 _In_reads_or_z_(end) const T* text,
830 _In_ size_t start = 0,
831 _In_ size_t end = SIZE_MAX,
832 _In_ int flags = match_default)
833 {
834 _Assume_(text || start >= end);
835 size_t
836 m = m_str.size(),
837 n = std::min<size_t>(end - start, m);
838 bool r = ((flags & match_case_insensitive) ?
839 stdex::strnicmp(text + start, n, m_str.c_str(), m, this->m_locale) :
840 stdex::strncmp(text + start, n, m_str.c_str(), m)) == 0;
841 if (r) {
842 this->interval.end = (this->interval.start = start) + n;
843 return true;
844 }
845 this->interval.invalidate();
846 return false;
847 }
848
849 protected:
850 std::basic_string<T> m_str;
851 };
852
855#ifdef _UNICODE
856 using tstring = wstring;
857#else
858 using tstring = string;
859#endif
860
865 {
866 public:
867 sgml_string(const char* str, size_t count = SIZE_MAX, _In_ const std::locale& locale = std::locale()) :
868 sgml_parser(locale),
869 m_str(sgml2str(str, count))
870 {}
871
872 virtual bool match(
873 _In_reads_or_z_(end) const char* text,
874 _In_ size_t start = 0,
875 _In_ size_t end = SIZE_MAX,
876 _In_ int flags = match_default)
877 {
878 _Assume_(text || start >= end);
879 const wchar_t* str = m_str.c_str();
880 const bool case_insensitive = flags & match_case_insensitive ? true : false;
881 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
882 for (this->interval.end = start;;) {
883 if (!*str) {
884 this->interval.start = start;
885 return true;
886 }
887 if (this->interval.end >= end || !text[this->interval.end]) {
888 this->interval.invalidate();
889 return false;
890 }
891 wchar_t buf[3];
892 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
893 for (; *chr; ++str, ++chr) {
894 if (!*str ||
895 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
896 {
897 this->interval.invalidate();
898 return false;
899 }
900 }
901 }
902 }
903
904 protected:
905 std::wstring m_str;
906 };
907
911 template <class T>
913 {
914 public:
915 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = SIZE_MAX, bool greedy = true) :
916 m_el(el),
920 {}
921
922 virtual bool match(
923 _In_reads_or_z_(end) const T* text,
924 _In_ size_t start = 0,
925 _In_ size_t end = SIZE_MAX,
926 _In_ int flags = match_default)
927 {
928 _Assume_(text || start >= end);
929 this->interval.start = this->interval.end = start;
930 for (size_t i = 0; ; i++) {
931 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
932 return true;
933 if (!m_el->match(text, this->interval.end, end, flags)) {
934 if (i >= m_min_iterations)
935 return true;
936 break;
937 }
938 if (m_el->interval.end == this->interval.end) {
939 // Element did match, but the matching interval was empty. Quit instead of spinning.
940 return true;
941 }
942 this->interval.end = m_el->interval.end;
943 }
944 this->interval.invalidate();
945 return false;
946 }
947
948 protected:
949 std::shared_ptr<basic_parser<T>> m_el;
952 bool m_greedy;
953 };
954
957#ifdef _UNICODE
958 using titerations = witerations;
959#else
960 using titerations = iterations;
961#endif
963
967 template <class T>
969 {
970 protected:
971 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
972
973 public:
975 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
976 _In_ size_t count,
977 _In_ const std::locale& locale = std::locale()) :
978 basic_parser<T>(locale)
979 {
980 _Assume_(el || !count);
981 m_collection.reserve(count);
982 for (size_t i = 0; i < count; i++)
983 m_collection.push_back(el[i]);
984 }
985
987 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
988 _In_ const std::locale& locale = std::locale()) :
989 basic_parser<T>(locale),
990 m_collection(std::move(collection))
991 {}
992
993 virtual void invalidate()
994 {
995 for (auto& el : m_collection)
996 el->invalidate();
998 }
999
1000 protected:
1001 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
1002 };
1003
1007 template <class T>
1009 {
1010 public:
1012 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1013 _In_ size_t count = 0,
1014 _In_ const std::locale& locale = std::locale()) :
1015 parser_collection<T>(el, count, locale)
1016 {}
1017
1019 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1020 _In_ const std::locale& locale = std::locale()) :
1021 parser_collection<T>(std::move(collection), locale)
1022 {}
1023
1024 virtual bool match(
1025 _In_reads_or_z_(end) const T* text,
1026 _In_ size_t start = 0,
1027 _In_ size_t end = SIZE_MAX,
1028 _In_ int flags = match_default)
1029 {
1030 _Assume_(text || start >= end);
1031 this->interval.end = start;
1032 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1033 if (!(*i)->match(text, this->interval.end, end, flags)) {
1034 for (++i; i != this->m_collection.end(); ++i)
1035 (*i)->invalidate();
1036 this->interval.invalidate();
1037 return false;
1038 }
1039 this->interval.end = (*i)->interval.end;
1040 }
1041 this->interval.start = start;
1042 return true;
1043 }
1044 };
1045
1048#ifdef _UNICODE
1049 using tsequence = wsequence;
1050#else
1051 using tsequence = sequence;
1052#endif
1054
1058 template <class T>
1060 {
1061 protected:
1062 basic_branch(_In_ const std::locale& locale) :
1063 parser_collection<T>(locale),
1064 hit_offset(SIZE_MAX)
1065 {}
1066
1067 public:
1069 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1070 _In_ size_t count = 0,
1071 _In_ const std::locale& locale = std::locale()) :
1072 parser_collection<T>(el, count, locale),
1073 hit_offset(SIZE_MAX)
1074 {}
1075
1077 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1078 _In_ const std::locale& locale = std::locale()) :
1079 parser_collection<T>(std::move(collection), locale),
1080 hit_offset(SIZE_MAX)
1081 {}
1082
1083 virtual bool match(
1084 _In_reads_or_z_(end) const T* text,
1085 _In_ size_t start = 0,
1086 _In_ size_t end = SIZE_MAX,
1087 _In_ int flags = match_default)
1088 {
1089 _Assume_(text || start >= end);
1090 hit_offset = 0;
1091 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1092 if ((*i)->match(text, start, end, flags)) {
1093 this->interval = (*i)->interval;
1094 for (++i; i != this->m_collection.end(); ++i)
1095 (*i)->invalidate();
1096 return true;
1097 }
1098 }
1099 hit_offset = SIZE_MAX;
1100 this->interval.invalidate();
1101 return false;
1102 }
1103
1104 virtual void invalidate()
1105 {
1106 hit_offset = SIZE_MAX;
1108 }
1109
1110 public:
1111 size_t hit_offset;
1112 };
1113
1114 using branch = basic_branch<char>;
1116#ifdef _UNICODE
1117 using tbranch = wbranch;
1118#else
1119 using tbranch = branch;
1120#endif
1122
1126 template <class T, class T_parser = basic_string<T>>
1128 {
1129 public:
1130 inline basic_string_branch(
1131 _In_reads_(count) const T* str_z = nullptr,
1132 _In_ size_t count = 0,
1133 _In_ const std::locale& locale = std::locale()) :
1134 basic_branch<T>(locale)
1135 {
1136 build(str_z, count);
1137 }
1138
1139 inline basic_string_branch(_In_z_ const T* str, ...) :
1140 basic_branch<T>(std::locale())
1141 {
1142 va_list params;
1143 va_start(params, str);
1144 build(str, params);
1145 va_end(params);
1146 }
1147
1148 inline basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1149 basic_branch<T>(locale)
1150 {
1151 va_list params;
1152 va_start(params, str);
1153 build(str, params);
1154 va_end(params);
1155 }
1156
1157 protected:
1158 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1159 {
1160 _Assume_(str_z || !count);
1161 if (count) {
1162 size_t offset, n;
1163 for (
1164 offset = n = 0;
1165 offset < count && str_z[offset];
1166 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1167 this->m_collection.reserve(n);
1168 for (
1169 offset = 0;
1170 offset < count && str_z[offset];
1171 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1172 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1173 }
1174 }
1175
1176 void build(_In_z_ const T* str, _In_ va_list params)
1177 {
1178 const T* p;
1179 for (
1180 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, SIZE_MAX, this->m_locale)));
1181 (p = va_arg(params, const T*)) != nullptr;
1182 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, SIZE_MAX, this->m_locale))));
1183 }
1184 };
1185
1188#ifdef _UNICODE
1190#else
1192#endif
1194
1198 template <class T>
1200 {
1201 public:
1203 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1204 _In_ size_t count = 0,
1205 _In_ const std::locale& locale = std::locale()) :
1206 parser_collection<T>(el, count, locale)
1207 {}
1208
1210 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1211 _In_ const std::locale& locale = std::locale()) :
1212 parser_collection<T>(std::move(collection), locale)
1213 {}
1214
1215 virtual bool match(
1216 _In_reads_or_z_(end) const T* text,
1217 _In_ size_t start = 0,
1218 _In_ size_t end = SIZE_MAX,
1219 _In_ int flags = match_default)
1220 {
1221 _Assume_(text || start >= end);
1222 for (auto& el : this->m_collection)
1223 el->invalidate();
1224 if (match_recursively(text, start, end, flags)) {
1225 this->interval.start = start;
1226 return true;
1227 }
1228 this->interval.invalidate();
1229 return false;
1230 }
1231
1232 protected:
1233 bool match_recursively(
1234 _In_reads_or_z_(end) const T* text,
1235 _In_ size_t start = 0,
1236 _In_ size_t end = SIZE_MAX,
1237 _In_ int flags = match_default)
1238 {
1239 bool all_matched = true;
1240 for (auto& el : this->m_collection) {
1241 if (!el->interval) {
1242 // Element was not matched in permutatuion yet.
1243 all_matched = false;
1244 if (el->match(text, start, end, flags)) {
1245 // Element matched for the first time.
1246 if (match_recursively(text, el->interval.end, end, flags)) {
1247 // Rest of the elements matched too.
1248 return true;
1249 }
1250 el->invalidate();
1251 }
1252 }
1253 }
1254 if (all_matched) {
1255 this->interval.end = start;
1256 return true;
1257 }
1258 return false;
1259 }
1260 };
1261
1264#ifdef _UNICODE
1265 using tpermutation = wpermutation;
1266#else
1267 using tpermutation = permutation;
1268#endif
1270
1274 template <class T>
1275 class basic_integer : public basic_parser<T>
1276 {
1277 public:
1278 basic_integer(_In_ const std::locale& locale = std::locale()) :
1279 basic_parser<T>(locale),
1280 value(0)
1281 {}
1282
1283 virtual void invalidate()
1284 {
1285 value = 0;
1287 }
1288
1289 public:
1290 size_t value;
1291 };
1292
1296 template <class T>
1298 {
1299 public:
1301 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1302 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1303 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1304 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1305 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1306 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1307 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1308 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1309 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1310 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1311 _In_ const std::locale& locale = std::locale()) :
1312 basic_integer<T>(locale),
1313 m_digit_0(digit_0),
1314 m_digit_1(digit_1),
1315 m_digit_2(digit_2),
1316 m_digit_3(digit_3),
1317 m_digit_4(digit_4),
1318 m_digit_5(digit_5),
1319 m_digit_6(digit_6),
1320 m_digit_7(digit_7),
1321 m_digit_8(digit_8),
1322 m_digit_9(digit_9)
1323 {}
1324
/// Matches a run of decimal digits starting at `start` and accumulates the number in this->value.
///
/// \param[in] text   Text to parse
/// \param[in] start  Offset in text to start matching at
/// \param[in] end    Offset in text to stop matching at
/// \param[in] flags  Matching flags, forwarded to the digit parsers
///
/// NOTE(review): original line 1347 (the statement opening the success block after the loop)
/// is absent from this listing; presumably it checks that at least one digit was consumed -
/// confirm against the header.
1325 virtual bool match(
1326 _In_reads_or_z_(end) const T* text,
1327 _In_ size_t start = 0,
1328 _In_ size_t end = SIZE_MAX,
1329 _In_ int flags = match_default)
1330 {
1331 _Assume_(text || start >= end);
// Scan until `end` or a zero terminator; the loop has no increment because each branch
// advances this->interval.end to the end of the digit parser that matched.
1332 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1333 size_t dig;
// The digit parsers are tried in the order 0..9; the first one that matches wins.
1334 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1335 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1336 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1337 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1338 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1339 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1340 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1341 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1342 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1343 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
// Not a digit: the number ends here.
1344 else break;
// Base-10 accumulation; no overflow check is made.
1345 this->value = this->value * 10 + dig;
1346 }
1348 this->interval.start = start;
1349 return true;
1350 }
1351 this->interval.invalidate();
1352 return false;
1353 }
1354
1355 protected:
1356 std::shared_ptr<basic_parser<T>>
1357 m_digit_0,
1358 m_digit_1,
1359 m_digit_2,
1360 m_digit_3,
1361 m_digit_4,
1362 m_digit_5,
1363 m_digit_6,
1364 m_digit_7,
1365 m_digit_8,
1366 m_digit_9;
1367 };
1368
1371#ifdef _UNICODE
1372 using tinteger10 = winteger10;
1373#else
1374 using tinteger10 = integer10;
1375#endif
1377
1381 template <class T>
1383 {
1384 public:
1386 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1387 _In_ const std::shared_ptr<basic_set<T>>& separator,
1388 _In_ const std::locale& locale = std::locale()) :
1389 basic_integer<T>(locale),
1390 digit_count(0),
1391 has_separators(false),
1392 m_digits(digits),
1393 m_separator(separator)
1394 {}
1395
1396 virtual bool match(
1397 _In_reads_or_z_(end) const T* text,
1398 _In_ size_t start = 0,
1399 _In_ size_t end = SIZE_MAX,
1400 _In_ int flags = match_default)
1401 {
1402 _Assume_(text || start >= end);
1403 if (m_digits->match(text, start, end, flags)) {
1404 // Leading part match.
1405 this->value = m_digits->value;
1406 digit_count = m_digits->interval.size();
1407 has_separators = false;
1408 this->interval.start = start;
1409 this->interval.end = m_digits->interval.end;
1410 if (m_digits->interval.size() <= 3) {
1411 // Maybe separated with thousand separators?
1412 size_t hit_offset = SIZE_MAX;
1413 while (m_separator->match(text, this->interval.end, end, flags) &&
1414 (hit_offset == SIZE_MAX || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1415 m_digits->match(text, m_separator->interval.end, end, flags) &&
1416 m_digits->interval.size() == 3)
1417 {
1418 // Thousand separator and three-digit integer followed.
1419 this->value = this->value * 1000 + m_digits->value;
1420 digit_count += 3;
1421 has_separators = true;
1422 this->interval.end = m_digits->interval.end;
1423 hit_offset = m_separator->hit_offset;
1424 }
1425 }
1426
1427 return true;
1428 }
1429 this->value = 0;
1430 this->interval.invalidate();
1431 return false;
1432 }
1433
/// Clears the digit count and the separator flag recorded by the last match.
/// NOTE(review): original line 1438 is absent from this listing; presumably the base-class
/// invalidate() call - confirm against the header.
1434 virtual void invalidate()
1435 {
1436 digit_count = 0;
1437 has_separators = false;
1439 }
1440
1441 public:
1444
1445 protected:
1446 std::shared_ptr<basic_integer10<T>> m_digits;
1447 std::shared_ptr<basic_set<T>> m_separator;
1448 };
1449
1452#ifdef _UNICODE
1453 using tinteger10ts = winteger10ts;
1454#else
1455 using tinteger10ts = integer10ts;
1456#endif
1458
1462 template <class T>
1464 {
1465 public:
1467 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1468 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1469 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1470 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1471 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1472 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1473 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1474 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1475 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1476 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1477 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1478 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1479 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1480 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1481 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1482 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1483 _In_ const std::locale& locale = std::locale()) :
1484 basic_integer<T>(locale),
1485 m_digit_0(digit_0),
1486 m_digit_1(digit_1),
1487 m_digit_2(digit_2),
1488 m_digit_3(digit_3),
1489 m_digit_4(digit_4),
1490 m_digit_5(digit_5),
1491 m_digit_6(digit_6),
1492 m_digit_7(digit_7),
1493 m_digit_8(digit_8),
1494 m_digit_9(digit_9),
1495 m_digit_10(digit_10),
1496 m_digit_11(digit_11),
1497 m_digit_12(digit_12),
1498 m_digit_13(digit_13),
1499 m_digit_14(digit_14),
1500 m_digit_15(digit_15)
1501 {}
1502
/// Matches a run of hexadecimal digits starting at `start` and accumulates the number in this->value.
///
/// \param[in] text   Text to parse
/// \param[in] start  Offset in text to start matching at
/// \param[in] end    Offset in text to stop matching at
/// \param[in] flags  Matching flags, forwarded to the digit parsers
///
/// NOTE(review): original line 1531 (the statement opening the success block after the loop)
/// is absent from this listing; presumably it checks that at least one digit was consumed -
/// confirm against the header.
1503 virtual bool match(
1504 _In_reads_or_z_(end) const T* text,
1505 _In_ size_t start = 0,
1506 _In_ size_t end = SIZE_MAX,
1507 _In_ int flags = match_default)
1508 {
1509 _Assume_(text || start >= end);
// Scan until `end` or a zero terminator; each branch advances this->interval.end itself.
1510 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1511 size_t dig;
// The sixteen digit parsers are tried in the order 0..15; the first one that matches wins.
1512 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1513 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1514 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1515 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1516 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1517 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1518 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1519 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1520 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1521 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1522 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1523 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1524 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1525 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1526 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1527 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
// Not a digit: the number ends here.
1528 else break;
// Base-16 accumulation; no overflow check is made.
1529 this->value = this->value * 16 + dig;
1530 }
1532 this->interval.start = start;
1533 return true;
1534 }
1535 this->interval.invalidate();
1536 return false;
1537 }
1538
1539 protected:
1540 std::shared_ptr<basic_parser<T>>
1541 m_digit_0,
1542 m_digit_1,
1543 m_digit_2,
1544 m_digit_3,
1545 m_digit_4,
1546 m_digit_5,
1547 m_digit_6,
1548 m_digit_7,
1549 m_digit_8,
1550 m_digit_9,
1551 m_digit_10,
1552 m_digit_11,
1553 m_digit_12,
1554 m_digit_13,
1555 m_digit_14,
1556 m_digit_15;
1557 };
1558
1561#ifdef _UNICODE
1562 using tinteger16 = winteger16;
1563#else
1564 using tinteger16 = integer16;
1565#endif
1567
1571 template <class T>
1573 {
1574 public:
1576 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1577 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1578 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1579 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1580 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1581 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1582 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1583 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1584 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1585 _In_ const std::locale& locale = std::locale()) :
1586 basic_integer<T>(locale),
1587 m_digit_1(digit_1),
1588 m_digit_5(digit_5),
1589 m_digit_10(digit_10),
1590 m_digit_50(digit_50),
1591 m_digit_100(digit_100),
1592 m_digit_500(digit_500),
1593 m_digit_1000(digit_1000),
1594 m_digit_5000(digit_5000),
1595 m_digit_10000(digit_10000)
1596 {}
1597
/// Matches a Roman numeral starting at `start` and accumulates its value in this->value.
///
/// \param[in] text   Text to parse
/// \param[in] start  Offset in text to start matching at
/// \param[in] end    Offset in text to stop matching at
/// \param[in] flags  Matching flags, forwarded to the digit parsers
///
/// \returns true when the accumulated value is non-zero; false otherwise
1598 virtual bool match(
1599 _In_reads_or_z_(end) const T* text,
1600 _In_ size_t start = 0,
1601 _In_ size_t end = SIZE_MAX,
1602 _In_ int flags = match_default)
1603 {
1604 _Assume_(text || start >= end);
// NOTE(review): original line 1606 (the declaration of `dig`) is absent from this listing.
// From the uses below: dig[0] is the current digit, dig[1]..dig[3] are the three preceding
// ones, dig[4] is the first digit of the numeral and is expected to start as SIZE_MAX -
// confirm the initial values against the header.
1605 size_t
1607 end2;
1608
// The increment expression shifts the digit history by one and advances to end2, the end
// of the digit just accepted.
1609 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
// Digit parsers may be null; a null parser is skipped.
1610 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1611 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1612 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1613 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1614 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1615 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1616 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1617 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1618 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1619 else break;
1620
1621 // Store first digit.
1622 if (dig[4] == SIZE_MAX) dig[4] = dig[0];
1623
1624 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1625 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1626 break;
1627 }
1628 if (dig[0] <= dig[1]) {
1629 // Digit is less or equal previous one: add.
1630 this->value += dig[0];
1631 }
// Subtractive pairs accepted: 1 before 5/10, 10 before 50/100, 100 before 500/1000,
// 1000 before 5000/10000.
1632 else if (
1633 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1634 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1635 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1636 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1637 {
1638 // Digit is up to two orders bigger than previous one: subtract. But...
1639 if (dig[2] < dig[0]) {
1640 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1641 break;
1642 }
1643 this->value -= dig[1]; // Cancel addition in the previous step.
1644 dig[0] -= dig[1]; // Combine last two digits.
1645 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1646 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1647 this->value += dig[0]; // Add combined value.
1648 }
1649 else {
1650 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1651 break;
1652 }
1653 }
// A zero value means no digit was accepted: report no match.
1654 if (this->value) {
1655 this->interval.start = start;
1656 return true;
1657 }
1658 this->interval.invalidate();
1659 return false;
1660 }
1661
1662 protected:
1663 std::shared_ptr<basic_parser<T>>
1664 m_digit_1,
1665 m_digit_5,
1666 m_digit_10,
1667 m_digit_50,
1668 m_digit_100,
1669 m_digit_500,
1670 m_digit_1000,
1671 m_digit_5000,
1672 m_digit_10000;
1673 };
1674
1677#ifdef _UNICODE
1679#else
1681#endif
1683
1687 template <class T>
1689 {
1690 public:
1692 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1693 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1694 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1695 _In_ const std::locale& locale = std::locale()) :
1696 basic_parser<T>(locale),
1697 numerator(_numerator),
1698 fraction_line(_fraction_line),
1699 denominator(_denominator)
1700 {}
1701
1702 virtual bool match(
1703 _In_reads_or_z_(end) const T* text,
1704 _In_ size_t start = 0,
1705 _In_ size_t end = SIZE_MAX,
1706 _In_ int flags = match_default)
1707 {
1708 _Assume_(text || start >= end);
1709 if (numerator->match(text, start, end, flags) &&
1710 fraction_line->match(text, numerator->interval.end, end, flags) &&
1711 denominator->match(text, fraction_line->interval.end, end, flags))
1712 {
1713 this->interval.start = start;
1714 this->interval.end = denominator->interval.end;
1715 return true;
1716 }
1717 numerator->invalidate();
1718 fraction_line->invalidate();
1719 denominator->invalidate();
1720 this->interval.invalidate();
1721 return false;
1722 }
1723
/// Invalidates the numerator, fraction line and denominator sub-parsers.
/// NOTE(review): original line 1729 is absent from this listing; presumably the base-class
/// invalidate() call - confirm against the header.
1724 virtual void invalidate()
1725 {
1726 numerator->invalidate();
1727 fraction_line->invalidate();
1728 denominator->invalidate();
1730 }
1731
1732 public:
1733 std::shared_ptr<basic_parser<T>> numerator;
1734 std::shared_ptr<basic_parser<T>> fraction_line;
1735 std::shared_ptr<basic_parser<T>> denominator;
1736 };
1737
1740#ifdef _UNICODE
1741 using tfraction = wfraction;
1742#else
1743 using tfraction = fraction;
1744#endif
1746
1750 template <class T>
1751 class basic_score : public basic_parser<T>
1752 {
1753 public:
1755 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1756 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1757 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1758 _In_ const std::shared_ptr<basic_parser<T>>& space,
1759 _In_ const std::locale& locale = std::locale()) :
1760 basic_parser<T>(locale),
1761 home(_home),
1762 separator(_separator),
1763 guest(_guest),
1764 m_space(space)
1765 {}
1766
1767 virtual bool match(
1768 _In_reads_or_z_(end) const T* text,
1769 _In_ size_t start = 0,
1770 _In_ size_t end = SIZE_MAX,
1771 _In_ int flags = match_default)
1772 {
1773 _Assume_(text || start >= end);
1774 this->interval.end = start;
1775
1776 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1777
1778 if (home->match(text, this->interval.end, end, flags))
1779 this->interval.end = home->interval.end;
1780 else
1781 goto end;
1782
1783 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1784
1785 if (separator->match(text, this->interval.end, end, flags))
1786 this->interval.end = separator->interval.end;
1787 else
1788 goto end;
1789
1790 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1791
1792 if (guest->match(text, this->interval.end, end, flags))
1793 this->interval.end = guest->interval.end;
1794 else
1795 goto end;
1796
1797 this->interval.start = start;
1798 return true;
1799
1800 end:
1801 home->invalidate();
1802 separator->invalidate();
1803 guest->invalidate();
1804 this->interval.invalidate();
1805 return false;
1806 }
1807
/// Invalidates the home, separator and guest sub-parsers.
/// NOTE(review): original line 1813 is absent from this listing; presumably the base-class
/// invalidate() call - confirm against the header.
1808 virtual void invalidate()
1809 {
1810 home->invalidate();
1811 separator->invalidate();
1812 guest->invalidate();
1814 }
1815
1816 public:
1817 std::shared_ptr<basic_parser<T>> home;
1818 std::shared_ptr<basic_parser<T>> separator;
1819 std::shared_ptr<basic_parser<T>> guest;
1820
1821 protected:
1822 std::shared_ptr<basic_parser<T>> m_space;
1823 };
1824
1825 using score = basic_score<char>;
1827#ifdef _UNICODE
1828 using tscore = wscore;
1829#else
1830 using tscore = score;
1831#endif
1833
1837 template <class T>
1839 {
1840 public:
1842 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1843 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1844 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1845 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1846 _In_ const std::locale& locale = std::locale()) :
1847 basic_parser<T>(locale),
1852 {}
1853
1854 virtual bool match(
1855 _In_reads_or_z_(end) const T* text,
1856 _In_ size_t start = 0,
1857 _In_ size_t end = SIZE_MAX,
1858 _In_ int flags = match_default)
1859 {
1860 _Assume_(text || start >= end);
1861 this->interval.end = start;
1862 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1863 this->interval.end = positive_sign->interval.end;
1864 if (negative_sign) negative_sign->invalidate();
1865 if (special_sign) special_sign->invalidate();
1866 }
1867 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1868 this->interval.end = negative_sign->interval.end;
1869 if (positive_sign) positive_sign->invalidate();
1870 if (special_sign) special_sign->invalidate();
1871 }
1872 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1873 this->interval.end = special_sign->interval.end;
1874 if (positive_sign) positive_sign->invalidate();
1875 if (negative_sign) negative_sign->invalidate();
1876 }
1877 else {
1878 if (positive_sign) positive_sign->invalidate();
1879 if (negative_sign) negative_sign->invalidate();
1880 if (special_sign) special_sign->invalidate();
1881 }
1882 if (number->match(text, this->interval.end, end, flags)) {
1883 this->interval.start = start;
1884 this->interval.end = number->interval.end;
1885 return true;
1886 }
1887 if (positive_sign) positive_sign->invalidate();
1888 if (negative_sign) negative_sign->invalidate();
1889 if (special_sign) special_sign->invalidate();
1890 number->invalidate();
1891 this->interval.invalidate();
1892 return false;
1893 }
1894
/// Invalidates the sign sub-parsers (those that are set) and the number sub-parser.
/// NOTE(review): original line 1901 is absent from this listing; presumably the base-class
/// invalidate() call - confirm against the header.
1895 virtual void invalidate()
1896 {
// The sign parsers are optional and may be null.
1897 if (positive_sign) positive_sign->invalidate();
1898 if (negative_sign) negative_sign->invalidate();
1899 if (special_sign) special_sign->invalidate();
1900 number->invalidate();
1902 }
1903
1904 public:
1905 std::shared_ptr<basic_parser<T>> positive_sign;
1906 std::shared_ptr<basic_parser<T>> negative_sign;
1907 std::shared_ptr<basic_parser<T>> special_sign;
1908 std::shared_ptr<basic_parser<T>> number;
1909 };
1910
1913#ifdef _UNICODE
1915#else
1917#endif
1919
1923 template <class T>
1925 {
1926 public:
1928 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1929 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1930 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1931 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1932 _In_ const std::shared_ptr<basic_parser<T>>& space,
1933 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1934 _In_ const std::locale& locale = std::locale()) :
1935 basic_parser<T>(locale),
1941 m_space(space)
1942 {}
1943
1944 virtual bool match(
1945 _In_reads_or_z_(end) const T* text,
1946 _In_ size_t start = 0,
1947 _In_ size_t end = SIZE_MAX,
1948 _In_ int flags = match_default)
1949 {
1950 _Assume_(text || start >= end);
1951 this->interval.end = start;
1952
1953 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1954 this->interval.end = positive_sign->interval.end;
1955 if (negative_sign) negative_sign->invalidate();
1956 if (special_sign) special_sign->invalidate();
1957 }
1958 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1959 this->interval.end = negative_sign->interval.end;
1960 if (positive_sign) positive_sign->invalidate();
1961 if (special_sign) special_sign->invalidate();
1962 }
1963 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1964 this->interval.end = special_sign->interval.end;
1965 if (positive_sign) positive_sign->invalidate();
1966 if (negative_sign) negative_sign->invalidate();
1967 }
1968 else {
1969 if (positive_sign) positive_sign->invalidate();
1970 if (negative_sign) negative_sign->invalidate();
1971 if (special_sign) special_sign->invalidate();
1972 }
1973
1974 // Check for <integer> <fraction>
1975 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1976 if (integer->match(text, this->interval.end, end, flags) &&
1977 m_space->match(text, integer->interval.end, end, space_match_flags))
1978 {
1979 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1980 if (fraction->match(text, this->interval.end, end, flags)) {
1981 this->interval.start = start;
1982 this->interval.end = fraction->interval.end;
1983 return true;
1984 }
1985 fraction->invalidate();
1986 this->interval.start = start;
1987 this->interval.end = integer->interval.end;
1988 return true;
1989 }
1990
1991 // Check for <fraction>
1992 if (fraction->match(text, this->interval.end, end, flags)) {
1993 integer->invalidate();
1994 this->interval.start = start;
1995 this->interval.end = fraction->interval.end;
1996 return true;
1997 }
1998
1999 // Check for <integer>
2000 if (integer->match(text, this->interval.end, end, flags)) {
2001 fraction->invalidate();
2002 this->interval.start = start;
2003 this->interval.end = integer->interval.end;
2004 return true;
2005 }
2006
2007 if (positive_sign) positive_sign->invalidate();
2008 if (negative_sign) negative_sign->invalidate();
2009 if (special_sign) special_sign->invalidate();
2010 integer->invalidate();
2011 fraction->invalidate();
2012 this->interval.invalidate();
2013 return false;
2014 }
2015
/// Invalidates the sign sub-parsers (those that are set) and the integer and fraction sub-parsers.
/// NOTE(review): original line 2023 is absent from this listing; presumably the base-class
/// invalidate() call - confirm against the header.
2016 virtual void invalidate()
2017 {
// The sign parsers are optional and may be null.
2018 if (positive_sign) positive_sign->invalidate();
2019 if (negative_sign) negative_sign->invalidate();
2020 if (special_sign) special_sign->invalidate();
2021 integer->invalidate();
2022 fraction->invalidate();
2024 }
2025
2026 public:
2027 std::shared_ptr<basic_parser<T>> positive_sign;
2028 std::shared_ptr<basic_parser<T>> negative_sign;
2029 std::shared_ptr<basic_parser<T>> special_sign;
2030 std::shared_ptr<basic_parser<T>> integer;
2031 std::shared_ptr<basic_parser<T>> fraction;
2032
2033 protected:
2034 std::shared_ptr<basic_parser<T>> m_space;
2035 };
2036
2039#ifdef _UNICODE
2041#else
2043#endif
2045
2049 template <class T>
2051 {
2052 public:
2054 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2055 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2056 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2057 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2058 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2059 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2060 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2061 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2062 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2063 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2064 _In_ const std::locale& locale = std::locale()) :
2065 basic_parser<T>(locale),
2076 value(std::numeric_limits<double>::quiet_NaN())
2077 {}
2078
2079 virtual bool match(
2080 _In_reads_or_z_(end) const T* text,
2081 _In_ size_t start = 0,
2082 _In_ size_t end = SIZE_MAX,
2083 _In_ int flags = match_default)
2084 {
2085 _Assume_(text || start >= end);
2086 this->interval.end = start;
2087
2088 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2089 this->interval.end = positive_sign->interval.end;
2090 if (negative_sign) negative_sign->invalidate();
2091 if (special_sign) special_sign->invalidate();
2092 }
2093 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2094 this->interval.end = negative_sign->interval.end;
2095 if (positive_sign) positive_sign->invalidate();
2096 if (special_sign) special_sign->invalidate();
2097 }
2098 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2099 this->interval.end = special_sign->interval.end;
2100 if (positive_sign) positive_sign->invalidate();
2101 if (negative_sign) negative_sign->invalidate();
2102 }
2103 else {
2104 if (positive_sign) positive_sign->invalidate();
2105 if (negative_sign) negative_sign->invalidate();
2106 if (special_sign) special_sign->invalidate();
2107 }
2108
2109 if (integer->match(text, this->interval.end, end, flags))
2110 this->interval.end = integer->interval.end;
2111
2112 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2113 decimal->match(text, decimal_separator->interval.end, end, flags))
2114 this->interval.end = decimal->interval.end;
2115 else {
2116 decimal_separator->invalidate();
2117 decimal->invalidate();
2118 }
2119
2120 if (integer->interval.empty() &&
2121 decimal->interval.empty())
2122 {
2123 // No integer part, no decimal part.
2124 if (positive_sign) positive_sign->invalidate();
2125 if (negative_sign) negative_sign->invalidate();
2126 if (special_sign) special_sign->invalidate();
2127 integer->invalidate();
2128 decimal_separator->invalidate();
2129 decimal->invalidate();
2130 if (exponent_symbol) exponent_symbol->invalidate();
2131 if (positive_exp_sign) positive_exp_sign->invalidate();
2132 if (negative_exp_sign) negative_exp_sign->invalidate();
2133 if (exponent) exponent->invalidate();
2134 this->interval.invalidate();
2135 return false;
2136 }
2137
2138 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2139 ((positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2140 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags)) ||
2141 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2142 {
2143 this->interval.end = exponent->interval.end;
2144 if (negative_exp_sign) negative_exp_sign->invalidate();
2145 }
2146 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2147 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2148 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2149 {
2150 this->interval.end = exponent->interval.end;
2151 if (positive_exp_sign) positive_exp_sign->invalidate();
2152 }
2153 else {
2154 if (exponent_symbol) exponent_symbol->invalidate();
2155 if (positive_exp_sign) positive_exp_sign->invalidate();
2156 if (negative_exp_sign) negative_exp_sign->invalidate();
2157 if (exponent) exponent->invalidate();
2158 }
2159
2160 value = (double)integer->value;
2161 if (decimal->interval)
2162 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2163 if (negative_sign && negative_sign->interval)
2164 value = -value;
2165 if (exponent && exponent->interval) {
2166 double e = (double)exponent->value;
2167 if (negative_exp_sign && negative_exp_sign->interval)
2168 e = -e;
2169 value *= pow(10.0, e);
2170 }
2171
2172 this->interval.start = start;
2173 return true;
2174 }
2175
/// Invalidates every sub-parser that is set and resets `value` to a quiet NaN.
/// NOTE(review): original line 2189 is absent from this listing; presumably the base-class
/// invalidate() call - confirm against the header.
2176 virtual void invalidate()
2177 {
// The sign parsers are optional and may be null.
2178 if (positive_sign) positive_sign->invalidate();
2179 if (negative_sign) negative_sign->invalidate();
2180 if (special_sign) special_sign->invalidate();
// integer, decimal_separator and decimal are used without a null test.
2181 integer->invalidate();
2182 decimal_separator->invalidate();
2183 decimal->invalidate();
// The exponent-related parsers are optional and may be null.
2184 if (exponent_symbol) exponent_symbol->invalidate();
2185 if (positive_exp_sign) positive_exp_sign->invalidate();
2186 if (negative_exp_sign) negative_exp_sign->invalidate();
2187 if (exponent) exponent->invalidate();
2188 value = std::numeric_limits<double>::quiet_NaN();
2190 }
2191
2192 public:
2193 std::shared_ptr<basic_parser<T>> positive_sign;
2194 std::shared_ptr<basic_parser<T>> negative_sign;
2195 std::shared_ptr<basic_parser<T>> special_sign;
2196 std::shared_ptr<basic_integer<T>> integer;
2197 std::shared_ptr<basic_parser<T>> decimal_separator;
2198 std::shared_ptr<basic_integer<T>> decimal;
2199 std::shared_ptr<basic_parser<T>> exponent_symbol;
2200 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2201 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2202 std::shared_ptr<basic_integer<T>> exponent;
2203 double value;
2204 };
2205
2208#ifdef _UNICODE
2210#else
2212#endif
2214
2218 template <class T>
2220 {
2221 public:
2223 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2224 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2225 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2226 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2227 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2228 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2229 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2230 _In_ const std::locale& locale = std::locale()) :
2231 basic_parser<T>(locale),
2239 {}
2240
2241 virtual bool match(
2242 _In_reads_or_z_(end) const T* text,
2243 _In_ size_t start = 0,
2244 _In_ size_t end = SIZE_MAX,
2245 _In_ int flags = match_default)
2246 {
2247 _Assume_(text || start >= end);
2248 this->interval.end = start;
2249
2250 if (positive_sign->match(text, this->interval.end, end, flags)) {
2251 this->interval.end = positive_sign->interval.end;
2252 if (negative_sign) negative_sign->invalidate();
2253 if (special_sign) special_sign->invalidate();
2254 }
2255 else if (negative_sign->match(text, this->interval.end, end, flags)) {
2256 this->interval.end = negative_sign->interval.end;
2257 if (positive_sign) positive_sign->invalidate();
2258 if (special_sign) special_sign->invalidate();
2259 }
2260 else if (special_sign->match(text, this->interval.end, end, flags)) {
2261 this->interval.end = special_sign->interval.end;
2262 if (positive_sign) positive_sign->invalidate();
2263 if (negative_sign) negative_sign->invalidate();
2264 }
2265 else {
2266 if (positive_sign) positive_sign->invalidate();
2267 if (negative_sign) negative_sign->invalidate();
2268 if (special_sign) special_sign->invalidate();
2269 }
2270
2271 if (currency->match(text, this->interval.end, end, flags))
2272 this->interval.end = currency->interval.end;
2273 else {
2274 if (positive_sign) positive_sign->invalidate();
2275 if (negative_sign) negative_sign->invalidate();
2276 if (special_sign) special_sign->invalidate();
2277 integer->invalidate();
2278 decimal_separator->invalidate();
2279 decimal->invalidate();
2280 this->interval.invalidate();
2281 return false;
2282 }
2283
2284 if (integer->match(text, this->interval.end, end, flags))
2285 this->interval.end = integer->interval.end;
2286 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2287 decimal->match(text, decimal_separator->interval.end, end, flags))
2288 this->interval.end = decimal->interval.end;
2289 else {
2290 decimal_separator->invalidate();
2291 decimal->invalidate();
2292 }
2293
2294 if (integer->interval.empty() &&
2295 decimal->interval.empty())
2296 {
2297 // No integer part, no decimal part.
2298 if (positive_sign) positive_sign->invalidate();
2299 if (negative_sign) negative_sign->invalidate();
2300 if (special_sign) special_sign->invalidate();
2301 currency->invalidate();
2302 integer->invalidate();
2303 decimal_separator->invalidate();
2304 decimal->invalidate();
2305 this->interval.invalidate();
2306 return false;
2307 }
2308
2309 this->interval.start = start;
2310 return true;
2311 }
2312
/// Invalidates the sign sub-parsers (those that are set) and the currency, integer,
/// decimal separator and decimal sub-parsers.
/// NOTE(review): original line 2322 is absent from this listing; presumably the base-class
/// invalidate() call - confirm against the header.
2313 virtual void invalidate()
2314 {
// The sign parsers are optional and may be null.
2315 if (positive_sign) positive_sign->invalidate();
2316 if (negative_sign) negative_sign->invalidate();
2317 if (special_sign) special_sign->invalidate();
2318 currency->invalidate();
2319 integer->invalidate();
2320 decimal_separator->invalidate();
2321 decimal->invalidate();
2323 }
2324
2325 public:
2326 std::shared_ptr<basic_parser<T>> positive_sign;
2327 std::shared_ptr<basic_parser<T>> negative_sign;
2328 std::shared_ptr<basic_parser<T>> special_sign;
2329 std::shared_ptr<basic_parser<T>> currency;
2330 std::shared_ptr<basic_parser<T>> integer;
2331 std::shared_ptr<basic_parser<T>> decimal_separator;
2332 std::shared_ptr<basic_parser<T>> decimal;
2333 };
2334
2337#ifdef _UNICODE
2339#else
2341#endif
2343
2347 template <class T>
2349 {
2350 public:
2352 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2353 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2354 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2355 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2356 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2357 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2358 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2359 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2360 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2361 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2362 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2363 _In_ const std::locale& locale = std::locale()) :
2364 basic_parser<T>(locale),
2365 m_digit_0(digit_0),
2366 m_digit_1(digit_1),
2367 m_digit_2(digit_2),
2368 m_digit_3(digit_3),
2369 m_digit_4(digit_4),
2370 m_digit_5(digit_5),
2371 m_digit_6(digit_6),
2372 m_digit_7(digit_7),
2373 m_digit_8(digit_8),
2374 m_digit_9(digit_9),
2375 m_separator(separator)
2376 {
2377 value.s_addr = 0;
2378 }
2379
2380 virtual bool match(
2381 _In_reads_or_z_(end) const T* text,
2382 _In_ size_t start = 0,
2383 _In_ size_t end = SIZE_MAX,
2384 _In_ int flags = match_default)
2385 {
2386 _Assume_(text || start >= end);
2387 this->interval.end = start;
2388 value.s_addr = 0;
2389
2390 size_t i;
2391 for (i = 0; i < 4; i++) {
2392 if (i) {
2393 if (m_separator->match(text, this->interval.end, end, flags))
2394 this->interval.end = m_separator->interval.end;
2395 else
2396 goto error;
2397 }
2398
2399 components[i].start = this->interval.end;
2400 bool is_empty = true;
2401 size_t x;
2402 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2403 size_t dig, digit_end;
2404 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2405 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2406 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2407 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2408 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2409 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2410 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2411 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2412 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2413 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2414 else break;
2415 size_t x_n = x * 10 + dig;
2416 if (x_n <= 255) {
2417 x = x_n;
2418 this->interval.end = digit_end;
2419 is_empty = false;
2420 }
2421 else
2422 break;
2423 }
2424 if (is_empty)
2425 goto error;
2426 components[i].end = this->interval.end;
2427 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2428 }
2429 if (i < 4)
2430 goto error;
2431
2432 this->interval.start = start;
2433 return true;
2434
2435 error:
2436 components[0].start = 1;
2437 components[0].end = 0;
2438 components[1].start = 1;
2439 components[1].end = 0;
2440 components[2].start = 1;
2441 components[2].end = 0;
2442 components[3].start = 1;
2443 components[3].end = 0;
2444 value.s_addr = 0;
2445 this->interval.invalidate();
2446 return false;
2447 }
2448
// Resets the parsed result: every component interval is set to start = 1, end = 0
// (the same values the error path of match() writes; presumably the library's
// "invalid interval" encoding) and the address value is zeroed.
2449 virtual void invalidate()
2450 {
2451 components[0].start = 1;
2452 components[0].end = 0;
2453 components[1].start = 1;
2454 components[1].end = 0;
2455 components[2].start = 1;
2456 components[2].end = 0;
2457 components[3].start = 1;
2458 components[3].end = 0;
2459 value.s_addr = 0;
// NOTE(review): the embedded numbering jumps from 2459 to 2461, so one source line is
// missing from this listing - presumably the base-class invalidate() call. Confirm
// against the original file.
2461 }
2462
2463 public:
2466
2467 protected:
2468 std::shared_ptr<basic_parser<T>>
2469 m_digit_0,
2470 m_digit_1,
2471 m_digit_2,
2472 m_digit_3,
2473 m_digit_4,
2474 m_digit_5,
2475 m_digit_6,
2476 m_digit_7,
2477 m_digit_8,
2478 m_digit_9;
2479 std::shared_ptr<basic_parser<T>> m_separator;
2480 };
2481
2484#ifdef _UNICODE
2486#else
2488#endif
2490
2494 template <class T>
2496 {
2497 public:
2498 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2499
2500 virtual bool match(
2501 _In_reads_or_z_(end) const T* text,
2502 _In_ size_t start = 0,
2503 _In_ size_t end = SIZE_MAX,
2504 _In_ int flags = match_default)
2505 {
2506 _Assume_(text || start >= end);
2507 if (start < end && text[start]) {
2508 if (text[start] == '-' ||
2509 text[start] == '_' ||
2510 text[start] == ':' ||
2511 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2512 {
2513 this->interval.end = (this->interval.start = start) + 1;
2514 return true;
2515 }
2516 }
2517 this->interval.invalidate();
2518 return false;
2519 }
2520 };
2521
2524#ifdef _UNICODE
2526#else
2528#endif
2529
2534 {
2535 public:
2536 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2537
2538 virtual bool match(
2539 _In_reads_or_z_(end) const char* text,
2540 _In_ size_t start = 0,
2541 _In_ size_t end = SIZE_MAX,
2542 _In_ int flags = match_default)
2543 {
2544 _Assume_(text || start >= end);
2545 if (start < end && text[start]) {
2546 wchar_t buf[3];
2547 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2548 const wchar_t* chr_end = chr + stdex::strlen(chr);
2549 if (((chr[0] == L'-' ||
2550 chr[0] == L'_' ||
2551 chr[0] == L':') && chr[1] == 0) ||
2552 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2553 {
2554 this->interval.start = start;
2555 return true;
2556 }
2557 }
2558 this->interval.invalidate();
2559 return false;
2560 }
2561 };
2562
2566 template <class T>
2568 {
2569 public:
2571 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2572 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2573 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2574 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2575 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2576 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2577 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2578 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2579 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2580 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2581 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2582 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2583 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2584 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2585 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2586 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2587 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2588 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2589 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2590 _In_ const std::locale& locale = std::locale()) :
2591 basic_parser<T>(locale),
2592 m_digit_0(digit_0),
2593 m_digit_1(digit_1),
2594 m_digit_2(digit_2),
2595 m_digit_3(digit_3),
2596 m_digit_4(digit_4),
2597 m_digit_5(digit_5),
2598 m_digit_6(digit_6),
2599 m_digit_7(digit_7),
2600 m_digit_8(digit_8),
2601 m_digit_9(digit_9),
2602 m_digit_10(digit_10),
2603 m_digit_11(digit_11),
2604 m_digit_12(digit_12),
2605 m_digit_13(digit_13),
2606 m_digit_14(digit_14),
2607 m_digit_15(digit_15),
2608 m_separator(separator),
2609 m_scope_id_separator(scope_id_separator),
2611 {
2612 memset(&value, 0, sizeof(value));
2613 }
2614
// Matches a textual IPv6 address: up to eight hexadecimal groups (each at most 0xffff)
// separated by m_separator, with at most one doubled separator ("zero compaction"),
// optionally followed by m_scope_id_separator and scope_id.
//
// text  - text to parse
// start - index in text to start matching at
// end   - index at which the text ends (a zero terminator also ends it)
// flags - matching flags, forwarded unchanged to all sub-parsers
//
// Returns true on success, with the groups stored in value.s6_words and the interval
// set. On failure components[], value, scope_id and the interval are reset and false
// is returned.
2615 virtual bool match(
2616 _In_reads_or_z_(end) const T* text,
2617 _In_ size_t start = 0,
2618 _In_ size_t end = SIZE_MAX,
2619 _In_ int flags = match_default)
2620 {
2621 _Assume_(text || start >= end);
2622 this->interval.end = start;
2623 memset(&value, 0, sizeof(value));
2624
// i counts parsed groups; compaction_i is the group index at which "::" was seen
// (SIZE_MAX while none has been seen).
2625 size_t i, compaction_i = SIZE_MAX, compaction_start = start;
2626 for (i = 0; i < 8; i++) {
2627 bool is_empty = true;
2628
2629 if (m_separator->match(text, this->interval.end, end, flags)) {
2630 // : found
2631 this->interval.end = m_separator->interval.end;
2632 if (m_separator->match(text, this->interval.end, end, flags)) {
2633 // :: found
2634 if (compaction_i == SIZE_MAX) {
2635 // Zero compaction start
2636 compaction_i = i;
2637 compaction_start = m_separator->interval.start;
2638 this->interval.end = m_separator->interval.end;
2639 }
2640 else {
2641 // More than one zero compaction
// NOTE(review): interval.end was already advanced past the first ':' above, so
// that ':' stays inside the reported match when the loop breaks here - confirm
// this is intended.
2642 break;
2643 }
2644 }
2645 else if (!i) {
2646 // Leading : found
2647 goto error;
2648 }
2649 }
2650 else if (i) {
2651 // : missing
2652 break;
2653 }
2654
2655 components[i].start = this->interval.end;
2656 size_t x;
// Accumulate hexadecimal digits. The digit parsers are tried in order 0..15 and the
// first one that matches wins.
2657 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2658 size_t dig, digit_end;
2659 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2660 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2661 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2662 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2663 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2664 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2665 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2666 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2667 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2668 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2669 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2670 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2671 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2672 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2673 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2674 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2675 else break;
2676 size_t x_n = x * 16 + dig;
// A digit that would push the group above 0xffff is left unconsumed.
2677 if (x_n <= 0xffff) {
2678 x = x_n;
2679 this->interval.end = digit_end;
2680 is_empty = false;
2681 }
2682 else
2683 break;
2684 }
2685 if (is_empty) {
2686 if (compaction_i != SIZE_MAX) {
2687 // Zero compaction active: no sweat.
// NOTE(review): if a single ':' was consumed at the top of this iteration,
// interval.end already includes it when the loop breaks here - confirm this is
// intended.
2688 break;
2689 }
2690 goto error;
2691 }
2692 components[i].end = this->interval.end;
2693 this->value.s6_words[i] = (uint16_t)x;
2694 }
2695
2696 if (compaction_i != SIZE_MAX) {
2697 // Align components right due to zero compaction.
2698 size_t j, k;
// Move the groups parsed after "::" to the tail of the 8-word array.
2699 for (j = 8, k = i; k > compaction_i;) {
2700 this->value.s6_words[--j] = this->value.s6_words[--k];
// NOTE(review): the embedded numbering skips 2701 here; one statement is missing
// from this listing - confirm against the original file.
2702 }
// Zero-fill the words that the compaction stands for.
2703 for (; j > compaction_i;) {
2704 this->value.s6_words[--j] = 0;
2705 components[j].start =
// NOTE(review): the embedded numbering skips 2706 here; the rest of this assignment
// is missing from this listing - confirm against the original file.
2707 }
2708 }
// Without compaction all eight groups are mandatory.
2709 else if (i < 8)
2710 goto error;
2711
// The scope ID is optional: it is consumed only when both its separator and the
// scope_id parser are configured and both match; otherwise scope_id is invalidated.
2712 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2713 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2714 this->interval.end = scope_id->interval.end;
2715 else if (scope_id)
2716 scope_id->invalidate();
2717
2718 this->interval.start = start;
2719 return true;
2720
// Failure path: reset all results.
2721 error:
2722 components[0].start = 1;
2723 components[0].end = 0;
2724 components[1].start = 1;
2725 components[1].end = 0;
2726 components[2].start = 1;
2727 components[2].end = 0;
2728 components[3].start = 1;
2729 components[3].end = 0;
2730 components[4].start = 1;
2731 components[4].end = 0;
2732 components[5].start = 1;
2733 components[5].end = 0;
2734 components[6].start = 1;
2735 components[6].end = 0;
2736 components[7].start = 1;
2737 components[7].end = 0;
2738 memset(&value, 0, sizeof(value));
2739 if (scope_id) scope_id->invalidate();
2740 this->interval.invalidate();
2741 return false;
2742 }
2743
// Resets the parsed result: all eight component intervals are set to start = 1,
// end = 0 (the same values the error path of match() writes; presumably the library's
// "invalid interval" encoding), the address value is zeroed and the optional scope_id
// parser is invalidated when present.
2744 virtual void invalidate()
2745 {
2746 components[0].start = 1;
2747 components[0].end = 0;
2748 components[1].start = 1;
2749 components[1].end = 0;
2750 components[2].start = 1;
2751 components[2].end = 0;
2752 components[3].start = 1;
2753 components[3].end = 0;
2754 components[4].start = 1;
2755 components[4].end = 0;
2756 components[5].start = 1;
2757 components[5].end = 0;
2758 components[6].start = 1;
2759 components[6].end = 0;
2760 components[7].start = 1;
2761 components[7].end = 0;
2762 memset(&value, 0, sizeof(value));
2763 if (scope_id) scope_id->invalidate();
// NOTE(review): the embedded numbering jumps from 2763 to 2765, so one source line is
// missing from this listing - presumably the base-class invalidate() call. Confirm
// against the original file.
2765 }
2766
2767 public:
2770 std::shared_ptr<basic_parser<T>> scope_id;
2771
2772 protected:
2773 std::shared_ptr<basic_parser<T>>
2774 m_digit_0,
2775 m_digit_1,
2776 m_digit_2,
2777 m_digit_3,
2778 m_digit_4,
2779 m_digit_5,
2780 m_digit_6,
2781 m_digit_7,
2782 m_digit_8,
2783 m_digit_9,
2784 m_digit_10,
2785 m_digit_11,
2786 m_digit_12,
2787 m_digit_13,
2788 m_digit_14,
2789 m_digit_15;
2790 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2791 };
2792
2795#ifdef _UNICODE
2797#else
2799#endif
2801
2805 template <class T>
2807 {
2808 public:
2810 _In_ bool allow_idn,
2811 _In_ const std::locale& locale = std::locale()) :
2812 basic_parser<T>(locale),
2813 m_allow_idn(allow_idn),
2814 allow_on_edge(true)
2815 {}
2816
2817 virtual bool match(
2818 _In_reads_or_z_(end) const T* text,
2819 _In_ size_t start = 0,
2820 _In_ size_t end = SIZE_MAX,
2821 _In_ int flags = match_default)
2822 {
2823 _Assume_(text || start >= end);
2824 if (start < end && text[start]) {
2825 if (('A' <= text[start] && text[start] <= 'Z') ||
2826 ('a' <= text[start] && text[start] <= 'z') ||
2827 ('0' <= text[start] && text[start] <= '9'))
2828 allow_on_edge = true;
2829 else if (text[start] == '-')
2830 allow_on_edge = false;
2831 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2832 allow_on_edge = true;
2833 else {
2834 this->interval.invalidate();
2835 return false;
2836 }
2837 this->interval.end = (this->interval.start = start) + 1;
2838 return true;
2839 }
2840 this->interval.invalidate();
2841 return false;
2842 }
2843
2844 public:
2846
2847 protected:
2848 bool m_allow_idn;
2849 };
2850
2853#ifdef _UNICODE
2855#else
2857#endif
2858
2863 {
2864 public:
2866 _In_ bool allow_idn,
2867 _In_ const std::locale& locale = std::locale()) :
2869 {}
2870
2871 virtual bool match(
2872 _In_reads_or_z_(end) const char* text,
2873 _In_ size_t start = 0,
2874 _In_ size_t end = SIZE_MAX,
2875 _In_ int flags = match_default)
2876 {
2877 _Assume_(text || start >= end);
2878 if (start < end && text[start]) {
2879 wchar_t buf[3];
2880 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2881 const wchar_t* chr_end = chr + stdex::strlen(chr);
2882 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2883 ('a' <= chr[0] && chr[0] <= 'z') ||
2884 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2885 allow_on_edge = true;
2886 else if (chr[0] == '-' && chr[1] == 0)
2887 allow_on_edge = false;
2888 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2889 allow_on_edge = true;
2890 else {
2891 this->interval.invalidate();
2892 return false;
2893 }
2894 this->interval.start = start;
2895 return true;
2896 }
2897 this->interval.invalidate();
2898 return false;
2899 }
2900 };
2901
2905 template <class T>
2907 {
2908 public:
2910 _In_ bool allow_absolute,
2911 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2912 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2913 _In_ const std::locale& locale = std::locale()) :
2914 basic_parser<T>(locale),
2916 m_domain_char(domain_char),
2917 m_separator(separator)
2918 {}
2919
// Matches a DNS name: a sequence of at most 127 labels built from m_domain_char
// characters and separated by m_separator.
//
// A label must start with a character for which m_domain_char reports allow_on_edge.
// The reported end of the match only advances past characters that are allowed on an
// edge, so trailing characters that are not (e.g. a hyphen) are left out of the interval.
//
// text  - text to parse
// start - index in text to start matching at
// end   - index at which the text ends (a zero terminator also ends it)
// flags - matching flags, forwarded unchanged to m_domain_char and m_separator
//
// Returns true when at least one label start was matched; otherwise invalidates the
// interval and returns false.
2920 virtual bool match(
2921 _In_reads_or_z_(end) const T* text,
2922 _In_ size_t start = 0,
2923 _In_ size_t end = SIZE_MAX,
2924 _In_ int flags = match_default)
2925 {
2926 _Assume_(text || start >= end);
// i is the scan position; count is the number of labels started so far.
2927 size_t i = start, count;
2928 for (count = 0; i < end && text[i] && count < 127; count++) {
2929 if (m_domain_char->match(text, i, end, flags) &&
2930 m_domain_char->allow_on_edge)
2931 {
2932 // Domain start
2933 this->interval.end = i = m_domain_char->interval.end;
2934 while (i < end && text[i]) {
// A separator is only considered when the previous character may end a label.
2935 if (m_domain_char->allow_on_edge &&
2936 m_separator->match(text, i, end, flags))
2937 {
2938 // Domain end
// With m_allow_absolute the separator itself becomes part of the match; without it
// the match ends before the separator, but scanning continues after it.
2939 if (m_allow_absolute)
2940 this->interval.end = i = m_separator->interval.end;
2941 else {
2942 this->interval.end = i;
2943 i = m_separator->interval.end;
2944 }
2945 break;
2946 }
2947 if (m_domain_char->match(text, i, end, flags)) {
// Only edge-capable characters extend the reported match; others are scanned over
// provisionally.
2948 if (m_domain_char->allow_on_edge)
2949 this->interval.end = i = m_domain_char->interval.end;
2950 else
2951 i = m_domain_char->interval.end;
2952 }
2953 else {
// Neither a separator nor a domain character: the name ends at the last
// edge-capable character.
2954 this->interval.start = start;
2955 return true;
2956 }
2957 }
2958 }
2959 else
2960 break;
2961 }
2962 if (count) {
2963 this->interval.start = start;
2964 return true;
2965 }
2966 this->interval.invalidate();
2967 return false;
2968 }
2969
2970 protected:
2972 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2973 std::shared_ptr<basic_parser<T>> m_separator;
2974 };
2975
2978#ifdef _UNICODE
2979 using tdns_name = wdns_name;
2980#else
2981 using tdns_name = dns_name;
2982#endif
2984
2988 template <class T>
2990 {
2991 public:
2992 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2993
2994 virtual bool match(
2995 _In_reads_or_z_(end) const T* text,
2996 _In_ size_t start = 0,
2997 _In_ size_t end = SIZE_MAX,
2998 _In_ int flags = match_default)
2999 {
3000 _Assume_(text || start >= end);
3001 if (start < end && text[start]) {
3002 if (text[start] == '-' ||
3003 text[start] == '.' ||
3004 text[start] == '_' ||
3005 text[start] == '~' ||
3006 text[start] == '%' ||
3007 text[start] == '!' ||
3008 text[start] == '$' ||
3009 text[start] == '&' ||
3010 text[start] == '\'' ||
3011 //text[start] == '(' ||
3012 //text[start] == ')' ||
3013 text[start] == '*' ||
3014 text[start] == '+' ||
3015 text[start] == ',' ||
3016 text[start] == ';' ||
3017 text[start] == '=' ||
3018 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3019 {
3020 this->interval.end = (this->interval.start = start) + 1;
3021 return true;
3022 }
3023 }
3024 this->interval.invalidate();
3025 return false;
3026 }
3027 };
3028
3031#ifdef _UNICODE
3033#else
3035#endif
3036
3041 {
3042 public:
3043 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3044
3045 virtual bool match(
3046 _In_reads_or_z_(end) const char* text,
3047 _In_ size_t start = 0,
3048 _In_ size_t end = SIZE_MAX,
3049 _In_ int flags = match_default)
3050 {
3051 _Assume_(text || start >= end);
3052 if (start < end && text[start]) {
3053 wchar_t buf[3];
3054 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3055 const wchar_t* chr_end = chr + stdex::strlen(chr);
3056 if (((chr[0] == L'-' ||
3057 chr[0] == L'.' ||
3058 chr[0] == L'_' ||
3059 chr[0] == L'~' ||
3060 chr[0] == L'%' ||
3061 chr[0] == L'!' ||
3062 chr[0] == L'$' ||
3063 chr[0] == L'&' ||
3064 chr[0] == L'\'' ||
3065 //chr[0] == L'(' ||
3066 //chr[0] == L')' ||
3067 chr[0] == L'*' ||
3068 chr[0] == L'+' ||
3069 chr[0] == L',' ||
3070 chr[0] == L';' ||
3071 chr[0] == L'=') && chr[1] == 0) ||
3072 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3073 {
3074 this->interval.start = start;
3075 return true;
3076 }
3077 }
3078
3079 this->interval.invalidate();
3080 return false;
3081 }
3082 };
3083
3087 template <class T>
3089 {
3090 public:
3091 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3092
3093 virtual bool match(
3094 _In_reads_or_z_(end) const T* text,
3095 _In_ size_t start = 0,
3096 _In_ size_t end = SIZE_MAX,
3097 _In_ int flags = match_default)
3098 {
3099 _Assume_(text || start >= end);
3100 if (start < end && text[start]) {
3101 if (text[start] == '-' ||
3102 text[start] == '.' ||
3103 text[start] == '_' ||
3104 text[start] == '~' ||
3105 text[start] == '%' ||
3106 text[start] == '!' ||
3107 text[start] == '$' ||
3108 text[start] == '&' ||
3109 text[start] == '\'' ||
3110 text[start] == '(' ||
3111 text[start] == ')' ||
3112 text[start] == '*' ||
3113 text[start] == '+' ||
3114 text[start] == ',' ||
3115 text[start] == ';' ||
3116 text[start] == '=' ||
3117 text[start] == ':' ||
3118 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3119 {
3120 this->interval.end = (this->interval.start = start) + 1;
3121 return true;
3122 }
3123 }
3124 this->interval.invalidate();
3125 return false;
3126 }
3127 };
3128
3131#ifdef _UNICODE
3133#else
3135#endif
3136
3141 {
3142 public:
3143 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3144
3145 virtual bool match(
3146 _In_reads_or_z_(end) const char* text,
3147 _In_ size_t start = 0,
3148 _In_ size_t end = SIZE_MAX,
3149 _In_ int flags = match_default)
3150 {
3151 _Assume_(text || start >= end);
3152 if (start < end && text[start]) {
3153 wchar_t buf[3];
3154 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3155 const wchar_t* chr_end = chr + stdex::strlen(chr);
3156 if (((chr[0] == L'-' ||
3157 chr[0] == L'.' ||
3158 chr[0] == L'_' ||
3159 chr[0] == L'~' ||
3160 chr[0] == L'%' ||
3161 chr[0] == L'!' ||
3162 chr[0] == L'$' ||
3163 chr[0] == L'&' ||
3164 chr[0] == L'\'' ||
3165 chr[0] == L'(' ||
3166 chr[0] == L')' ||
3167 chr[0] == L'*' ||
3168 chr[0] == L'+' ||
3169 chr[0] == L',' ||
3170 chr[0] == L';' ||
3171 chr[0] == L'=' ||
3172 chr[0] == L':') && chr[1] == 0) ||
3173 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3174 {
3175 this->interval.start = start;
3176 return true;
3177 }
3178 }
3179 this->interval.invalidate();
3180 return false;
3181 }
3182 };
3183
3187 template <class T>
3189 {
3190 public:
3191 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3192
3193 virtual bool match(
3194 _In_reads_or_z_(end) const T* text,
3195 _In_ size_t start = 0,
3196 _In_ size_t end = SIZE_MAX,
3197 _In_ int flags = match_default)
3198 {
3199 _Assume_(text || start >= end);
3200 if (start < end && text[start]) {
3201 if (text[start] == '/' ||
3202 text[start] == '-' ||
3203 text[start] == '.' ||
3204 text[start] == '_' ||
3205 text[start] == '~' ||
3206 text[start] == '%' ||
3207 text[start] == '!' ||
3208 text[start] == '$' ||
3209 text[start] == '&' ||
3210 text[start] == '\'' ||
3211 text[start] == '(' ||
3212 text[start] == ')' ||
3213 text[start] == '*' ||
3214 text[start] == '+' ||
3215 text[start] == ',' ||
3216 text[start] == ';' ||
3217 text[start] == '=' ||
3218 text[start] == ':' ||
3219 text[start] == '@' ||
3220 text[start] == '?' ||
3221 text[start] == '#' ||
3222 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3223 {
3224 this->interval.end = (this->interval.start = start) + 1;
3225 return true;
3226 }
3227 }
3228 this->interval.invalidate();
3229 return false;
3230 }
3231 };
3232
3235#ifdef _UNICODE
3237#else
3239#endif
3240
3245 {
3246 public:
3247 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3248
3249 virtual bool match(
3250 _In_reads_or_z_(end) const char* text,
3251 _In_ size_t start = 0,
3252 _In_ size_t end = SIZE_MAX,
3253 _In_ int flags = match_default)
3254 {
3255 _Assume_(text || start >= end);
3256 if (start < end && text[start]) {
3257 wchar_t buf[3];
3258 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3259 const wchar_t* chr_end = chr + stdex::strlen(chr);
3260 if (((chr[0] == L'/' ||
3261 chr[0] == L'-' ||
3262 chr[0] == L'.' ||
3263 chr[0] == L'_' ||
3264 chr[0] == L'~' ||
3265 chr[0] == L'%' ||
3266 chr[0] == L'!' ||
3267 chr[0] == L'$' ||
3268 chr[0] == L'&' ||
3269 chr[0] == L'\'' ||
3270 chr[0] == L'(' ||
3271 chr[0] == L')' ||
3272 chr[0] == L'*' ||
3273 chr[0] == L'+' ||
3274 chr[0] == L',' ||
3275 chr[0] == L';' ||
3276 chr[0] == L'=' ||
3277 chr[0] == L':' ||
3278 chr[0] == L'@' ||
3279 chr[0] == L'?' ||
3280 chr[0] == L'#') && chr[1] == 0) ||
3281 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3282 {
3283 this->interval.start = start;
3284 return true;
3285 }
3286 }
3287 this->interval.invalidate();
3288 return false;
3289 }
3290 };
3291
3295 template <class T>
3297 {
3298 public:
3300 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3301 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3302 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3303 _In_ const std::locale& locale = std::locale()) :
3304 basic_parser<T>(locale),
3305 m_path_char(path_char),
3306 m_query_start(query_start),
3307 m_bookmark_start(bookmark_start)
3308 {}
3309
// Matches the path part of a URL and splits it into three intervals: path, query
// (text after m_query_start) and bookmark (text after m_bookmark_start).
//
// All three parts are runs of m_path_char characters. A bookmark may follow either
// the path directly or the query; once a bookmark has started, nothing else is
// recognised after it.
//
// text  - text to parse
// start - index in text to start matching at
// end   - index at which the text ends (a zero terminator also ends it)
// flags - matching flags, forwarded unchanged to all sub-parsers
3310 virtual bool match(
3311 _In_reads_or_z_(end) const T* text,
3312 _In_ size_t start = 0,
3313 _In_ size_t end = SIZE_MAX,
3314 _In_ int flags = match_default)
3315 {
3316 _Assume_(text || start >= end);
3317
3318 this->interval.end = start;
3319 path.start = start;
// query and bookmark start out as start = 1, end = 0 and are only overwritten when
// their introducer is found.
3320 query.start = 1;
3321 query.end = 0;
3322 bookmark.start = 1;
3323 bookmark.end = 0;
3324
3325 for (;;) {
3326 if (this->interval.end >= end || !text[this->interval.end])
3327 break;
3328 if (m_query_start->match(text, this->interval.end, end, flags)) {
// Query introducer: the path ends before it and the query starts after it.
3329 path.end = this->interval.end;
3330 query.start = this->interval.end = m_query_start->interval.end;
3331 for (;;) {
3332 if (this->interval.end >= end || !text[this->interval.end]) {
3333 query.end = this->interval.end;
3334 break;
3335 }
3336 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
// Bookmark introducer inside the query: the query ends before it.
3337 query.end = this->interval.end;
3338 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3339 for (;;) {
3340 if (this->interval.end >= end || !text[this->interval.end]) {
3341 bookmark.end = this->interval.end;
3342 break;
3343 }
3344 if (m_path_char->match(text, this->interval.end, end, flags))
3345 this->interval.end = m_path_char->interval.end;
3346 else {
3347 bookmark.end = this->interval.end;
3348 break;
3349 }
3350 }
3351 this->interval.start = start;
3352 return true;
3353 }
3354 if (m_path_char->match(text, this->interval.end, end, flags))
3355 this->interval.end = m_path_char->interval.end;
3356 else {
3357 query.end = this->interval.end;
3358 break;
3359 }
3360 }
3361 this->interval.start = start;
3362 return true;
3363 }
3364 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
// Bookmark introducer directly after the path (no query).
3365 path.end = this->interval.end;
3366 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3367 for (;;) {
3368 if (this->interval.end >= end || !text[this->interval.end]) {
3369 bookmark.end = this->interval.end;
3370 break;
3371 }
3372 if (m_path_char->match(text, this->interval.end, end, flags))
3373 this->interval.end = m_path_char->interval.end;
3374 else {
3375 bookmark.end = this->interval.end;
3376 break;
3377 }
3378 }
3379 this->interval.start = start;
3380 return true;
3381 }
3382 if (m_path_char->match(text, this->interval.end, end, flags))
3383 this->interval.end = m_path_char->interval.end;
3384 else
3385 break;
3386 }
3387
// NOTE(review): the embedded numbering skips 3388 here; the line that opens the block
// closed at 3392 is missing from this listing (presumably a check that the path is
// non-empty) - confirm against the original file.
3389 path.end = this->interval.end;
3390 this->interval.start = start;
3391 return true;
3392 }
3393
// Failure path. query is not reset here; it still holds the start = 1, end = 0
// values assigned at the top, because every branch that modifies it returns true.
3394 path.start = 1;
3395 path.end = 0;
3396 bookmark.start = 1;
3397 bookmark.end = 0;
3398 this->interval.invalidate();
3399 return false;
3400 }
3401
// Resets the path, query and bookmark intervals to start = 1, end = 0 (the same values
// match() assigns for parts that were not found; presumably the library's
// "invalid interval" encoding).
3402 virtual void invalidate()
3403 {
3404 path.start = 1;
3405 path.end = 0;
3406 query.start = 1;
3407 query.end = 0;
3408 bookmark.start = 1;
3409 bookmark.end = 0;
// NOTE(review): the embedded numbering jumps from 3409 to 3411, so one source line is
// missing from this listing - presumably the base-class invalidate() call. Confirm
// against the original file.
3411 }
3412
3413 public:
3416 stdex::interval<size_t> bookmark;
3417
3418 protected:
3419 std::shared_ptr<basic_parser<T>> m_path_char;
3420 std::shared_ptr<basic_parser<T>> m_query_start;
3421 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3422 };
3423
3426#ifdef _UNICODE
3427 using turl_path = wurl_path;
3428#else
3429 using turl_path = url_path;
3430#endif
3432
3436 template <class T>
3437 class basic_url : public basic_parser<T>
3438 {
3439 public:
3440 basic_url(
3441 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3442 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3443 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3444 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3445 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3446 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3447 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3448 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3449 _In_ const std::shared_ptr<basic_parser<T>>& at,
3450 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3451 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3452 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3453 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3454 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3455 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3456 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3457 _In_ const std::locale& locale = std::locale()) :
3458 basic_parser<T>(locale),
3459 http_scheme(_http_scheme),
3460 ftp_scheme(_ftp_scheme),
3461 mailto_scheme(_mailto_scheme),
3462 file_scheme(_file_scheme),
3463 m_colon(colon),
3464 m_slash(slash),
3465 username(_username),
3466 password(_password),
3467 m_at(at),
3468 m_ip_lbracket(ip_lbracket),
3469 m_ip_rbracket(ip_rbracket),
3470 ipv4_host(_ipv4_host),
3471 ipv6_host(_ipv6_host),
3472 dns_host(_dns_host),
3473 port(_port),
3474 path(_path)
3475 {}
3476
3477 virtual bool match(
3478 _In_reads_or_z_(end) const T* text,
3479 _In_ size_t start = 0,
3480 _In_ size_t end = SIZE_MAX,
3481 _In_ int flags = match_default)
3482 {
3483 _Assume_(text || start >= end);
3484
3485 this->interval.end = start;
3486
3487 if (http_scheme->match(text, this->interval.end, end, flags) &&
3488 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3489 m_slash->match(text, m_colon->interval.end, end, flags) &&
3490 m_slash->match(text, m_slash->interval.end, end, flags))
3491 {
3492 // http://
3493 this->interval.end = m_slash->interval.end;
3494 ftp_scheme->invalidate();
3495 mailto_scheme->invalidate();
3496 file_scheme->invalidate();
3497 }
3498 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3499 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3500 m_slash->match(text, m_colon->interval.end, end, flags) &&
3501 m_slash->match(text, m_slash->interval.end, end, flags))
3502 {
3503 // ftp://
3504 this->interval.end = m_slash->interval.end;
3505 http_scheme->invalidate();
3506 mailto_scheme->invalidate();
3507 file_scheme->invalidate();
3508 }
3509 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3510 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3511 {
3512 // mailto:
3513 this->interval.end = m_colon->interval.end;
3514 http_scheme->invalidate();
3515 ftp_scheme->invalidate();
3516 file_scheme->invalidate();
3517 }
3518 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3519 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3520 m_slash->match(text, m_colon->interval.end, end, flags) &&
3521 m_slash->match(text, m_slash->interval.end, end, flags))
3522 {
3523 // file://
3524 this->interval.end = m_slash->interval.end;
3525 http_scheme->invalidate();
3526 ftp_scheme->invalidate();
3527 mailto_scheme->invalidate();
3528 }
3529 else {
3530 // Default to http:
3531 http_scheme->invalidate();
3532 ftp_scheme->invalidate();
3533 mailto_scheme->invalidate();
3534 file_scheme->invalidate();
3535 }
3536
3537 if (ftp_scheme->interval) {
3538 if (username->match(text, this->interval.end, end, flags)) {
3539 if (m_colon->match(text, username->interval.end, end, flags) &&
3540 password->match(text, m_colon->interval.end, end, flags) &&
3541 m_at->match(text, password->interval.end, end, flags))
3542 {
3543 // Username and password
3544 this->interval.end = m_at->interval.end;
3545 }
3546 else if (m_at->match(text, this->interval.end, end, flags)) {
3547 // Username only
3548 this->interval.end = m_at->interval.end;
3549 password->invalidate();
3550 }
3551 else {
3552 username->invalidate();
3553 password->invalidate();
3554 }
3555 }
3556 else {
3557 username->invalidate();
3558 password->invalidate();
3559 }
3560
3561 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3562 // Host is IPv4
3563 this->interval.end = ipv4_host->interval.end;
3564 ipv6_host->invalidate();
3565 dns_host->invalidate();
3566 }
3567 else if (
3568 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3569 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3570 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3571 {
3572 // Host is IPv6
3573 this->interval.end = m_ip_rbracket->interval.end;
3574 ipv4_host->invalidate();
3575 dns_host->invalidate();
3576 }
3577 else if (dns_host->match(text, this->interval.end, end, flags)) {
3578 // Host is hostname
3579 this->interval.end = dns_host->interval.end;
3580 ipv4_host->invalidate();
3581 ipv6_host->invalidate();
3582 }
3583 else {
3584 invalidate();
3585 return false;
3586 }
3587
3588 if (m_colon->match(text, this->interval.end, end, flags) &&
3589 port->match(text, m_colon->interval.end, end, flags))
3590 {
3591 // Port
3592 this->interval.end = port->interval.end;
3593 }
3594 else
3595 port->invalidate();
3596
3597 if (path->match(text, this->interval.end, end, flags)) {
3598 // Path
3599 this->interval.end = path->interval.end;
3600 }
3601
3602 this->interval.start = start;
3603 return true;
3604 }
3605
3606 if (mailto_scheme->interval) {
3607 if (username->match(text, this->interval.end, end, flags) &&
3608 m_at->match(text, username->interval.end, end, flags))
3609 {
3610 // Username
3611 this->interval.end = m_at->interval.end;
3612 }
3613 else {
3614 invalidate();
3615 return false;
3616 }
3617
3618 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3619 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3620 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3621 {
3622 // Host is IPv4
3623 this->interval.end = m_ip_rbracket->interval.end;
3624 ipv6_host->invalidate();
3625 dns_host->invalidate();
3626 }
3627 else if (
3628 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3629 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3630 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3631 {
3632 // Host is IPv6
3633 this->interval.end = m_ip_rbracket->interval.end;
3634 ipv4_host->invalidate();
3635 dns_host->invalidate();
3636 }
3637 else if (dns_host->match(text, this->interval.end, end, flags)) {
3638 // Host is hostname
3639 this->interval.end = dns_host->interval.end;
3640 ipv4_host->invalidate();
3641 ipv6_host->invalidate();
3642 }
3643 else {
3644 invalidate();
3645 return false;
3646 }
3647
3648 password->invalidate();
3649 port->invalidate();
3650 path->invalidate();
3651 this->interval.start = start;
3652 return true;
3653 }
3654
3655 if (file_scheme->interval) {
3656 if (path->match(text, this->interval.end, end, flags)) {
3657 // Path
3658 this->interval.end = path->interval.end;
3659 }
3660
3661 username->invalidate();
3662 password->invalidate();
3663 ipv4_host->invalidate();
3664 ipv6_host->invalidate();
3665 dns_host->invalidate();
3666 port->invalidate();
3667 this->interval.start = start;
3668 return true;
3669 }
3670
3671 // "http://" found or defaulted to
3672
3673 // If "http://" explicit, test for username&password.
3674 if (http_scheme->interval &&
3675 username->match(text, this->interval.end, end, flags))
3676 {
3677 if (m_colon->match(text, username->interval.end, end, flags) &&
3678 password->match(text, m_colon->interval.end, end, flags) &&
3679 m_at->match(text, password->interval.end, end, flags))
3680 {
3681 // Username and password
3682 this->interval.end = m_at->interval.end;
3683 }
3684 else if (m_at->match(text, username->interval.end, end, flags)) {
3685 // Username only
3686 this->interval.end = m_at->interval.end;
3687 password->invalidate();
3688 }
3689 else {
3690 username->invalidate();
3691 password->invalidate();
3692 }
3693 }
3694 else {
3695 username->invalidate();
3696 password->invalidate();
3697 }
3698
3699 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3700 // Host is IPv4
3701 this->interval.end = ipv4_host->interval.end;
3702 ipv6_host->invalidate();
3703 dns_host->invalidate();
3704 }
3705 else if (
3706 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3707 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3708 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3709 {
3710 // Host is IPv6
3711 this->interval.end = m_ip_rbracket->interval.end;
3712 ipv4_host->invalidate();
3713 dns_host->invalidate();
3714 }
3715 else if (dns_host->match(text, this->interval.end, end, flags)) {
3716 // Host is hostname
3717 this->interval.end = dns_host->interval.end;
3718 ipv4_host->invalidate();
3719 ipv6_host->invalidate();
3720 }
3721 else {
3722 invalidate();
3723 return false;
3724 }
3725
3726 if (m_colon->match(text, this->interval.end, end, flags) &&
3727 port->match(text, m_colon->interval.end, end, flags))
3728 {
3729 // Port
3730 this->interval.end = port->interval.end;
3731 }
3732 else
3733 port->invalidate();
3734
3735 if (path->match(text, this->interval.end, end, flags)) {
3736 // Path
3737 this->interval.end = path->interval.end;
3738 }
3739
3740 this->interval.start = start;
3741 return true;
3742 }
3743
3744 virtual void invalidate()
3745 {
3746 http_scheme->invalidate();
3747 ftp_scheme->invalidate();
3748 mailto_scheme->invalidate();
3749 file_scheme->invalidate();
3750 username->invalidate();
3751 password->invalidate();
3752 ipv4_host->invalidate();
3753 ipv6_host->invalidate();
3754 dns_host->invalidate();
3755 port->invalidate();
3756 path->invalidate();
3758 }
3759
3760 public:
3761 std::shared_ptr<basic_parser<T>> http_scheme;
3762 std::shared_ptr<basic_parser<T>> ftp_scheme;
3763 std::shared_ptr<basic_parser<T>> mailto_scheme;
3764 std::shared_ptr<basic_parser<T>> file_scheme;
3765 std::shared_ptr<basic_parser<T>> username;
3766 std::shared_ptr<basic_parser<T>> password;
3767 std::shared_ptr<basic_parser<T>> ipv4_host;
3768 std::shared_ptr<basic_parser<T>> ipv6_host;
3769 std::shared_ptr<basic_parser<T>> dns_host;
3770 std::shared_ptr<basic_parser<T>> port;
3771 std::shared_ptr<basic_parser<T>> path;
3772
3773 protected:
3774 std::shared_ptr<basic_parser<T>> m_colon;
3775 std::shared_ptr<basic_parser<T>> m_slash;
3776 std::shared_ptr<basic_parser<T>> m_at;
3777 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3778 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3779 };
3780
3781 using url = basic_url<char>;
3782 using wurl = basic_url<wchar_t>;
3783#ifdef _UNICODE
3784 using turl = wurl;
3785#else
3786 using turl = url;
3787#endif
3788 using sgml_url = basic_url<char>;
3789
3793 template <class T>
3795 {
3796 public:
3798 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3799 _In_ const std::shared_ptr<basic_parser<T>>& at,
3800 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3801 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3802 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3803 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3804 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3805 _In_ const std::locale& locale = std::locale()) :
3806 basic_parser<T>(locale),
3807 username(_username),
3808 m_at(at),
3809 m_ip_lbracket(ip_lbracket),
3810 m_ip_rbracket(ip_rbracket),
3811 ipv4_host(_ipv4_host),
3812 ipv6_host(_ipv6_host),
3813 dns_host(_dns_host)
3814 {}
3815
3816 virtual bool match(
3817 _In_reads_or_z_(end) const T* text,
3818 _In_ size_t start = 0,
3819 _In_ size_t end = SIZE_MAX,
3820 _In_ int flags = match_default)
3821 {
3822 _Assume_(text || start >= end);
3823
3824 if (username->match(text, start, end, flags) &&
3825 m_at->match(text, username->interval.end, end, flags))
3826 {
3827 // Username@
3828 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3829 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3830 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3831 {
3832 // Host is IPv4
3833 this->interval.end = m_ip_rbracket->interval.end;
3834 ipv6_host->invalidate();
3835 dns_host->invalidate();
3836 }
3837 else if (
3838 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3839 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3840 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3841 {
3842 // Host is IPv6
3843 this->interval.end = m_ip_rbracket->interval.end;
3844 ipv4_host->invalidate();
3845 dns_host->invalidate();
3846 }
3847 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3848 // Host is hostname
3849 this->interval.end = dns_host->interval.end;
3850 ipv4_host->invalidate();
3851 ipv6_host->invalidate();
3852 }
3853 else
3854 goto error;
3855 this->interval.start = start;
3856 return true;
3857 }
3858
3859 error:
3860 username->invalidate();
3861 ipv4_host->invalidate();
3862 ipv6_host->invalidate();
3863 dns_host->invalidate();
3864 this->interval.invalidate();
3865 return false;
3866 }
3867
3868 virtual void invalidate()
3869 {
3870 username->invalidate();
3871 ipv4_host->invalidate();
3872 ipv6_host->invalidate();
3873 dns_host->invalidate();
3875 }
3876
3877 public:
3878 std::shared_ptr<basic_parser<T>> username;
3879 std::shared_ptr<basic_parser<T>> ipv4_host;
3880 std::shared_ptr<basic_parser<T>> ipv6_host;
3881 std::shared_ptr<basic_parser<T>> dns_host;
3882
3883 protected:
3884 std::shared_ptr<basic_parser<T>> m_at;
3885 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3886 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3887 };
3888
3891#ifdef _UNICODE
3893#else
3895#endif
3897
3901 template <class T>
3903 {
3904 public:
3906 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3907 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3908 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3909 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3910 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3911 _In_ const std::locale& locale = std::locale()) :
3912 basic_parser<T>(locale),
3914 apex(_apex),
3915 eyes(_eyes),
3916 nose(_nose),
3917 mouth(_mouth)
3918 {}
3919
3920 virtual bool match(
3921 _In_reads_or_z_(end) const T* text,
3922 _In_ size_t start = 0,
3923 _In_ size_t end = SIZE_MAX,
3924 _In_ int flags = match_default)
3925 {
3926 _Assume_(text || start >= end);
3927
3928 if (emoticon && emoticon->match(text, start, end, flags)) {
3929 if (apex) apex->invalidate();
3930 eyes->invalidate();
3931 if (nose) nose->invalidate();
3932 mouth->invalidate();
3933 this->interval.start = start;
3934 this->interval.end = emoticon->interval.end;
3935 return true;
3936 }
3937
3938 this->interval.end = start;
3939
3940 if (apex && apex->match(text, this->interval.end, end, flags))
3941 this->interval.end = apex->interval.end;
3942
3943 if (eyes->match(text, this->interval.end, end, flags)) {
3944 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3945 mouth->match(text, nose->interval.end, end, flags))
3946 {
3947 size_t
3949 hit_offset = mouth->hit_offset;
3950 // Mouth may repeat :-)))))))
3951 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3952 mouth->interval.start = start_mouth;
3953 mouth->interval.end = this->interval.end;
3954 this->interval.start = start;
3955 return true;
3956 }
3957 if (mouth->match(text, eyes->interval.end, end, flags)) {
3958 size_t
3960 hit_offset = mouth->hit_offset;
3961 // Mouth may repeat :-)))))))
3962 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3963 if (nose) nose->invalidate();
3964 mouth->interval.start = start_mouth;
3965 mouth->interval.end = this->interval.end;
3966 this->interval.start = start;
3967 return true;
3968 }
3969 }
3970
3971 if (emoticon) emoticon->invalidate();
3972 if (apex) apex->invalidate();
3973 eyes->invalidate();
3974 if (nose) nose->invalidate();
3975 mouth->invalidate();
3976 this->interval.invalidate();
3977 return false;
3978 }
3979
3980 virtual void invalidate()
3981 {
3982 if (emoticon) emoticon->invalidate();
3983 if (apex) apex->invalidate();
3984 eyes->invalidate();
3985 if (nose) nose->invalidate();
3986 mouth->invalidate();
3988 }
3989
3990 public:
3991 std::shared_ptr<basic_parser<T>> emoticon;
3992 std::shared_ptr<basic_parser<T>> apex;
3993 std::shared_ptr<basic_parser<T>> eyes;
3994 std::shared_ptr<basic_parser<T>> nose;
3995 std::shared_ptr<basic_set<T>> mouth;
3996 };
3997
4000#ifdef _UNICODE
4001 using temoticon = wemoticon;
4002#else
4003 using temoticon = emoticon;
4004#endif
4006
/// Date layouts, one bit each so that several can be combined into a mask.
/// The letters give the order of the parts: d = day, m = month, y = year.
enum date_format_t {
    date_format_none = 0,      ///< No format
    date_format_dmy = 1 << 0,  ///< day, month, year
    date_format_mdy = 1 << 1,  ///< month, day, year
    date_format_ymd = 1 << 2,  ///< year, month, day
    date_format_ym = 1 << 3,   ///< year, month
    date_format_my = 1 << 4,   ///< month, year
    date_format_dm = 1 << 5,   ///< day, month
    date_format_md = 1 << 6,   ///< month, day
};
4020
4024 template <class T>
4025 class basic_date : public basic_parser<T>
4026 {
4027 public:
4028 basic_date(
4029 _In_ int format_mask,
4030 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4031 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4032 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4033 _In_ const std::shared_ptr<basic_set<T>>& separator,
4034 _In_ const std::shared_ptr<basic_parser<T>>& space,
4035 _In_ const std::locale& locale = std::locale()) :
4036 basic_parser<T>(locale),
4037 format(date_format_none),
4038 m_format_mask(format_mask),
4039 day(_day),
4040 month(_month),
4041 year(_year),
4042 m_separator(separator),
4043 m_space(space)
4044 {}
4045
4046 virtual bool match(
4047 _In_reads_or_z_(end) const T* text,
4048 _In_ size_t start = 0,
4049 _In_ size_t end = SIZE_MAX,
4050 _In_ int flags = match_default)
4051 {
4052 _Assume_(text || start >= end);
4053
4054 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4055 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4056 if (day->match(text, start, end, flags)) {
4057 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4058 if (m_separator->match(text, this->interval.end, end, flags)) {
4059 size_t hit_offset = m_separator->hit_offset;
4060 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4061 if (month->match(text, this->interval.end, end, flags)) {
4062 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4063 if (m_separator->match(text, this->interval.end, end, flags) &&
4064 m_separator->hit_offset == hit_offset) // Both separators must match.
4065 {
4066 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4067 if (year->match(text, this->interval.end, end, flags) &&
4068 is_valid(day->value, month->value))
4069 {
4070 this->interval.start = start;
4071 this->interval.end = year->interval.end;
4072 format = date_format_dmy;
4073 return true;
4074 }
4075 }
4076 }
4077 }
4078 }
4079 }
4080
4081 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4082 if (month->match(text, start, end, flags)) {
4083 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4084 if (m_separator->match(text, this->interval.end, end, flags)) {
4085 size_t hit_offset = m_separator->hit_offset;
4086 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4087 if (day->match(text, this->interval.end, end, flags)) {
4088 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4089 if (m_separator->match(text, this->interval.end, end, flags) &&
4090 m_separator->hit_offset == hit_offset) // Both separators must match.
4091 {
4092 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4093 if (year->match(text, this->interval.end, end, flags) &&
4094 is_valid(day->value, month->value))
4095 {
4096 this->interval.start = start;
4097 this->interval.end = year->interval.end;
4098 format = date_format_mdy;
4099 return true;
4100 }
4101 }
4102 }
4103 }
4104 }
4105 }
4106
4107 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4108 if (year->match(text, start, end, flags)) {
4109 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4110 if (m_separator->match(text, this->interval.end, end, flags)) {
4111 size_t hit_offset = m_separator->hit_offset;
4112 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4113 if (month->match(text, this->interval.end, end, flags)) {
4114 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4115 if (m_separator->match(text, this->interval.end, end, flags) &&
4116 m_separator->hit_offset == hit_offset) // Both separators must match.
4117 {
4118 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4119 if (day->match(text, this->interval.end, end, flags) &&
4120 is_valid(day->value, month->value))
4121 {
4122 this->interval.start = start;
4123 this->interval.end = day->interval.end;
4124 format = date_format_ymd;
4125 return true;
4126 }
4127 }
4128 }
4129 }
4130 }
4131 }
4132
4133 if ((m_format_mask & date_format_ym) == date_format_ym) {
4134 if (year->match(text, start, end, flags)) {
4135 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4136 if (m_separator->match(text, this->interval.end, end, flags)) {
4137 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4138 if (month->match(text, this->interval.end, end, flags) &&
4139 is_valid(SIZE_MAX, month->value))
4140 {
4141 if (day) day->invalidate();
4142 this->interval.start = start;
4143 this->interval.end = month->interval.end;
4144 format = date_format_ym;
4145 return true;
4146 }
4147 }
4148 }
4149 }
4150
4151 if ((m_format_mask & date_format_my) == date_format_my) {
4152 if (month->match(text, start, end, flags)) {
4153 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4154 if (m_separator->match(text, this->interval.end, end, flags)) {
4155 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4156 if (year->match(text, this->interval.end, end, flags) &&
4157 is_valid(SIZE_MAX, month->value))
4158 {
4159 if (day) day->invalidate();
4160 this->interval.start = start;
4161 this->interval.end = year->interval.end;
4162 format = date_format_my;
4163 return true;
4164 }
4165 }
4166 }
4167 }
4168
4169 if ((m_format_mask & date_format_dm) == date_format_dm) {
4170 if (day->match(text, start, end, flags)) {
4171 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4172 if (m_separator->match(text, this->interval.end, end, flags)) {
4173 size_t hit_offset = m_separator->hit_offset;
4174 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4175 if (month->match(text, this->interval.end, end, flags) &&
4176 is_valid(day->value, month->value))
4177 {
4178 if (year) year->invalidate();
4179 this->interval.start = start;
4180 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4181 if (m_separator->match(text, this->interval.end, end, flags) &&
4182 m_separator->hit_offset == hit_offset) // Both separators must match.
4183 this->interval.end = m_separator->interval.end;
4184 else
4185 this->interval.end = month->interval.end;
4186 format = date_format_dm;
4187 return true;
4188 }
4189 }
4190 }
4191 }
4192
4193 if ((m_format_mask & date_format_md) == date_format_md) {
4194 if (month->match(text, start, end, flags)) {
4195 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4196 if (m_separator->match(text, this->interval.end, end, flags)) {
4197 size_t hit_offset = m_separator->hit_offset;
4198 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4199 if (day->match(text, this->interval.end, end, flags) &&
4200 is_valid(day->value, month->value))
4201 {
4202 if (year) year->invalidate();
4203 this->interval.start = start;
4204 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4205 if (m_separator->match(text, this->interval.end, end, flags) &&
4206 m_separator->hit_offset == hit_offset) // Both separators must match.
4207 this->interval.end = m_separator->interval.end;
4208 else
4209 this->interval.end = day->interval.end;
4210 format = date_format_md;
4211 return true;
4212 }
4213 }
4214 }
4215 }
4216
4217 if (day) day->invalidate();
4218 if (month) month->invalidate();
4219 if (year) year->invalidate();
4220 format = date_format_none;
4221 this->interval.invalidate();
4222 return false;
4223 }
4224
4225 virtual void invalidate()
4226 {
4227 if (day) day->invalidate();
4228 if (month) month->invalidate();
4229 if (year) year->invalidate();
4230 format = date_format_none;
4232 }
4233
4234 protected:
4235 static inline bool is_valid(size_t day, size_t month)
4236 {
4237 if (month == SIZE_MAX) {
4238 // Default to January. This allows validating day only, as January has all 31 days.
4239 month = 1;
4240 }
4241 if (day == SIZE_MAX) {
4242 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4243 day = 1;
4244 }
4245
4246 switch (month) {
4247 case 1:
4248 case 3:
4249 case 5:
4250 case 7:
4251 case 8:
4252 case 10:
4253 case 12:
4254 return 1 <= day && day <= 31;
4255 case 2:
4256 return 1 <= day && day <= 29;
4257 case 4:
4258 case 6:
4259 case 9:
4260 case 11:
4261 return 1 <= day && day <= 30;
4262 default:
4263 return false;
4264 }
4265 }
4266
4267 public:
4268 date_format_t format;
4269 std::shared_ptr<basic_integer<T>> day;
4270 std::shared_ptr<basic_integer<T>> month;
4271 std::shared_ptr<basic_integer<T>> year;
4272
4273 protected:
4274 int m_format_mask;
4275 std::shared_ptr<basic_set<T>> m_separator;
4276 std::shared_ptr<basic_parser<T>> m_space;
4277 };
4278
4279 using date = basic_date<char>;
4280 using wdate = basic_date<wchar_t>;
4281#ifdef _UNICODE
4282 using tdate = wdate;
4283#else
4284 using tdate = date;
4285#endif
4287
4291 template <class T>
4292 class basic_time : public basic_parser<T>
4293 {
4294 public:
4295 basic_time(
4296 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4297 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4298 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4299 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4300 _In_ const std::shared_ptr<basic_set<T>>& separator,
4301 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4302 _In_ const std::locale& locale = std::locale()) :
4303 basic_parser<T>(locale),
4304 hour(_hour),
4305 minute(_minute),
4306 second(_second),
4307 millisecond(_millisecond),
4308 m_separator(separator),
4309 m_millisecond_separator(millisecond_separator)
4310 {}
4311
4312 virtual bool match(
4313 _In_reads_or_z_(end) const T* text,
4314 _In_ size_t start = 0,
4315 _In_ size_t end = SIZE_MAX,
4316 _In_ int flags = match_default)
4317 {
4318 _Assume_(text || start >= end);
4319
4320 if (hour->match(text, start, end, flags) &&
4321 m_separator->match(text, hour->interval.end, end, flags) &&
4322 minute->match(text, m_separator->interval.end, end, flags) &&
4323 minute->value < 60)
4324 {
4325 // hh::mm
4326 size_t hit_offset = m_separator->hit_offset;
4327 if (m_separator->match(text, minute->interval.end, end, flags) &&
4328 m_separator->hit_offset == hit_offset && // Both separators must match.
4329 second && second->match(text, m_separator->interval.end, end, flags) &&
4330 second->value < 60)
4331 {
4332 // hh::mm:ss
4333 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4334 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4335 millisecond->value < 1000)
4336 {
4337 // hh::mm:ss.mmmm
4338 this->interval.end = millisecond->interval.end;
4339 }
4340 else {
4341 if (millisecond) millisecond->invalidate();
4342 this->interval.end = second->interval.end;
4343 }
4344 }
4345 else {
4346 if (second) second->invalidate();
4347 if (millisecond) millisecond->invalidate();
4348 this->interval.end = minute->interval.end;
4349 }
4350 this->interval.start = start;
4351 return true;
4352 }
4353
4354 hour->invalidate();
4355 minute->invalidate();
4356 if (second) second->invalidate();
4357 if (millisecond) millisecond->invalidate();
4358 this->interval.invalidate();
4359 return false;
4360 }
4361
4362 virtual void invalidate()
4363 {
4364 hour->invalidate();
4365 minute->invalidate();
4366 if (second) second->invalidate();
4367 if (millisecond) millisecond->invalidate();
4369 }
4370
4371 public:
4372 std::shared_ptr<basic_integer10<T>> hour;
4373 std::shared_ptr<basic_integer10<T>> minute;
4374 std::shared_ptr<basic_integer10<T>> second;
4375 std::shared_ptr<basic_integer10<T>> millisecond;
4376
4377 protected:
4378 std::shared_ptr<basic_set<T>> m_separator;
4379 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4380 };
4381
4382 using time = basic_time<char>;
4383 using wtime = basic_time<wchar_t>;
4384#ifdef _UNICODE
4385 using ttime = wtime;
4386#else
4387 using ttime = time;
4388#endif
4390
4394 template <class T>
4395 class basic_angle : public basic_parser<T>
4396 {
4397 public:
4399 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4400 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4401 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4402 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4403 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4404 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4405 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4406 _In_ const std::locale& locale = std::locale()) :
4407 basic_parser<T>(locale),
4408 degree(_degree),
4409 degree_separator(_degree_separator),
4410 minute(_minute),
4411 minute_separator(_minute_separator),
4412 second(_second),
4413 second_separator(_second_separator),
4414 decimal(_decimal)
4415 {}
4416
4417 virtual bool match(
4418 _In_reads_or_z_(end) const T* text,
4419 _In_ size_t start = 0,
4420 _In_ size_t end = SIZE_MAX,
4421 _In_ int flags = match_default)
4422 {
4423 _Assume_(text || start >= end);
4424
4425 this->interval.end = start;
4426
4427 if (degree->match(text, this->interval.end, end, flags) &&
4428 degree_separator->match(text, degree->interval.end, end, flags))
4429 {
4430 // Degrees
4431 this->interval.end = degree_separator->interval.end;
4432 }
4433 else {
4434 degree->invalidate();
4435 degree_separator->invalidate();
4436 }
4437
4438 if (minute->match(text, this->interval.end, end, flags) &&
4439 minute->value < 60 &&
4440 minute_separator->match(text, minute->interval.end, end, flags))
4441 {
4442 // Minutes
4443 this->interval.end = minute_separator->interval.end;
4444 }
4445 else {
4446 minute->invalidate();
4447 minute_separator->invalidate();
4448 }
4449
4450 if (second && second->match(text, this->interval.end, end, flags) &&
4451 second->value < 60)
4452 {
4453 // Seconds
4454 this->interval.end = second->interval.end;
4455 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4456 this->interval.end = second_separator->interval.end;
4457 else
4458 if (second_separator) second_separator->invalidate();
4459 }
4460 else {
4461 if (second) second->invalidate();
4462 if (second_separator) second_separator->invalidate();
4463 }
4464
4465 if (degree->interval.start < degree->interval.end ||
4466 minute->interval.start < minute->interval.end ||
4467 (second && second->interval.start < second->interval.end))
4468 {
4469 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4470 // Decimals
4471 this->interval.end = decimal->interval.end;
4472 }
4473 else if (decimal)
4474 decimal->invalidate();
4475 this->interval.start = start;
4476 return true;
4477 }
4478 if (decimal) decimal->invalidate();
4479 this->interval.invalidate();
4480 return false;
4481 }
4482
/// Invalidates every component parser and this parser's own interval.
///
/// degree, degree_separator, minute and minute_separator are dereferenced unconditionally;
/// second, second_separator and decimal are skipped when null.
4483 virtual void invalidate()
4484 {
4485 degree->invalidate();
4486 degree_separator->invalidate();
4487 minute->invalidate();
4488 minute_separator->invalidate();
4489 if (second) second->invalidate();
4490 if (second_separator) second_separator->invalidate();
4491 if (decimal) decimal->invalidate();
// Also reset our own interval: callers test the interval after invalidate() to tell
// whether a parser matched, so a stale result from an earlier match() must not survive.
4492 this->interval.invalidate();
4493 }
4494
// Component parsers. match() and invalidate() dereference degree, degree_separator, minute and
// minute_separator without a null check; second, second_separator and decimal are checked for
// null before use and may therefore be left empty.
4495 public:
// Integer parser for the degrees value.
4496 std::shared_ptr<basic_integer10<T>> degree;
// Parser for the separator that must follow the degrees value.
4497 std::shared_ptr<basic_parser<T>> degree_separator;
// Integer parser for the minutes value (accepted only when below 60).
4498 std::shared_ptr<basic_integer10<T>> minute;
// Parser for the separator that must follow the minutes value.
4499 std::shared_ptr<basic_parser<T>> minute_separator;
// Integer parser for the seconds value (accepted only when below 60); optional.
4500 std::shared_ptr<basic_integer10<T>> second;
// Parser for the separator that may follow the seconds value; optional.
4501 std::shared_ptr<basic_parser<T>> second_separator;
// Parser for the decimal part tried after the last accepted component; optional.
4502 std::shared_ptr<basic_parser<T>> decimal;
4503 };
4504
// Angle parser for narrow (char) text.
4505 using angle = basic_angle<char>;
// Angle parser for wide (wchar_t) text; the _UNICODE branch below refers to it.
4506 using wangle = basic_angle<wchar_t>;
// Build-dependent alias: wide variant when _UNICODE is defined, narrow variant otherwise.
// NOTE(review): the alias name below does not follow the t-prefixed pattern of tiban - confirm the spelling.
4507#ifdef _UNICODE
4508 using RRegElKot = wangle;
4509#else
4510 using RRegElKot = angle;
4511#endif
4513
4517 template <class T>
4519 {
4520 public:
// Constructor parameter list and member-initializer list: stores the component parsers and
// passes the locale to basic_parser<T>.
// NOTE(review): the line naming the constructor is not present above these parameters - confirm against the original header.
//
// digit         Parser for a single digit
// plus_sign     Parser for the leading plus sign
// lparenthesis  Parser for the left parenthesis
// rparenthesis  Parser for the right parenthesis
// separator     Parser for separators between digits
// space         Parser for spaces
// locale        Locale passed to the base parser
4522 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4523 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4524 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4525 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4526 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4527 _In_ const std::shared_ptr<basic_parser<T>>& space,
4528 _In_ const std::locale& locale = std::locale()) :
4529 basic_parser<T>(locale),
4530 m_digit(digit),
4531 m_plus_sign(plus_sign),
4532 m_lparenthesis(lparenthesis),
4533 m_rparenthesis(rparenthesis),
4534 m_separator(separator),
4535 m_space(space)
4536 {}
4537
/// Matches a phone number: an optional plus sign followed by digits, at most one pair of
/// parentheses, separators and spaces.
///
/// The matched plus sign, digits and parentheses are collected in value; separators and spaces
/// are consumed but not stored. The match ends after the last digit outside parentheses or after
/// the right parenthesis, whichever comes later; anything consumed beyond that point is dropped
/// from both the interval and value.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop matching at
/// \param[in] flags  Match flags; match_multiline is masked out when matching spaces
///
/// \returns true when at least one digit outside parentheses was found; false otherwise, with
///          value cleared and this->interval invalidated.
4538 virtual bool match(
4539 _In_reads_or_z_(end) const T* text,
4540 _In_ size_t start = 0,
4541 _In_ size_t end = SIZE_MAX,
4542 _In_ int flags = match_default)
4543 {
4544 _Assume_(text || start >= end);
4545
// safe_digit_end / safe_value_size remember the last position (and value length) at which the
// number could validly end, so trailing separators, spaces or an unclosed parenthesis can be cut off.
4546 size_t safe_digit_end = start, safe_value_size = 0;
4547 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4548 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4549
4550 this->interval.end = start;
4551 value.clear();
// The parenthesis parsers are null-checked everywhere below, so they may be absent: guard here too
// instead of dereferencing a null pointer.
4552 if (m_lparenthesis) m_lparenthesis->invalidate();
4553 if (m_rparenthesis) m_rparenthesis->invalidate();
4554
4555 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4556 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4557 safe_value_size = value.size();
4558 this->interval.end = m_plus_sign->interval.end;
4559 }
4560
4561 for (;;) {
4562 _Assume_(text || this->interval.end >= end);
4563 if (this->interval.end >= end || !text[this->interval.end])
4564 break;
4565 if (m_digit->match(text, this->interval.end, end, flags)) {
4566 // Digit
4567 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4568 this->interval.end = m_digit->interval.end;
// Digits inside parentheses only become "safe" once the right parenthesis is seen.
4569 if (!in_parentheses) {
4570 safe_digit_end = this->interval.end;
4571 safe_value_size = value.size();
4572 has_digits = true;
4573 }
4574 after_digit = true;
4575 after_parentheses = false;
4576 }
4577 else if (
4578 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4579 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4580 m_lparenthesis->match(text, this->interval.end, end, flags))
4581 {
4582 // Left parenthesis
4583 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4584 this->interval.end = m_lparenthesis->interval.end;
4585 in_parentheses = true;
4586 after_digit = false;
4587 after_parentheses = false;
4588 }
4589 else if (
4590 in_parentheses && // After left parenthesis
4591 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4592 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4593 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4594 {
4595 // Right parenthesis
4596 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4597 this->interval.end = m_rparenthesis->interval.end;
4598 safe_digit_end = this->interval.end;
4599 safe_value_size = value.size();
4600 in_parentheses = false;
4601 after_digit = false;
4602 after_parentheses = true;
4603 }
4604 else if (
4605 after_digit &&
4606 !in_parentheses && // No separators inside parentheses
4607 !after_parentheses && // No separators following right parenthesis
4608 m_separator && m_separator->match(text, this->interval.end, end, flags))
4609 {
4610 // Separator
4611 this->interval.end = m_separator->interval.end;
4612 after_digit = false;
4613 after_parentheses = false;
4614 }
// NOTE(review): the listing numbering skips one line inside the condition below - confirm no
// additional precondition for spaces is missing.
4615 else if (
4617 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4618 {
4619 // Space
4620 this->interval.end = m_space->interval.end;
4621 after_digit = false;
4622 after_parentheses = false;
4623 }
4624 else
4625 break;
4626 }
4627 if (has_digits) {
// Cut value and the interval back to the last safe end point.
4628 value.erase(safe_value_size);
4629 this->interval.start = start;
4630 this->interval.end = safe_digit_end;
4631 return true;
4632 }
4633 value.clear();
4634 this->interval.invalidate();
4635 return false;
4636 }
4637
/// Clears the collected phone number and invalidates this parser's interval.
4638 virtual void invalidate()
4639 {
4640 value.clear();
// Reset our own interval as well, so a result from an earlier match() does not remain visible.
4641 this->interval.invalidate();
4642 }
4643
4644 public:
// Phone number collected by match(): the plus sign, digits and parentheses that were matched,
// without separators and spaces.
4645 std::basic_string<T> value;
4646
4647 protected:
// Parser for a single digit; match() dereferences it without a null check.
4648 std::shared_ptr<basic_parser<T>> m_digit;
// Parser for the leading plus sign; skipped by match() when null.
4649 std::shared_ptr<basic_parser<T>> m_plus_sign;
// Parser for the left parenthesis.
4650 std::shared_ptr<basic_set<T>> m_lparenthesis;
// Parser for the right parenthesis; its hit_offset must equal the left one's for a pair to match.
4651 std::shared_ptr<basic_set<T>> m_rparenthesis;
// Parser for separators between digits; skipped by match() when null.
4652 std::shared_ptr<basic_parser<T>> m_separator;
// Parser for spaces; skipped by match() when null.
4653 std::shared_ptr<basic_parser<T>> m_space;
4654 };
4655
4658#ifdef _UNICODE
4660#else
4662#endif
4664
4670 template <class T>
4671 class basic_iban : public basic_parser<T>
4672 {
4673 public:
/// Constructs the IBAN parser with empty results.
///
/// \param[in] space   Parser for spaces skipped inside the BBAN; match() ignores it when null
/// \param[in] locale  Locale passed to the base parser
4674 basic_iban(
4675 _In_ const std::shared_ptr<basic_parser<T>>& space,
4676 _In_ const std::locale& locale = std::locale()) :
4677 basic_parser<T>(locale),
4678 m_space(space)
4679 {
// Start with empty zero-terminated strings and no valid checksum.
4680 this->country[0] = 0;
4681 this->check_digits[0] = 0;
4682 this->bban[0] = 0;
4683 this->is_valid = false;
4684 }
4685
/// Matches an International Bank Account Number (IBAN).
///
/// Expected layout: two-letter country code, two check digits, then the BBAN whose length is
/// fixed per country (total length from the table minus 4). Spaces matched by m_space inside the
/// BBAN are skipped. With match_case_insensitive, letters are uppercased before comparison.
///
/// On success country, check_digits and bban hold the zero-terminated parts and is_valid tells
/// whether the modulo-97 checksum equals 1. The method returns true for a structurally complete
/// IBAN even when the checksum fails.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop matching at
/// \param[in] flags  Match flags
///
/// \returns true when the text has the structure of an IBAN of a known country; false otherwise,
///          with all results cleared and this->interval invalidated.
4686 virtual bool match(
4687 _In_reads_or_z_(end) const T* text,
4688 _In_ size_t start = 0,
4689 _In_ size_t end = SIZE_MAX,
4690 _In_ int flags = match_default)
4691 {
4692 _Assume_(text || start >= end);
4693 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4694 const bool case_insensitive = flags & match_case_insensitive ? true : false;
// Per-country description: country code, fixed check digits (zeroes when any value is allowed)
// and the total IBAN length.
4695 struct country_t {
4696 T country[2];
4697 T check_digits[2];
4698 size_t length;
4699 };
// Sorted by country code; the lookup below is a binary search.
4700 static const country_t s_countries[] = {
4701 { { 'A', 'D' }, {}, 24 }, // Andorra
4702 { { 'A', 'E' }, {}, 23 }, // United Arab Emirates
4703 { { 'A', 'L' }, {}, 28 }, // Albania
4704 { { 'A', 'O' }, {}, 25 }, // Angola
4705 { { 'A', 'T' }, {}, 20 }, // Austria
4706 { { 'A', 'Z' }, {}, 28 }, // Azerbaijan
4707 { { 'B', 'A' }, { '3', '9' }, 20}, // Bosnia and Herzegovina
4708 { { 'B', 'E' }, {}, 16 }, // Belgium
4709 { { 'B', 'F' }, {}, 28 }, // Burkina Faso
4710 { { 'B', 'G' }, {}, 22 }, // Bulgaria
4711 { { 'B', 'H' }, {}, 22 }, // Bahrain
4712 { { 'B', 'I' }, {}, 27 }, // Burundi
4713 { { 'B', 'J' }, {}, 28 }, // Benin
4714 { { 'B', 'R' }, {}, 29 }, // Brazil
4715 { { 'B', 'Y' }, {}, 28 }, // Belarus
4716 { { 'C', 'F' }, {}, 27 }, // Central African Republic
4717 { { 'C', 'G' }, {}, 27 }, // Congo, Republic of the
4718 { { 'C', 'H' }, {}, 21 }, // Switzerland
4719 { { 'C', 'I' }, {}, 28 }, // Côte d'Ivoire
4720 { { 'C', 'M' }, {}, 27 }, // Cameroon
4721 { { 'C', 'R' }, {}, 22 }, // Costa Rica
4722 { { 'C', 'V' }, {}, 25 }, // Cabo Verde
4723 { { 'C', 'Y' }, {}, 28 }, // Cyprus
4724 { { 'C', 'Z' }, {}, 24 }, // Czech Republic
4725 { { 'D', 'E' }, {}, 22 }, // Germany
4726 { { 'D', 'J' }, {}, 27 }, // Djibouti
4727 { { 'D', 'K' }, {}, 18 }, // Denmark
4728 { { 'D', 'O' }, {}, 28 }, // Dominican Republic
4729 { { 'D', 'Z' }, {}, 26 }, // Algeria
4730 { { 'E', 'E' }, {}, 20 }, // Estonia
4731 { { 'E', 'G' }, {}, 29 }, // Egypt
4732 { { 'E', 'S' }, {}, 24 }, // Spain
4733 { { 'F', 'I' }, {}, 18 }, // Finland
4734 { { 'F', 'O' }, {}, 18 }, // Faroe Islands
4735 { { 'F', 'R' }, {}, 27 }, // France
4736 { { 'G', 'A' }, {}, 27 }, // Gabon
4737 { { 'G', 'B' }, {}, 22 }, // United Kingdom
4738 { { 'G', 'E' }, {}, 22 }, // Georgia
4739 { { 'G', 'I' }, {}, 23 }, // Gibraltar
4740 { { 'G', 'L' }, {}, 18 }, // Greenland
4741 { { 'G', 'Q' }, {}, 27 }, // Equatorial Guinea
4742 { { 'G', 'R' }, {}, 27 }, // Greece
4743 { { 'G', 'T' }, {}, 28 }, // Guatemala
4744 { { 'G', 'W' }, {}, 25 }, // Guinea-Bissau
4745 { { 'H', 'N' }, {}, 28 }, // Honduras
4746 { { 'H', 'R' }, {}, 21 }, // Croatia
4747 { { 'H', 'U' }, {}, 28 }, // Hungary
4748 { { 'I', 'E' }, {}, 22 }, // Ireland
4749 { { 'I', 'L' }, {}, 23 }, // Israel
4750 { { 'I', 'Q' }, {}, 23 }, // Iraq
4751 { { 'I', 'R' }, {}, 26 }, // Iran
4752 { { 'I', 'S' }, {}, 26 }, // Iceland
4753 { { 'I', 'T' }, {}, 27 }, // Italy
4754 { { 'J', 'O' }, {}, 30 }, // Jordan
4755 { { 'K', 'M' }, {}, 27 }, // Comoros
4756 { { 'K', 'W' }, {}, 30 }, // Kuwait
4757 { { 'K', 'Z' }, {}, 20 }, // Kazakhstan
4758 { { 'L', 'B' }, {}, 28 }, // Lebanon
4759 { { 'L', 'C' }, {}, 32 }, // Saint Lucia
4760 { { 'L', 'I' }, {}, 21 }, // Liechtenstein
4761 { { 'L', 'T' }, {}, 20 }, // Lithuania
4762 { { 'L', 'U' }, {}, 20 }, // Luxembourg
4763 { { 'L', 'V' }, {}, 21 }, // Latvia
4764 { { 'L', 'Y' }, {}, 25 }, // Libya
4765 { { 'M', 'A' }, {}, 28 }, // Morocco
4766 { { 'M', 'C' }, {}, 27 }, // Monaco
4767 { { 'M', 'D' }, {}, 24 }, // Moldova
4768 { { 'M', 'E' }, { '2', '5' }, 22 }, // Montenegro
4769 { { 'M', 'G' }, {}, 27 }, // Madagascar
4770 { { 'M', 'K' }, { '0', '7' }, 19 }, // North Macedonia
4771 { { 'M', 'L' }, {}, 28 }, // Mali
4772 { { 'M', 'R' }, { '1', '3' }, 27}, // Mauritania
4773 { { 'M', 'T' }, {}, 31 }, // Malta
4774 { { 'M', 'U' }, {}, 30 }, // Mauritius
4775 { { 'M', 'Z' }, {}, 25 }, // Mozambique
4776 { { 'N', 'E' }, {}, 28 }, // Niger
4777 { { 'N', 'I' }, {}, 32 }, // Nicaragua
4778 { { 'N', 'L' }, {}, 18 }, // Netherlands
4779 { { 'N', 'O' }, {}, 15 }, // Norway
4780 { { 'P', 'K' }, {}, 24 }, // Pakistan
4781 { { 'P', 'L' }, {}, 28 }, // Poland
4782 { { 'P', 'S' }, {}, 29 }, // Palestinian territories
4783 { { 'P', 'T' }, { '5', '0' }, 25 }, // Portugal
4784 { { 'Q', 'A' }, {}, 29 }, // Qatar
4785 { { 'R', 'O' }, {}, 24 }, // Romania
4786 { { 'R', 'S' }, { '3', '5' }, 22 }, // Serbia
4787 { { 'R', 'U' }, {}, 33 }, // Russia
4788 { { 'S', 'A' }, {}, 24 }, // Saudi Arabia
4789 { { 'S', 'C' }, {}, 31 }, // Seychelles
4790 { { 'S', 'D' }, {}, 18 }, // Sudan
4791 { { 'S', 'E' }, {}, 24 }, // Sweden
4792 { { 'S', 'I' }, { '5', '6' }, 19 }, // Slovenia
4793 { { 'S', 'K' }, {}, 24 }, // Slovakia
4794 { { 'S', 'M' }, {}, 27 }, // San Marino
4795 { { 'S', 'N' }, {}, 28 }, // Senegal
4796 { { 'S', 'T' }, {}, 25 }, // São Tomé and Príncipe
4797 { { 'S', 'V' }, {}, 28 }, // El Salvador
4798 { { 'T', 'D' }, {}, 27 }, // Chad
4799 { { 'T', 'G' }, {}, 28 }, // Togo
4800 { { 'T', 'L' }, { '3', '8' }, 23}, // East Timor
4801 { { 'T', 'N' }, { '5', '9' }, 24 }, // Tunisia
4802 { { 'T', 'R' }, {}, 26 }, // Turkey
4803 { { 'U', 'A' }, {}, 29 }, // Ukraine
4804 { { 'V', 'A' }, {}, 22 }, // Vatican City
4805 { { 'V', 'G' }, {}, 24 }, // Virgin Islands, British
4806 { { 'X', 'K' }, {}, 20 }, // Kosovo
4807 };
4808 const country_t* country_desc = nullptr;
4809 size_t n, available, next, bban_length;
// Running remainder of the modulo-97 computation; declared up front, ahead of every goto.
4810 uint32_t nominator;
4811
// Country code: exactly two letters A-Z.
4812 this->interval.end = start;
4813 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4814 if (this->interval.end >= end || !text[this->interval.end])
4815 goto error; // incomplete country code
4816 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4817 if (chr < 'A' || 'Z' < chr)
4818 goto error; // invalid country code
4819 this->country[i] = chr;
4820 }
// Binary search for the country code in s_countries.
4821 for (size_t l = 0, r = _countof(s_countries);;) {
4822 if (l >= r)
4823 goto error; // unknown country
4824 size_t m = (l + r) / 2;
4825 const country_t& c = s_countries[m];
4826 if (c.country[0] < this->country[0] || (c.country[0] == this->country[0] && c.country[1] < this->country[1]))
4827 l = m + 1;
4828 else if (this->country[0] < c.country[0] || (this->country[0] == c.country[0] && this->country[1] < c.country[1]))
4829 r = m;
4830 else {
4831 country_desc = &c;
4832 break;
4833 }
4834 }
4835 this->country[2] = 0;
4836
// Check digits: exactly two decimal digits (a terminating zero fails the range test too).
4837 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4838 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4839 goto error; // incomplete or invalid check digits
4840 this->check_digits[i] = text[this->interval.end];
4841 }
4842 this->check_digits[2] = 0;
4843
// Countries with fixed check digits in the table must match them exactly.
4844 if ((country_desc->check_digits[0] && this->check_digits[0] != country_desc->check_digits[0]) ||
4845 (country_desc->check_digits[1] && this->check_digits[1] != country_desc->check_digits[1]))
4846 goto error; // unexpected check digits
4847
// BBAN: the remaining alphanumeric characters; matched spaces are skipped and not counted.
4848 bban_length = country_desc->length - 4;
4849 for (n = 0; n < bban_length;) {
4850 if (this->interval.end >= end || !text[this->interval.end])
4851 goto error; // bban too short
4852 if (m_space && m_space->match(text, this->interval.end, end, flags)) {
4853 this->interval.end = m_space->interval.end;
4854 continue;
4855 }
4856 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4857 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4858 this->bban[n++] = chr;
4859 this->interval.end++;
4860 }
4861 else
4862 goto error; // invalid bban
4863 }
4864 this->bban[n] = 0;
4865
4866 // Normalize IBAN.
// The digit string is BBAN + country code + check digits, with every letter replaced by its
// two-digit value (A=10 ... Z=35).
4867 T normalized[69];
4868 available = 0;
4869 for (size_t i = 0; ; ++i) {
4870 if (!this->bban[i]) {
// End of BBAN reached: append the country code and check digits, then stop.
4871 for (i = 0; i < 2; ++i) {
4872 if ('A' <= this->country[i] && this->country[i] <= 'J') {
4873 normalized[available++] = '1';
4874 normalized[available++] = '0' + this->country[i] - 'A';
4875 }
4876 else if ('K' <= this->country[i] && this->country[i] <= 'T') {
4877 normalized[available++] = '2';
4878 normalized[available++] = '0' + this->country[i] - 'K';
4879 }
4880 else if ('U' <= this->country[i] && this->country[i] <= 'Z') {
4881 normalized[available++] = '3';
4882 normalized[available++] = '0' + this->country[i] - 'U';
4883 }
4884 }
4885 normalized[available++] = this->check_digits[0];
4886 normalized[available++] = this->check_digits[1];
4887 normalized[available] = 0;
4888 break;
4889 }
4890 if ('0' <= this->bban[i] && this->bban[i] <= '9')
4891 normalized[available++] = this->bban[i];
4892 else if ('A' <= this->bban[i] && this->bban[i] <= 'J') {
4893 normalized[available++] = '1';
4894 normalized[available++] = '0' + this->bban[i] - 'A';
4895 }
4896 else if ('K' <= this->bban[i] && this->bban[i] <= 'T') {
4897 normalized[available++] = '2';
4898 normalized[available++] = '0' + this->bban[i] - 'K';
4899 }
4900 else if ('U' <= this->bban[i] && this->bban[i] <= 'Z') {
4901 normalized[available++] = '3';
4902 normalized[available++] = '0' + this->bban[i] - 'U';
4903 }
4904 }
4905
4906 // Calculate modulo 97.
// Piecewise: reduce the running value to a remainder, then append further digits until the
// value has at most 9 digits again.
4907 nominator = stdex::strtou32(normalized, 9, &next, 10);
4908 for (;;) {
4909 nominator %= 97;
4910 if (!normalized[next]) {
4911 this->is_valid = nominator == 1;
4912 break;
4913 }
4914 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
4915 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
4916 nominator = nominator * 10 + (normalized[next] - '0');
4917 }
4918
4919 this->interval.start = start;
4920 return true;
4921
4922 error:
// Any structural failure leaves the parser in the same state as invalidate().
4923 this->country[0] = 0;
4924 this->check_digits[0] = 0;
4925 this->bban[0] = 0;
4926 this->is_valid = false;
4927 this->interval.invalidate();
4928 return false;
4929 }
4930
/// Clears the parsed IBAN parts and the checksum flag, and invalidates this parser's interval.
4931 virtual void invalidate()
4932 {
4933 this->country[0] = 0;
4934 this->check_digits[0] = 0;
4935 this->bban[0] = 0;
4936 this->is_valid = false;
// Reset our own interval as well, matching what the failure path of match() does.
4937 this->interval.invalidate();
4938 }
4939
4940 public:
// Two-letter country code, zero-terminated.
4941 T country[3];
// Two check digits, zero-terminated (match() writes indices 0..2).
4942 T check_digits[3];
// BBAN without spaces, zero-terminated.
4943 T bban[31];
// true when the last matched IBAN passed the modulo-97 check.
4944 bool is_valid;
4945
4946 protected:
// Parser for spaces skipped inside the BBAN; ignored by match() when null.
4947 std::shared_ptr<basic_parser<T>> m_space;
4948 };
4949
// IBAN parser for narrow (char) text.
4950 using iban = basic_iban<char>;
// IBAN parser for wide (wchar_t) text.
4951 using wiban = basic_iban<wchar_t>;
// Build-dependent alias: wide variant when _UNICODE is defined, narrow variant otherwise.
4952#ifdef _UNICODE
4953 using tiban = wiban;
4954#else
4955 using tiban = iban;
4956#endif
4958
4964 template <class T>
4966 {
4967 public:
// Constructor parameter list and body: stores the space parser, passes the locale to
// basic_parser<T> and starts with empty results.
// NOTE(review): the line naming the constructor is not present above these parameters - confirm against the original header.
//
// space   Parser for spaces between character groups of the reference
// locale  Locale passed to the base parser
4969 _In_ const std::shared_ptr<basic_parser<T>>& space,
4970 _In_ const std::locale& locale = std::locale()) :
4971 basic_parser<T>(locale),
4972 m_space(space)
4973 {
4974 this->check_digits[0] = 0;
4975 this->reference[0] = 0;
4976 this->is_valid = false;
4977 }
4978
/// Matches a creditor reference: "RF", two check digits, then alphanumeric characters read in
/// groups of up to four, each group optionally preceded by a space matched by m_space.
///
/// With match_case_insensitive, letters are uppercased before comparison. On success
/// check_digits holds the two digits, reference holds the characters right-aligned and padded
/// on the left with '0', and is_valid tells whether the modulo-97 checksum equals 1. The method
/// returns true for a structurally complete reference even when the checksum fails.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop matching at
/// \param[in] flags  Match flags
///
/// \returns true when the text has the structure of a creditor reference; false otherwise (wrong
///          prefix, bad check digits, empty or too long reference), with results cleared and
///          this->interval invalidated.
4979 virtual bool match(
4980 _In_reads_or_z_(end) const T* text,
4981 _In_ size_t start = 0,
4982 _In_ size_t end = SIZE_MAX,
4983 _In_ int flags = match_default)
4984 {
4985 _Assume_(text || start >= end);
4986 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4987 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4988 size_t n, available, next;
// Running remainder of the modulo-97 computation; declared up front, ahead of every goto.
4989 uint32_t nominator;
4990
// Prefix: the two letters "RF".
4991 this->interval.end = start;
4992 if (this->interval.end + 1 >= end ||
4993 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'R' ||
4994 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'F')
4995 goto error; // incomplete or wrong reference ID
4996 this->interval.end += 2;
4997
// Check digits: exactly two decimal digits.
4998 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4999 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5000 goto error; // incomplete or invalid check digits
5001 this->check_digits[i] = text[this->interval.end];
5002 }
5003 this->check_digits[2] = 0;
5004
// Reference characters: a space is only tried before each group of up to four characters.
5005 for (n = 0;;) {
5006 if (m_space && m_space->match(text, this->interval.end, end, flags))
5007 this->interval.end = m_space->interval.end;
5008 for (size_t j = 0; j < 4; ++j) {
5009 if (this->interval.end >= end || !text[this->interval.end])
5010 goto out;
5011 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
5012 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
5013 if (n >= _countof(reference) - 1)
5014 goto error; // reference overflow
5015 this->reference[n++] = chr;
5016 this->interval.end++;
5017 }
5018 else
5019 goto out;
5020 }
5021 }
5022 out:
5023 if (!n)
5024 goto error; // reference too short
// Right-align the n collected characters in the buffer and fill the front with '0'.
5025 this->reference[_countof(this->reference) - 1] = 0;
5026 for (size_t i = n, j = _countof(this->reference) - 1; i;)
5027 this->reference[--j] = this->reference[--i];
5028 for (size_t j = _countof(this->reference) - 1 - n; j;)
5029 this->reference[--j] = '0';
5030
5031 // Normalize creditor reference.
// The digit string is the reference + "2715" (R=27, F=15) + check digits, with every letter
// replaced by its two-digit value (A=10 ... Z=35).
// NOTE(review): normalized needs two digits per reference character plus six digits and the
// terminator - confirm 47 is enough for the declared size of reference.
5032 T normalized[47];
5033 available = 0;
5034 for (size_t i = 0; ; ++i) {
5035 if (!this->reference[i]) {
5036 normalized[available++] = '2'; // R
5037 normalized[available++] = '7';
5038 normalized[available++] = '1'; // F
5039 normalized[available++] = '5';
5040 normalized[available++] = this->check_digits[0];
5041 normalized[available++] = this->check_digits[1];
5042 normalized[available] = 0;
5043 break;
5044 }
5045 if ('0' <= this->reference[i] && this->reference[i] <= '9')
5046 normalized[available++] = this->reference[i];
5047 else if ('A' <= this->reference[i] && this->reference[i] <= 'J') {
5048 normalized[available++] = '1';
5049 normalized[available++] = '0' + this->reference[i] - 'A';
5050 }
5051 else if ('K' <= this->reference[i] && this->reference[i] <= 'T') {
5052 normalized[available++] = '2';
5053 normalized[available++] = '0' + this->reference[i] - 'K';
5054 }
5055 else if ('U' <= this->reference[i] && this->reference[i] <= 'Z') {
5056 normalized[available++] = '3';
5057 normalized[available++] = '0' + this->reference[i] - 'U';
5058 }
5059 }
5060
5061 // Calculate modulo 97.
// Piecewise: reduce the running value to a remainder, then append further digits until the
// value has at most 9 digits again.
5062 nominator = stdex::strtou32(normalized, 9, &next, 10);
5063 for (;;) {
5064 nominator %= 97;
5065 if (!normalized[next]) {
5066 this->is_valid = nominator == 1;
5067 break;
5068 }
5069 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
5070 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
5071 nominator = nominator * 10 + (normalized[next] - '0');
5072 }
5073
5074 this->interval.start = start;
5075 return true;
5076
5077 error:
// Any structural failure leaves the parser in the same state as invalidate().
5078 this->check_digits[0] = 0;
5079 this->reference[0] = 0;
5080 this->is_valid = false;
5081 this->interval.invalidate();
5082 return false;
5083 }
5084
/// Clears the parsed check digits, reference and checksum flag, and invalidates this parser's interval.
5085 virtual void invalidate()
5086 {
5087 this->check_digits[0] = 0;
5088 this->reference[0] = 0;
5089 this->is_valid = false;
// Reset our own interval as well, matching what the failure path of match() does.
5090 this->interval.invalidate();
5091 }
5092
// NOTE(review): no public members are visible here, yet the methods of this class read and write
// this->check_digits, this->reference and this->is_valid - confirm their declarations (and the
// size of reference) against the original header.
5093 public:
5097
5098 protected:
// Parser for spaces between character groups of the reference; ignored by match() when null.
5099 std::shared_ptr<basic_parser<T>> m_space;
5100 };
5101
5104#ifdef _UNICODE
5106#else
5108#endif
5110
5116 template <class T>
5118 {
5119 public:
/// Constructs the parser; the locale is passed to the base parser.
5120 basic_si_reference_part(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5121
/// Matches a run of one or more decimal digits ('0'-'9') starting at start.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop matching at
/// \param[in] flags  Match flags (not used by this parser)
///
/// \returns true when at least one digit was found, with this->interval covering the run;
///          false otherwise, with this->interval invalidated.
5122 virtual bool match(
5123 _In_reads_or_z_(end) const T* text,
5124 _In_ size_t start = 0,
5125 _In_ size_t end = SIZE_MAX,
5126 _In_ int flags = match_default)
5127 {
5128 _Assume_(text || start >= end);
5129 this->interval.end = start;
// Advance over digits until the end index, the terminating zero or a non-digit.
5130 for (;;) {
5131 if (this->interval.end >= end || !text[this->interval.end])
5132 break;
5133 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9')
5134 this->interval.end++;
5135 else
5136 break;
5137 }
// Succeed only on a non-empty run; an empty part must fail so callers can tell it is absent.
5138 if (start < this->interval.end) {
5139 this->interval.start = start;
5140 return true;
5141 }
5142 this->interval.invalidate();
5143 return false;
5144 }
5145 };
5146
5149#ifdef _UNICODE
5151#else
5153#endif
5155
5161 template <class T>
5163 {
5164 public:
/// Constructs the parser; the locale is passed to the base parser.
5165 basic_si_reference_delimiter(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5166
/// Matches a single '-' character at start.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in text to test
/// \param[in] end    Index in text to stop matching at
/// \param[in] flags  Match flags (not used by this parser)
///
/// \returns true when text[start] is '-', with this->interval set to [start, start + 1);
///          false otherwise, with this->interval invalidated.
5167 virtual bool match(
5168 _In_reads_or_z_(end) const T* text,
5169 _In_ size_t start = 0,
5170 _In_ size_t end = SIZE_MAX,
5171 _In_ int flags = match_default)
5172 {
5173 _Assume_(text || start >= end);
5174 if (start < end && text[start] == '-') {
5175 this->interval.end = (this->interval.start = start) + 1;
5176 return true;
5177 }
5178 this->interval.invalidate();
5179 return false;
5180 }
5181 };
5182
5185#ifdef _UNICODE
5187#else
5189#endif
5191
5199 template <class T>
5201 {
5202 public:
// Constructor parameter list and body: constructs the three part parsers and the delimiter
// parser with the given locale, stores the space parser, and starts with is_valid false and an
// empty model string.
// NOTE(review): the line naming the constructor is not present above these parameters - confirm against the original header.
//
// space   Parser for the space tried after the two model digits
// locale  Locale passed to the base parser and the member parsers
5204 _In_ const std::shared_ptr<basic_parser<T>>& space,
5205 _In_ const std::locale& locale = std::locale()) :
5206 basic_parser<T>(locale),
5207 part1(locale),
5208 part2(locale),
5209 part3(locale),
5210 is_valid(false),
5211 m_space(space),
5212 m_delimiter(locale)
5213 {
5214 this->model[0] = 0;
5215 }
5216
5217 virtual bool match(
5218 _In_reads_or_z_(end) const T* text,
5219 _In_ size_t start = 0,
5220 _In_ size_t end = SIZE_MAX,
5221 _In_ int flags = match_default)
5222 {
5223 _Assume_(text || start >= end);
5224 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
5225 const bool case_insensitive = flags & match_case_insensitive ? true : false;
5226
5227 this->interval.end = start;
5228 if (this->interval.end + 1 >= end ||
5229 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'S' ||
5230 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'I')
5231 goto error; // incomplete or wrong reference ID
5232 this->interval.end += 2;
5233
5234 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5235 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5236 goto error; // incomplete or invalid model
5237 this->model[i] = text[this->interval.end];
5238 }
5239 this->model[2] = 0;
5240
5241 this->part1.invalidate();
5242 this->part2.invalidate();
5243 this->part3.invalidate();
5244 if (this->model[0] == '9' && this->model[1] == '9') {
5245 is_valid = true;
5246 this->interval.start = start;
5247 return true;
5248 }
5249
5250 if (m_space && m_space->match(text, this->interval.end, end, flags))
5251 this->interval.end = m_space->interval.end;
5252
5253 this->part1.match(text, this->interval.end, end, flags) &&
5254 this->m_delimiter.match(text, this->part1.interval.end, end, flags) &&
5255 this->part2.match(text, this->m_delimiter.interval.end, end, flags) &&
5256 this->m_delimiter.match(text, this->part2.interval.end, end, flags) &&
5257 this->part3.match(text, this->m_delimiter.interval.end, end, flags);
5258
5259 this->interval.start = start;
5260 if (this->part3.interval)
5261 this->interval.end = this->part3.interval.end;
5262 else if (this->part2.interval)
5263 this->interval.end = this->part2.interval.end;
5264 else if (this->part1.interval)
5265 this->interval.end = this->part1.interval.end;
5266 else
5267 this->interval.end = start + 4;
5268
5269 if (this->model[0] == '0' && this->model[1] == '0')
5270 is_valid =
5271 this->part3.interval ?
5272 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5273 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 :
5274 this->part2.interval ?
5275 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5276 this->part1.interval.size() + this->part2.interval.size() <= 20 :
5277 this->part1.interval ?
5278 this->part1.interval.size() <= 12 :
5279 false;
5280 else if (this->model[0] == '0' && this->model[1] == '1')
5281 is_valid =
5282 this->part3.interval ?
5283 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5284 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5285 check11(
5286 text + this->part1.interval.start, this->part1.interval.size(),
5287 text + this->part2.interval.start, this->part2.interval.size(),
5288 text + this->part3.interval.start, this->part3.interval.size()) :
5289 this->part2.interval ?
5290 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5291 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5292 check11(
5293 text + this->part1.interval.start, this->part1.interval.size(),
5294 text + this->part2.interval.start, this->part2.interval.size()) :
5295 this->part1.interval ?
5296 this->part1.interval.size() <= 12 &&
5297 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5298 false;
5299 else if (this->model[0] == '0' && this->model[1] == '2')
5300 is_valid =
5301 this->part3.interval ?
5302 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5303 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5304 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5305 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5306 false;
5307 else if (this->model[0] == '0' && this->model[1] == '3')
5308 is_valid =
5309 this->part3.interval ?
5310 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5311 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5312 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5313 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5314 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5315 false;
5316 else if (this->model[0] == '0' && this->model[1] == '4')
5317 is_valid =
5318 this->part3.interval ?
5319 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5320 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5321 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5322 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5323 false;
5324 else if ((this->model[0] == '0' || this->model[0] == '5') && this->model[1] == '5')
5325 is_valid =
5326 this->part3.interval ?
5327 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5328 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5329 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5330 this->part2.interval ?
5331 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5332 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5333 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5334 this->part1.interval ?
5335 this->part1.interval.size() <= 12 &&
5336 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5337 false;
5338 else if (this->model[0] == '0' && this->model[1] == '6')
5339 is_valid =
5340 this->part3.interval ?
5341 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5342 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5343 check11(
5344 text + this->part2.interval.start, this->part2.interval.size(),
5345 text + this->part3.interval.start, this->part3.interval.size()) :
5346 this->part2.interval ?
5347 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5348 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5349 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5350 false;
5351 else if (this->model[0] == '0' && this->model[1] == '7')
5352 is_valid =
5353 this->part3.interval ?
5354 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5355 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5356 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5357 this->part2.interval ?
5358 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5359 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5360 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5361 false;
5362 else if (this->model[0] == '0' && this->model[1] == '8')
5363 is_valid =
5364 this->part3.interval ?
5365 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5366 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5367 check11(
5368 text + this->part1.interval.start, this->part1.interval.size(),
5369 text + this->part2.interval.start, this->part2.interval.size()) &&
5370 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5371 false;
5372 else if (this->model[0] == '0' && this->model[1] == '9')
5373 is_valid =
5374 this->part3.interval ?
5375 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5376 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5377 check11(
5378 text + this->part1.interval.start, this->part1.interval.size(),
5379 text + this->part2.interval.start, this->part2.interval.size()) :
5380 this->part2.interval ?
5381 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5382 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5383 check11(
5384 text + this->part1.interval.start, this->part1.interval.size(),
5385 text + this->part2.interval.start, this->part2.interval.size()) :
5386 this->part1.interval ?
5387 this->part1.interval.size() <= 12 &&
5388 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5389 false;
5390 else if (this->model[0] == '1' && this->model[1] == '0')
5391 is_valid =
5392 this->part3.interval ?
5393 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5394 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5395 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5396 check11(
5397 text + this->part2.interval.start, this->part2.interval.size(),
5398 text + this->part3.interval.start, this->part3.interval.size()) :
5399 this->part2.interval ?
5400 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5401 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5402 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5403 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5404 false;
5405 else if (
5406 (this->model[0] == '1' && (this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5407 ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '8') ||
5408 (this->model[0] == '4' && (this->model[1] == '0' || this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5409 (this->model[0] == '5' && (this->model[1] == '1' || this->model[1] == '8')))
5410 is_valid =
5411 this->part3.interval ?
5412 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5413 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5414 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5415 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5416 this->part2.interval ?
5417 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5418 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5419 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5420 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5421 false;
5422 else if (this->model[0] == '1' && this->model[1] == '2')
5423 is_valid =
5424 this->part3.interval ? false :
5425 this->part2.interval ? false :
5426 this->part1.interval ?
5427 this->part1.interval.size() <= 13 &&
5428 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5429 false;
5430 else if ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '1')
5431 is_valid =
5432 this->part3.interval ? false :
5433 this->part2.interval ?
5434 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5435 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5436 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5437 false;
5438 else
5439 is_valid = true; // Assume models we don't handle as valid
5440 return true;
5441
5442 error:
5443 this->model[0] = 0;
5444 this->part1.interval.start = (this->part1.interval.end = start) + 1;
5445 this->part2.interval.start = (this->part2.interval.end = start) + 1;
5446 this->part3.interval.start = (this->part3.interval.end = start) + 1;
5447 this->is_valid = false;
5448 this->interval.invalidate();
5449 return false;
5450 }
5451
/// Resets the parser to the "no match" state: clears the model code,
/// invalidates the three reference parts and drops the validity flag.
// NOTE(review): listing line 5459 is absent from this extract; presumably it
// forwards to the base-class invalidate() - confirm against the original header.
5452 virtual void invalidate()
5453 {
5454 this->model[0] = 0;
5455 this->part1.invalidate();
5456 this->part2.invalidate();
5457 this->part3.invalidate();
5458 this->is_valid = false;
5460 }
5461
5462 protected:
5463 static bool check11(
5464 _In_count_(num_part1) const T* part1, _In_ size_t num_part1)
5465 {
5466 _Assume_(part1 && num_part1 >= 1);
5467 uint32_t nominator = 0, ponder = 2;
5468 for (size_t i = num_part1 - 1; i--; ++ponder)
5469 nominator += (part1[i] - '0') * ponder;
5470 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5471 if (control >= 10)
5472 control = 0;
5473 return control == part1[num_part1 - 1] - '0';
5474 }
5475
5476 static bool check11(
5477 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5478 _In_count_(num_part2) const T* part2, _In_ size_t num_part2)
5479 {
5480 _Assume_(part1 || !num_part1);
5481 _Assume_(part2 && num_part2 >= 1);
5482 uint32_t nominator = 0, ponder = 2;
5483 for (size_t i = num_part2 - 1; i--; ++ponder)
5484 nominator += (part2[i] - '0') * ponder;
5485 for (size_t i = num_part1; i--; ++ponder)
5486 nominator += (part1[i] - '0') * ponder;
5487 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5488 if (control == 10)
5489 control = 0;
5490 return control == part2[num_part2 - 1] - '0';
5491 }
5492
5493 static bool check11(
5494 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5495 _In_count_(num_part2) const T* part2, _In_ size_t num_part2,
5496 _In_count_(num_part3) const T* part3, _In_ size_t num_part3)
5497 {
5498 _Assume_(part1 || !num_part1);
5499 _Assume_(part2 || !num_part2);
5500 _Assume_(part3 && num_part3 >= 1);
5501 uint32_t nominator = 0, ponder = 2;
5502 for (size_t i = num_part3 - 1; i--; ++ponder)
5503 nominator += (part3[i] - '0') * ponder;
5504 for (size_t i = num_part2; i--; ++ponder)
5505 nominator += (part2[i] - '0') * ponder;
5506 for (size_t i = num_part1; i--; ++ponder)
5507 nominator += (part1[i] - '0') * ponder;
5508 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5509 if (control == 10)
5510 control = 0;
5511 return control == part2[num_part3 - 1] - '0';
5512 }
5513
5514 public:
5515 T model[3];
5520
5521 protected:
5522 std::shared_ptr<basic_parser<T>> m_space;
5524 };
5525
5528#ifdef _UNICODE
5530#else
5532#endif
5534
5538 template <class T>
5540 {
5541 public:
// NOTE(review): the line that opens this constructor (listing line 5542) is
// absent from this extract; only the parameter and initializer lists are visible.
5543 _In_ const std::shared_ptr<basic_parser<T>>& element, // parser tried repeatedly by match() for each element
5544 _In_ const std::shared_ptr<basic_parser<T>>& digit, // parser tried repeatedly by match() after each element
5545 _In_ const std::shared_ptr<basic_parser<T>>& sign, // parser tried once by match() after the last element
5546 _In_ const std::locale& locale = std::locale()) : // forwarded to the base parser
5547 basic_parser<T>(locale),
5548 m_element(element),
5549 m_digit(digit),
5550 m_sign(sign),
5551 has_digits(false),
5552 has_charge(false)
5553 {}
5554
5555 virtual bool match(
5556 _In_reads_or_z_(end) const T* text,
5557 _In_ size_t start = 0,
5558 _In_ size_t end = SIZE_MAX,
5559 _In_ int flags = match_default)
5560 {
5561 _Assume_(text || start >= end);
5562
5563 has_digits = false;
5564 has_charge = false;
5565 this->interval.end = start;
5566
5567 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
5568 for (;;) {
5569 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
5570 this->interval.end = m_element->interval.end;
5571 while (m_digit->match(text, this->interval.end, end, flags)) {
5572 this->interval.end = m_digit->interval.end;
5573 has_digits = true;
5574 }
5575 }
5576 else if (start < this->interval.end) {
5577 if (m_sign->match(text, this->interval.end, end, flags)) {
5578 this->interval.end = m_sign->interval.end;
5579 has_charge = true;
5580 }
5581 this->interval.start = start;
5582 return true;
5583 }
5584 else {
5585 this->interval.invalidate();
5586 return false;
5587 }
5588 }
5589 }
5590
/// Resets the parser to the "no match" state by clearing both result flags.
// NOTE(review): listing line 5595 is absent from this extract; presumably it
// forwards to the base-class invalidate() - confirm against the original header.
5591 virtual void invalidate()
5592 {
5593 has_digits = false;
5594 has_charge = false;
5596 }
5597
5598 public:
5599 bool has_digits;
5600 bool has_charge;
5601
5602 protected:
5603 std::shared_ptr<basic_parser<T>> m_element;
5604 std::shared_ptr<basic_parser<T>> m_digit;
5605 std::shared_ptr<basic_parser<T>> m_sign;
5606 };
5607
5610#ifdef _UNICODE
5612#else
5614#endif
5616
5621 {
5622 public:
5623 virtual bool match(
5624 _In_reads_or_z_(end) const char* text,
5625 _In_ size_t start = 0,
5626 _In_ size_t end = SIZE_MAX,
5627 _In_ int flags = match_default)
5628 {
5629 _Assume_(text || start >= end);
5630 this->interval.end = start;
5631
5632 _Assume_(text || this->interval.end >= end);
5633 if (this->interval.end < end && text[this->interval.end]) {
5634 if (text[this->interval.end] == '\r') {
5635 this->interval.end++;
5636 if (this->interval.end < end && text[this->interval.end] == '\n') {
5637 this->interval.start = start;
5638 this->interval.end++;
5639 return true;
5640 }
5641 }
5642 else if (text[this->interval.end] == '\n') {
5643 this->interval.start = start;
5644 this->interval.end++;
5645 return true;
5646 }
5647 }
5648 this->interval.invalidate();
5649 return false;
5650 }
5651 };
5652
5656 class http_space : public parser
5657 {
5658 public:
5659 virtual bool match(
5660 _In_reads_or_z_(end) const char* text,
5661 _In_ size_t start = 0,
5662 _In_ size_t end = SIZE_MAX,
5663 _In_ int flags = match_default)
5664 {
5665 _Assume_(text || start >= end);
5666 this->interval.end = start;
5667 if (m_line_break.match(text, this->interval.end, end, flags)) {
5668 this->interval.end = m_line_break.interval.end;
5669 if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5670 this->interval.start = start;
5671 this->interval.end++;
5672 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5673 return true;
5674 }
5675 }
5676 else if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5677 this->interval.start = start;
5678 this->interval.end++;
5679 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5680 return true;
5681 }
5682 this->interval.invalidate();
5683 return false;
5684 }
5685
5686 protected:
5687 http_line_break m_line_break;
5688 };
5689
5693 class http_text_char : public parser
5694 {
5695 public:
5696 virtual bool match(
5697 _In_reads_or_z_(end) const char* text,
5698 _In_ size_t start = 0,
5699 _In_ size_t end = SIZE_MAX,
5700 _In_ int flags = match_default)
5701 {
5702 _Assume_(text || start >= end);
5703 this->interval.end = start;
5704
5705 _Assume_(text || this->interval.end >= end);
5706 if (m_space.match(text, this->interval.end, end, flags)) {
5707 this->interval.start = start;
5708 this->interval.end = m_space.interval.end;
5709 return true;
5710 }
5711 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
5712 this->interval.start = start;
5713 this->interval.end++;
5714 return true;
5715 }
5716 this->interval.invalidate();
5717 return false;
5718 }
5719
5720 protected:
5721 http_space m_space;
5722 };
5723
/// Test for an HTTP token.
///
/// Scans forward from the start position and stops at the first character
/// that is a control character (below 0x20), DEL (0x7f), one of the listed
/// separator characters, white space, a zero terminator, or the end index.
5727 class http_token : public parser
5728 {
5729 public:
5730 virtual bool match(
5731 _In_reads_or_z_(end) const char* text,
5732 _In_ size_t start = 0,
5733 _In_ size_t end = SIZE_MAX,
5734 _In_ int flags = match_default)
5735 {
5736 _Assume_(text || start >= end);
5737 this->interval.end = start;
// Advance over every character allowed in a token.
5738 for (;;) {
5739 if (this->interval.end < end && text[this->interval.end]) {
5740 if ((unsigned int)text[this->interval.end] < 0x20 ||
5741 (unsigned int)text[this->interval.end] == 0x7f ||
5742 text[this->interval.end] == '(' ||
5743 text[this->interval.end] == ')' ||
5744 text[this->interval.end] == '<' ||
5745 text[this->interval.end] == '>' ||
5746 text[this->interval.end] == '@' ||
5747 text[this->interval.end] == ',' ||
5748 text[this->interval.end] == ';' ||
5749 text[this->interval.end] == ':' ||
5750 text[this->interval.end] == '\\' ||
5751 text[this->interval.end] == '\"' ||
5752 text[this->interval.end] == '/' ||
5753 text[this->interval.end] == '[' ||
5754 text[this->interval.end] == ']' ||
5755 text[this->interval.end] == '?' ||
5756 text[this->interval.end] == '=' ||
5757 text[this->interval.end] == '{' ||
5758 text[this->interval.end] == '}' ||
5759 stdex::isspace(text[this->interval.end]))
5760 break;
5761 else
5762 this->interval.end++;
5763 }
5764 else
5765 break;
5766 }
// NOTE(review): listing line 5767 is absent from this extract; presumably it
// is the condition opening the success branch (at least one character
// consumed) - confirm against the original header.
5768 this->interval.start = start;
5769 return true;
5770 }
5771 else {
5772 this->interval.invalidate();
5773 return false;
5774 }
5775 }
5776 };
5777
5782 {
5783 public:
5784 virtual bool match(
5785 _In_reads_or_z_(end) const char* text,
5786 _In_ size_t start = 0,
5787 _In_ size_t end = SIZE_MAX,
5788 _In_ int flags = match_default)
5789 {
5790 _Assume_(text || start >= end);
5791 this->interval.end = start;
5792 if (this->interval.end < end && text[this->interval.end] != '"')
5793 goto error;
5794 this->interval.end++;
5795 content.start = this->interval.end;
5796 for (;;) {
5797 _Assume_(text || this->interval.end >= end);
5798 if (this->interval.end < end && text[this->interval.end]) {
5799 if (text[this->interval.end] == '"') {
5800 content.end = this->interval.end;
5801 this->interval.end++;
5802 break;
5803 }
5804 else if (text[this->interval.end] == '\\') {
5805 this->interval.end++;
5806 if (this->interval.end < end && text[this->interval.end]) {
5807 this->interval.end++;
5808 }
5809 else
5810 goto error;
5811 }
5812 else if (m_chr.match(text, this->interval.end, end, flags))
5813 this->interval.end++;
5814 else
5815 goto error;
5816 }
5817 else
5818 goto error;
5819 }
5820 this->interval.start = start;
5821 return true;
5822
5823 error:
5824 content.start = 1;
5825 content.end = 0;
5826 this->interval.invalidate();
5827 return false;
5828 }
5829
5830 virtual void invalidate()
5831 {
5832 content.start = 1;
5833 content.end = 0;
5834 parser::invalidate();
5835 }
5836
5837 public:
5839
5840 protected:
5841 http_text_char m_chr;
5842 };
5843
/// Test for an HTTP value: tries the `string` sub-parser first and the `token`
/// sub-parser second. Whichever one did not produce the match is invalidated.
5847 class http_value : public parser
5848 {
5849 public:
5850 virtual bool match(
5851 _In_reads_or_z_(end) const char* text,
5852 _In_ size_t start = 0,
5853 _In_ size_t end = SIZE_MAX,
5854 _In_ int flags = match_default)
5855 {
5856 _Assume_(text || start >= end);
5857 this->interval.end = start;
// First alternative: `string`.
5858 if (string.match(text, this->interval.end, end, flags)) {
5859 token.invalidate();
5860 this->interval.end = string.interval.end;
5861 this->interval.start = start;
5862 return true;
5863 }
// Second alternative: `token`.
5864 else if (token.match(text, this->interval.end, end, flags)) {
5865 string.invalidate();
5866 this->interval.end = token.interval.end;
5867 this->interval.start = start;
5868 return true;
5869 }
5870 else {
5871 this->interval.invalidate();
5872 return false;
5873 }
5874 }
5875
/// Resets both alternatives and this parser to the "no match" state.
5876 virtual void invalidate()
5877 {
5878 string.invalidate();
5879 token.invalidate();
5880 parser::invalidate();
5881 }
5882
5883 public:
// NOTE(review): listing lines 5884-5885 are absent from this extract;
// presumably they declare the `string` and `token` members used above -
// confirm against the original header.
5886 };
5887
5891 class http_parameter : public parser
5892 {
5893 public:
5894 virtual bool match(
5895 _In_reads_or_z_(end) const char* text,
5896 _In_ size_t start = 0,
5897 _In_ size_t end = SIZE_MAX,
5898 _In_ int flags = match_default)
5899 {
5900 _Assume_(text || start >= end);
5901 this->interval.end = start;
5902 if (name.match(text, this->interval.end, end, flags))
5903 this->interval.end = name.interval.end;
5904 else
5905 goto error;
5906 while (m_space.match(text, this->interval.end, end, flags))
5907 this->interval.end = m_space.interval.end;
5908 _Assume_(text || this->interval.end >= end);
5909 if (this->interval.end < end && text[this->interval.end] == '=')
5910 this->interval.end++;
5911 else
5912 while (m_space.match(text, this->interval.end, end, flags))
5913 this->interval.end = m_space.interval.end;
5914 if (value.match(text, this->interval.end, end, flags))
5915 this->interval.end = value.interval.end;
5916 else
5917 goto error;
5918 this->interval.start = start;
5919 return true;
5920
5921 error:
5922 name.invalidate();
5923 value.invalidate();
5924 this->interval.invalidate();
5925 return false;
5926 }
5927
5928 virtual void invalidate()
5929 {
5930 name.invalidate();
5931 value.invalidate();
5932 parser::invalidate();
5933 }
5934
5935 public:
5938
5939 protected:
5940 http_space m_space;
5941 };
5942
5946 class http_any_type : public parser
5947 {
5948 public:
5949 virtual bool match(
5950 _In_reads_or_z_(end) const char* text,
5951 _In_ size_t start = 0,
5952 _In_ size_t end = SIZE_MAX,
5953 _In_ int flags = match_default)
5954 {
5955 _Assume_(text || start >= end);
5956 if (start + 2 < end &&
5957 text[start] == '*' &&
5958 text[start + 1] == '/' &&
5959 text[start + 2] == '*')
5960 {
5961 this->interval.end = (this->interval.start = start) + 3;
5962 return true;
5963 }
5964 else if (start < end && text[start] == '*') {
5965 this->interval.end = (this->interval.start = start) + 1;
5966 return true;
5967 }
5968 else {
5969 this->interval.invalidate();
5970 return false;
5971 }
5972 }
5973 };
5974
5979 {
5980 public:
5981 virtual bool match(
5982 _In_reads_or_z_(end) const char* text,
5983 _In_ size_t start = 0,
5984 _In_ size_t end = SIZE_MAX,
5985 _In_ int flags = match_default)
5986 {
5987 _Assume_(text || start >= end);
5988 this->interval.end = start;
5989 if (type.match(text, this->interval.end, end, flags))
5990 this->interval.end = type.interval.end;
5991 else
5992 goto error;
5993 while (m_space.match(text, this->interval.end, end, flags))
5994 this->interval.end = m_space.interval.end;
5995 if (this->interval.end < end && text[this->interval.end] == '/')
5996 this->interval.end++;
5997 else
5998 goto error;
5999 while (m_space.match(text, this->interval.end, end, flags))
6000 this->interval.end = m_space.interval.end;
6001 if (subtype.match(text, this->interval.end, end, flags))
6002 this->interval.end = subtype.interval.end;
6003 else
6004 goto error;
6005 this->interval.start = start;
6006 return true;
6007
6008 error:
6009 type.invalidate();
6010 subtype.invalidate();
6011 this->interval.invalidate();
6012 return false;
6013 }
6014
6015 virtual void invalidate()
6016 {
6017 type.invalidate();
6018 subtype.invalidate();
6019 parser::invalidate();
6020 }
6021
6022 public:
6023 http_token type;
6024 http_token subtype;
6025
6026 protected:
6027 http_space m_space;
6028 };
6029
6034 {
6035 public:
6036 virtual bool match(
6037 _In_reads_or_z_(end) const char* text,
6038 _In_ size_t start = 0,
6039 _In_ size_t end = SIZE_MAX,
6040 _In_ int flags = match_default)
6041 {
6042 _Assume_(text || start >= end);
6043 if (!http_media_range::match(text, start, end, flags))
6044 goto error;
6045 params.clear();
6046 for (;;) {
6047 if (this->interval.end < end && text[this->interval.end]) {
6048 if (m_space.match(text, this->interval.end, end, flags))
6049 this->interval.end = m_space.interval.end;
6050 else if (text[this->interval.end] == ';') {
6051 this->interval.end++;
6052 while (m_space.match(text, this->interval.end, end, flags))
6053 this->interval.end = m_space.interval.end;
6054 http_parameter param;
6055 if (param.match(text, this->interval.end, end, flags)) {
6056 this->interval.end = param.interval.end;
6057 params.push_back(std::move(param));
6058 }
6059 else
6060 break;
6061 }
6062 else
6063 break;
6064 }
6065 else
6066 break;
6067 }
6068 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
6069 return true;
6070
6071 error:
6072 http_media_range::invalidate();
6073 params.clear();
6074 this->interval.invalidate();
6075 return false;
6076 }
6077
6078 virtual void invalidate()
6079 {
6080 params.clear();
6081 http_media_range::invalidate();
6082 }
6083
6084 public:
6085 std::list<http_parameter> params;
6086 };
6087
6092 {
6093 public:
/// Scans forward from the start position and stops at the first control
/// character (below 0x20), DEL (0x7f), ':', '/', white space, zero terminator
/// or the end index.
6094 virtual bool match(
6095 _In_reads_or_z_(end) const char* text,
6096 _In_ size_t start = 0,
6097 _In_ size_t end = SIZE_MAX,
6098 _In_ int flags = match_default)
6099 {
6100 _Assume_(text || start >= end);
6101 this->interval.end = start;
6102 for (;;) {
6103 if (this->interval.end < end && text[this->interval.end]) {
6104 if ((unsigned int)text[this->interval.end] < 0x20 ||
6105 (unsigned int)text[this->interval.end] == 0x7f ||
6106 text[this->interval.end] == ':' ||
6107 text[this->interval.end] == '/' ||
6108 stdex::isspace(text[this->interval.end]))
6109 break;
6110 else
6111 this->interval.end++;
6112 }
6113 else
6114 break;
6115 }
// NOTE(review): listing line 6116 is absent from this extract; presumably it
// is the condition opening the success branch (at least one character
// consumed) - confirm against the original header.
6117 this->interval.start = start;
6118 return true;
6119 }
6120 this->interval.invalidate();
6121 return false;
6122 }
6123 };
6124
/// Test for a URL port number: a run of decimal digits whose value fits in
/// 16 bits. The parsed number is stored in `value`.
6128 class http_url_port : public parser
6129 {
6130 public:
6131 http_url_port(_In_ const std::locale& locale = std::locale()) :
6132 parser(locale),
6133 value(0)
6134 {}
6135
6136 virtual bool match(
6137 _In_reads_or_z_(end) const char* text,
6138 _In_ size_t start = 0,
6139 _In_ size_t end = SIZE_MAX,
6140 _In_ int flags = match_default)
6141 {
6142 _Assume_(text || start >= end);
6143 value = 0;
6144 this->interval.end = start;
6145 for (;;) {
6146 if (this->interval.end < end && text[this->interval.end]) {
6147 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
// Accumulate in a wider type so exceeding the 16-bit range can be detected.
6148 size_t _value = (size_t)value * 10 + text[this->interval.end] - '0';
6149 if (_value > (uint16_t)-1) {
// Number does not fit in uint16_t: the whole match fails.
6150 value = 0;
6151 this->interval.invalidate();
6152 return false;
6153 }
6154 value = (uint16_t)_value;
6155 this->interval.end++;
6156 }
6157 else
6158 break;
6159 }
6160 else
6161 break;
6162 }
// NOTE(review): listing line 6163 is absent from this extract; presumably it
// is the condition opening the success branch (at least one digit consumed) -
// confirm against the original header.
6164 this->interval.start = start;
6165 return true;
6166 }
6167 this->interval.invalidate();
6168 return false;
6169 }
6170
/// Resets the port value to 0 and this parser to the "no match" state.
6171 virtual void invalidate()
6172 {
6173 value = 0;
6174 parser::invalidate();
6175 }
6176
6177 public:
6178 uint16_t value;
6179 };
6180
6185 {
6186 public:
6187 virtual bool match(
6188 _In_reads_or_z_(end) const char* text,
6189 _In_ size_t start = 0,
6190 _In_ size_t end = SIZE_MAX,
6191 _In_ int flags = match_default)
6192 {
6193 _Assume_(text || start >= end);
6194 this->interval.end = start;
6195 for (;;) {
6196 if (this->interval.end < end && text[this->interval.end]) {
6197 if ((unsigned int)text[this->interval.end] < 0x20 ||
6198 (unsigned int)text[this->interval.end] == 0x7f ||
6199 text[this->interval.end] == '?' ||
6200 text[this->interval.end] == '/' ||
6201 stdex::isspace(text[this->interval.end]))
6202 break;
6203 else
6204 this->interval.end++;
6205 }
6206 else
6207 break;
6208 }
6209 this->interval.start = start;
6210 return true;
6211 }
6212 };
6213
6217 class http_url_path : public parser
6218 {
6219 public:
6220 virtual bool match(
6221 _In_reads_or_z_(end) const char* text,
6222 _In_ size_t start = 0,
6223 _In_ size_t end = SIZE_MAX,
6224 _In_ int flags = match_default)
6225 {
6226 _Assume_(text || start >= end);
6228 this->interval.end = start;
6229 segments.clear();
6230 _Assume_(text || this->interval.end >= end);
6231 if (this->interval.end < end && text[this->interval.end] != '/')
6232 goto error;
6233 this->interval.end++;
6234 s.match(text, this->interval.end, end, flags);
6235 segments.push_back(s);
6236 this->interval.end = s.interval.end;
6237 for (;;) {
6238 if (this->interval.end < end && text[this->interval.end]) {
6239 if (text[this->interval.end] == '/') {
6240 this->interval.end++;
6241 s.match(text, this->interval.end, end, flags);
6242 segments.push_back(s);
6243 this->interval.end = s.interval.end;
6244 }
6245 else
6246 break;
6247 }
6248 else
6249 break;
6250 }
6251 this->interval.start = start;
6252 return true;
6253
6254 error:
6255 segments.clear();
6256 this->interval.invalidate();
6257 return false;
6258 }
6259
6260 virtual void invalidate()
6261 {
6262 segments.clear();
6263 parser::invalidate();
6264 }
6265
6266 public:
6267 std::vector<http_url_path_segment> segments;
6268 };
6269
6274 {
6275 public:
6276 virtual bool match(
6277 _In_reads_or_z_(end) const char* text,
6278 _In_ size_t start = 0,
6279 _In_ size_t end = SIZE_MAX,
6280 _In_ int flags = match_default)
6281 {
6282 _Assume_(text || start >= end);
6283 this->interval.end = start;
6284 name.start = this->interval.end;
6285 for (;;) {
6286 if (this->interval.end < end && text[this->interval.end]) {
6287 if ((unsigned int)text[this->interval.end] < 0x20 ||
6288 (unsigned int)text[this->interval.end] == 0x7f ||
6289 text[this->interval.end] == '&' ||
6290 text[this->interval.end] == '=' ||
6291 stdex::isspace(text[this->interval.end]))
6292 break;
6293 else
6294 this->interval.end++;
6295 }
6296 else
6297 break;
6298 }
6300 name.end = this->interval.end;
6301 else
6302 goto error;
6303 if (text[this->interval.end] == '=') {
6304 this->interval.end++;
6305 value.start = this->interval.end;
6306 for (;;) {
6307 if (this->interval.end < end && text[this->interval.end]) {
6308 if ((unsigned int)text[this->interval.end] < 0x20 ||
6309 (unsigned int)text[this->interval.end] == 0x7f ||
6310 text[this->interval.end] == '&' ||
6311 stdex::isspace(text[this->interval.end]))
6312 break;
6313 else
6314 this->interval.end++;
6315 }
6316 else
6317 break;
6318 }
6319 value.end = this->interval.end;
6320 }
6321 else {
6322 value.start = 1;
6323 value.end = 0;
6324 }
6325 this->interval.start = start;
6326 return true;
6327
6328 error:
6329 name.start = 1;
6330 name.end = 0;
6331 value.start = 1;
6332 value.end = 0;
6333 this->interval.invalidate();
6334 return false;
6335 }
6336
6337 virtual void invalidate()
6338 {
6339 name.start = 1;
6340 name.end = 0;
6341 value.start = 1;
6342 value.end = 0;
6343 parser::invalidate();
6344 }
6345
6346 public:
6349 };
6350
6354 class http_url : public parser
6355 {
6356 public:
6357 http_url(_In_ const std::locale& locale = std::locale()) :
6358 parser(locale),
6359 port(locale)
6360 {}
6361
6362 virtual bool match(
6363 _In_reads_or_z_(end) const char* text,
6364 _In_ size_t start = 0,
6365 _In_ size_t end = SIZE_MAX,
6366 _In_ int flags = match_default)
6367 {
6368 _Assume_(text || start >= end);
6369 this->interval.end = start;
6370
6371 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", SIZE_MAX, m_locale) == 0) {
6372 this->interval.end += 7;
6373 if (server.match(text, this->interval.end, end, flags))
6374 this->interval.end = server.interval.end;
6375 else
6376 goto error;
6377 if (this->interval.end < end && text[this->interval.end] == ':') {
6378 this->interval.end++;
6379 if (port.match(text, this->interval.end, end, flags))
6380 this->interval.end = port.interval.end;
6381 }
6382 else {
6383 port.invalidate();
6384 port.value = 80;
6385 }
6386 }
6387 else {
6388 server.invalidate();
6389 port.invalidate();
6390 port.value = 80;
6391 }
6392
6393 if (path.match(text, this->interval.end, end, flags))
6394 this->interval.end = path.interval.end;
6395 else
6396 goto error;
6397
6398 params.clear();
6399
6400 if (this->interval.end < end && text[this->interval.end] == '?') {
6401 this->interval.end++;
6402 for (;;) {
6403 if (this->interval.end < end && text[this->interval.end]) {
6404 if ((unsigned int)text[this->interval.end] < 0x20 ||
6405 (unsigned int)text[this->interval.end] == 0x7f ||
6406 stdex::isspace(text[this->interval.end]))
6407 break;
6408 else if (text[this->interval.end] == '&')
6409 this->interval.end++;
6410 else {
6411 http_url_parameter param;
6412 if (param.match(text, this->interval.end, end, flags)) {
6413 this->interval.end = param.interval.end;
6414 params.push_back(std::move(param));
6415 }
6416 else
6417 break;
6418 }
6419 }
6420 else
6421 break;
6422 }
6423 }
6424
6425 this->interval.start = start;
6426 return true;
6427
6428 error:
6429 server.invalidate();
6430 port.invalidate();
6431 path.invalidate();
6432 params.clear();
6433 this->interval.invalidate();
6434 return false;
6435 }
6436
6437 virtual void invalidate()
6438 {
6439 server.invalidate();
6440 port.invalidate();
6441 path.invalidate();
6442 params.clear();
6443 parser::invalidate();
6444 }
6445
6446 public:
6447 http_url_server server;
6448 http_url_port port;
6449 http_url_path path;
6450 std::list<http_url_parameter> params;
6451 };
6452
/// Test for an HTTP language tag: runs of alphabetic characters separated by
/// '-'. The interval of every run is appended to `components`.
6456 class http_language : public parser
6457 {
6458 public:
6459 virtual bool match(
6460 _In_reads_or_z_(end) const char* text,
6461 _In_ size_t start = 0,
6462 _In_ size_t end = SIZE_MAX,
6463 _In_ int flags = match_default)
6464 {
6465 _Assume_(text || start >= end);
6466 this->interval.end = start;
6467 components.clear();
6468 for (;;) {
6469 if (this->interval.end < end && text[this->interval.end]) {
// NOTE(review): listing line 6470 is absent from this extract; presumably it
// declares the local interval `k` used below - confirm against the original
// header.
6471 k.end = this->interval.end;
// Extend k over the run of alphabetic characters.
6472 for (;;) {
6473 if (k.end < end && text[k.end]) {
6474 if (stdex::isalpha(text[k.end]))
6475 k.end++;
6476 else
6477 break;
6478 }
6479 else
6480 break;
6481 }
// Keep the component only when it is not empty.
6482 if (this->interval.end < k.end) {
6483 k.start = this->interval.end;
6484 this->interval.end = k.end;
6485 components.push_back(k);
6486 }
6487 else
6488 break;
// A '-' announces another component.
6489 if (this->interval.end < end && text[this->interval.end] == '-')
6490 this->interval.end++;
6491 else
6492 break;
6493 }
6494 else
6495 break;
6496 }
6497 if (!components.empty()) {
6498 this->interval.start = start;
// End at the last component so a trailing '-' is not part of the match.
6499 this->interval.end = components.back().end;
6500 return true;
6501 }
6502 this->interval.invalidate();
6503 return false;
6504 }
6505
/// Discards the collected components and resets this parser.
6506 virtual void invalidate()
6507 {
6508 components.clear();
6509 parser::invalidate();
6510 }
6511
6512 public:
6513 std::vector<stdex::interval<size_t>> components;
6514 };
6515
/// Test for an HTTP weight factor: decimal digits with an optional '.' and
/// fractional digits. `value` is 1.0 after construction, after invalidate()
/// and after a failed match.
6519 class http_weight : public parser
6520 {
6521 public:
6522 http_weight(_In_ const std::locale& locale = std::locale()) :
6523 parser(locale),
6524 value(1.0f)
6525 {}
6526
6527 virtual bool match(
6528 _In_reads_or_z_(end) const char* text,
6529 _In_ size_t start = 0,
6530 _In_ size_t end = SIZE_MAX,
6531 _In_ int flags = match_default)
6532 {
6533 _Assume_(text || start >= end);
// celi_del accumulates the digits before '.', decimalni_del the digits after
// it, and decimalni_del_n is 10 raised to the number of digits after '.'.
6534 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
6535 this->interval.end = start;
6536 for (;;) {
6537 if (this->interval.end < end && text[this->interval.end]) {
6538 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6539 celi_del = celi_del * 10 + text[this->interval.end] - '0';
6540 this->interval.end++;
6541 }
6542 else if (text[this->interval.end] == '.') {
6543 this->interval.end++;
// Digits after the decimal point; the outer loop ends once they are read.
6544 for (;;) {
6545 if (this->interval.end < end && text[this->interval.end]) {
6546 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6547 decimalni_del = decimalni_del * 10 + text[this->interval.end] - '0';
6548 decimalni_del_n *= 10;
6549 this->interval.end++;
6550 }
6551 else
6552 break;
6553 }
6554 else
6555 break;
6556 }
6557 break;
6558 }
6559 else
6560 break;
6561 }
6562 else
6563 break;
6564 }
// NOTE(review): listing lines 6565-6566 are absent from this extract;
// presumably they open the success branch and compute `value` from the
// accumulated parts - confirm against the original header.
6567 this->interval.start = start;
6568 return true;
6569 }
6570 value = 1.0f;
6571 this->interval.invalidate();
6572 return false;
6573 }
6574
/// Resets the weight to 1.0 and this parser to the "no match" state.
6575 virtual void invalidate()
6576 {
6577 value = 1.0f;
6578 parser::invalidate();
6579 }
6580
6581 public:
6582 float value;
6583 };
6584
6588 class http_asterisk : public parser
6589 {
6590 public:
6591 virtual bool match(
6592 _In_reads_or_z_(end) const char* text,
6593 _In_ size_t start = 0,
6594 _In_ size_t end = SIZE_MAX,
6595 _In_ int flags = match_default)
6596 {
6597 _Assume_(text || end <= start);
6598 if (start < end && text[start] == '*') {
6599 this->interval.end = (this->interval.start = start) + 1;
6600 return true;
6601 }
6602 this->interval.invalidate();
6603 return false;
6604 }
6605 };
6606
6610 template <class T, class T_asterisk = http_asterisk>
6612 {
6613 public:
6614 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
6615 parser(locale),
6616 factor(locale)
6617 {}
6618
/// Matches either the asterisk or the value, optionally followed by a
/// `; q = factor` suffix with white space allowed around each of its pieces.
/// The match succeeds as soon as the asterisk or the value matched; the
/// suffix is optional.
6619 virtual bool match(
6620 _In_reads_or_z_(end) const char* text,
6621 _In_ size_t start = 0,
6622 _In_ size_t end = SIZE_MAX,
6623 _In_ int flags = match_default)
6624 {
6625 _Assume_(text || start >= end);
// konec_vrednosti ("end of value") records where the asterisk/value ended.
6626 size_t konec_vrednosti;
6627 this->interval.end = start;
6628 if (asterisk.match(text, this->interval.end, end, flags)) {
6629 this->interval.end = konec_vrednosti = asterisk.interval.end;
6630 value.invalidate();
6631 }
6632 else if (value.match(text, this->interval.end, end, flags)) {
6633 this->interval.end = konec_vrednosti = value.interval.end;
6634 asterisk.invalidate();
6635 }
6636 else {
6637 asterisk.invalidate();
6638 value.invalidate();
6639 this->interval.invalidate();
6640 return false;
6641 }
6642
// Optional suffix: ';' then 'q' or 'Q', then '=', then the factor.
6643 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6644 if (this->interval.end < end && text[this->interval.end] == ';') {
6645 this->interval.end++;
6646 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6647 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
6648 this->interval.end++;
6649 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6650 if (this->interval.end < end && text[this->interval.end] == '=') {
6651 this->interval.end++;
6652 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6653 if (factor.match(text, this->interval.end, end, flags))
6654 this->interval.end = factor.interval.end;
6655 }
6656 }
6657 }
6658 if (!factor.interval) {
6659 factor.invalidate();
// NOTE(review): listing line 6660 is absent from this extract; presumably it
// adjusts the factor or the end position when no factor was parsed - confirm
// against the original header.
6661 }
6662 this->interval.start = start;
6663 return true;
6664 }
6665
6666 virtual void invalidate()
6667 {
6668 asterisk.invalidate();
6669 value.invalidate();
6670 factor.invalidate();
6671 parser::invalidate();
6672 }
6673
6674 public:
6675 T_asterisk asterisk;
6676 T value;
6677 http_weight factor;
6678 };
6679
6684 {
6685 public:
6686 virtual bool match(
6687 _In_reads_or_z_(end) const char* text,
6688 _In_ size_t start = 0,
6689 _In_ size_t end = SIZE_MAX,
6690 _In_ int flags = match_default)
6691 {
6692 _Assume_(text || start >= end);
6693 this->interval.end = start;
6694 if (this->interval.end < end && text[this->interval.end] == '$')
6695 this->interval.end++;
6696 else
6697 goto error;
6698 if (name.match(text, this->interval.end, end, flags))
6699 this->interval.end = name.interval.end;
6700 else
6701 goto error;
6702 while (m_space.match(text, this->interval.end, end, flags))
6703 this->interval.end = m_space.interval.end;
6704 if (this->interval.end < end && text[this->interval.end] == '=')
6705 this->interval.end++;
6706 else
6707 goto error;
6708 while (m_space.match(text, this->interval.end, end, flags))
6709 this->interval.end = m_space.interval.end;
6710 if (value.match(text, this->interval.end, end, flags))
6711 this->interval.end = value.interval.end;
6712 else
6713 goto error;
6714 this->interval.start = start;
6715 return true;
6716
6717 error:
6718 name.invalidate();
6719 value.invalidate();
6720 this->interval.invalidate();
6721 return false;
6722 }
6723
6724 virtual void invalidate()
6725 {
6726 name.invalidate();
6727 value.invalidate();
6728 parser::invalidate();
6729 }
6730
6731 public:
6732 http_token name;
6733 http_value value;
6734
6735 protected:
6736 http_space m_space;
6737 };
6738
6742 class http_cookie : public parser
6743 {
6744 public:
// Matches `name = value` followed by any number of `; parameter` items.
//
// text  - text to parse
// start - index in text where parsing begins
// end   - index in text where parsing stops
// flags - matching flags, forwarded to the sub-parsers
//
// Returns true on success: this->interval starts at `start` and ends at the end of
// the last collected parameter, or at the end of the value when no parameter was
// collected. Returns false after invalidating name, value and this->interval and
// clearing params when the name, the '=' or the value is missing.
6745 virtual bool match(
6746 _In_reads_or_z_(end) const char* text,
6747 _In_ size_t start = 0,
6748 _In_ size_t end = SIZE_MAX,
6749 _In_ int flags = match_default)
6750 {
6751 _Assume_(text || start >= end);
6752 this->interval.end = start;
// Mandatory part: name, optional spaces, '=', optional spaces, value.
6753 if (name.match(text, this->interval.end, end, flags))
6754 this->interval.end = name.interval.end;
6755 else
6756 goto error;
6757 while (m_space.match(text, this->interval.end, end, flags))
6758 this->interval.end = m_space.interval.end;
6759 if (this->interval.end < end && text[this->interval.end] == '=')
6760 this->interval.end++;
6761 else
6762 goto error;
6763 while (m_space.match(text, this->interval.end, end, flags))
6764 this->interval.end = m_space.interval.end;
6765 if (value.match(text, this->interval.end, end, flags))
6766 this->interval.end = value.interval.end;
6767 else
6768 goto error;
// Optional part: spaces and ';'-introduced parameters, collected until anything
// else (or the end of input) is encountered.
6769 params.clear();
6770 for (;;) {
6771 if (this->interval.end < end && text[this->interval.end]) {
6772 if (m_space.match(text, this->interval.end, end, flags))
6773 this->interval.end = m_space.interval.end;
6774 else if (text[this->interval.end] == ';') {
6775 this->interval.end++;
6776 while (m_space.match(text, this->interval.end, end, flags))
6777 this->interval.end = m_space.interval.end;
// NOTE(review): the listing skips number 6778; presumably `param` is declared
// there - confirm against upstream.
6779 if (param.match(text, this->interval.end, end, flags)) {
6780 this->interval.end = param.interval.end;
6781 params.push_back(std::move(param));
6782 }
6783 else
6784 break;
6785 }
6786 else
6787 break;
6788 }
6789 else
6790 break;
6791 }
6792 this->interval.start = start;
// Trailing spaces or a dangling ';' consumed by the loop above are excluded
// from the reported interval.
6793 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
6794 return true;
6795
6796 error:
6797 name.invalidate();
6798 value.invalidate();
6799 params.clear();
6800 this->interval.invalidate();
6801 return false;
6802 }
6803
6804 virtual void invalidate()
6805 {
6806 name.invalidate();
6807 value.invalidate();
6808 params.clear();
6809 parser::invalidate();
6810 }
6811
6812 public:
6815 std::list<http_cookie_parameter> params;
6816
6817 protected:
6818 http_space m_space;
6819 };
6820
6824 class http_agent : public parser
6825 {
6826 public:
// Scans a `type[/version]` token: `type` runs up to the first '/', whitespace or end
// of input; when a '/' follows, `version` runs up to the next whitespace or end of
// input.
//
// text  - text to parse
// start - index in text where parsing begins
// end   - index in text where parsing stops
// flags - matching flags (not referenced in the visible code)
//
// The failure path resets type and version to start = 1 / end = 0, invalidates
// this->interval and returns false.
6827 virtual bool match(
6828 _In_reads_or_z_(end) const char* text,
6829 _In_ size_t start = 0,
6830 _In_ size_t end = SIZE_MAX,
6831 _In_ int flags = match_default)
6832 {
6833 _Assume_(text || start >= end);
6834 this->interval.end = start;
6835 type.start = this->interval.end;
6836 for (;;) {
6837 if (this->interval.end < end && text[this->interval.end]) {
6838 if (text[this->interval.end] == '/') {
// '/' ends the type and starts the version.
6839 type.end = this->interval.end;
6840 this->interval.end++;
6841 version.start = this->interval.end;
6842 for (;;) {
6843 if (this->interval.end < end && text[this->interval.end]) {
6844 if (stdex::isspace(text[this->interval.end])) {
6845 version.end = this->interval.end;
6846 break;
6847 }
6848 else
6849 this->interval.end++;
6850 }
6851 else {
// End of input also terminates the version.
6852 version.end = this->interval.end;
6853 break;
6854 }
6855 }
6856 break;
6857 }
6858 else if (stdex::isspace(text[this->interval.end])) {
// Whitespace ends the type; version is not written on this path.
6859 type.end = this->interval.end;
6860 break;
6861 }
6862 else
6863 this->interval.end++;
6864 }
6865 else {
6866 type.end = this->interval.end;
6867 break;
6868 }
6869 }
// NOTE(review): the listing skips number 6870 and the closing brace on 6873 has no
// visible opener; presumably a condition guarding the success return was lost here -
// confirm against upstream.
6871 this->interval.start = start;
6872 return true;
6873 }
6874 type.start = 1;
6875 type.end = 0;
6876 version.start = 1;
6877 version.end = 0;
6878 this->interval.invalidate();
6879 return false;
6880 }
6881
6882 virtual void invalidate()
6883 {
6884 type.start = 1;
6885 type.end = 0;
6886 version.start = 1;
6887 version.end = 0;
6888 parser::invalidate();
6889 }
6890
6891 public:
6894 };
6895
6899 class http_protocol : public parser
6900 {
6901 public:
6902 http_protocol(_In_ const std::locale& locale = std::locale()) :
6903 parser(locale),
6904 version(0x009)
6905 {}
6906
6907 virtual bool match(
6908 _In_reads_or_z_(end) const char* text,
6909 _In_ size_t start = 0,
6910 _In_ size_t end = SIZE_MAX,
6911 _In_ int flags = match_default)
6912 {
6913 _Assume_(text || start >= end);
6914 this->interval.end = start;
6915 type.start = this->interval.end;
6916 for (;;) {
6917 if (this->interval.end < end && text[this->interval.end]) {
6918 if (text[this->interval.end] == '/') {
6919 type.end = this->interval.end;
6920 this->interval.end++;
6921 break;
6922 }
6923 else if (stdex::isspace(text[this->interval.end]))
6924 goto error;
6925 else
6926 this->interval.end++;
6927 }
6928 else {
6929 type.end = this->interval.end;
6930 goto error;
6931 }
6932 }
6933 version_maj.start = this->interval.end;
6934 for (;;) {
6935 if (this->interval.end < end && text[this->interval.end]) {
6936 if (text[this->interval.end] == '.') {
6937 version_maj.end = this->interval.end;
6938 this->interval.end++;
6939 version_min.start = this->interval.end;
6940 for (;;) {
6941 if (this->interval.end < end && text[this->interval.end]) {
6942 if (stdex::isspace(text[this->interval.end])) {
6943 version_min.end = this->interval.end;
6944 version =
6945 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6946 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6947 break;
6948 }
6949 else
6950 this->interval.end++;
6951 }
6952 else
6953 goto error;
6954 }
6955 break;
6956 }
6957 else if (stdex::isspace(text[this->interval.end])) {
6958 version_maj.end = this->interval.end;
6959 version_min.start = 1;
6960 version_min.end = 0;
6961 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6962 break;
6963 }
6964 else
6965 this->interval.end++;
6966 }
6967 else
6968 goto error;
6969 }
6970 this->interval.start = start;
6971 return true;
6972
6973 error:
6974 type.start = 1;
6975 type.end = 0;
6976 version_maj.start = 1;
6977 version_maj.end = 0;
6978 version_min.start = 1;
6979 version_min.end = 0;
6980 version = 0x009;
6981 this->interval.invalidate();
6982 return false;
6983 }
6984
6985 virtual void invalidate()
6986 {
6987 type.start = 1;
6988 type.end = 0;
6989 version_maj.start = 1;
6990 version_maj.end = 0;
6991 version_min.start = 1;
6992 version_min.end = 0;
6993 version = 0x009;
6994 parser::invalidate();
6995 }
6996
6997 public:
6999 stdex::interval<size_t> version_maj;
7000 stdex::interval<size_t> version_min;
7002 };
7003
7007 class http_request : public parser
7008 {
7009 public:
7010 http_request(_In_ const std::locale& locale = std::locale()) :
7011 parser(locale),
7012 url(locale),
7013 protocol(locale)
7014 {}
7015
7016 virtual bool match(
7017 _In_reads_or_z_(end) const char* text,
7018 _In_ size_t start = 0,
7019 _In_ size_t end = SIZE_MAX,
7020 _In_ int flags = match_default)
7021 {
7022 _Assume_(text || start >= end);
7023 this->interval.end = start;
7024
7025 for (;;) {
7026 if (m_line_break.match(text, this->interval.end, end, flags))
7027 goto error;
7028 else if (this->interval.end < end && text[this->interval.end]) {
7029 if (stdex::isspace(text[this->interval.end]))
7030 this->interval.end++;
7031 else
7032 break;
7033 }
7034 else
7035 goto error;
7036 }
7037 verb.start = this->interval.end;
7038 for (;;) {
7039 if (m_line_break.match(text, this->interval.end, end, flags))
7040 goto error;
7041 else if (this->interval.end < end && text[this->interval.end]) {
7042 if (stdex::isspace(text[this->interval.end])) {
7043 verb.end = this->interval.end;
7044 this->interval.end++;
7045 break;
7046 }
7047 else
7048 this->interval.end++;
7049 }
7050 else
7051 goto error;
7052 }
7053
7054 for (;;) {
7055 if (m_line_break.match(text, this->interval.end, end, flags))
7056 goto error;
7057 else if (this->interval.end < end && text[this->interval.end]) {
7058 if (stdex::isspace(text[this->interval.end]))
7059 this->interval.end++;
7060 else
7061 break;
7062 }
7063 else
7064 goto error;
7065 }
7066 if (url.match(text, this->interval.end, end, flags))
7067 this->interval.end = url.interval.end;
7068 else
7069 goto error;
7070
7071 protocol.invalidate();
7072 for (;;) {
7073 if (m_line_break.match(text, this->interval.end, end, flags)) {
7074 this->interval.end = m_line_break.interval.end;
7075 goto end;
7076 }
7077 else if (this->interval.end < end && text[this->interval.end]) {
7078 if (stdex::isspace(text[this->interval.end]))
7079 this->interval.end++;
7080 else
7081 break;
7082 }
7083 else
7084 goto end;
7085 }
7086 for (;;) {
7087 if (m_line_break.match(text, this->interval.end, end, flags)) {
7088 this->interval.end = m_line_break.interval.end;
7089 goto end;
7090 }
7091 else if (protocol.match(text, this->interval.end, end, flags)) {
7092 this->interval.end = protocol.interval.end;
7093 break;
7094 }
7095 else
7096 goto end;
7097 }
7098
7099 for (;;) {
7100 if (m_line_break.match(text, this->interval.end, end, flags)) {
7101 this->interval.end = m_line_break.interval.end;
7102 break;
7103 }
7104 else if (this->interval.end < end && text[this->interval.end])
7105 this->interval.end++;
7106 else
7107 goto end;
7108 }
7109
7110 end:
7111 this->interval.start = start;
7112 return true;
7113
7114 error:
7115 verb.start = 1;
7116 verb.end = 0;
7117 url.invalidate();
7118 protocol.invalidate();
7119 this->interval.invalidate();
7120 return false;
7121 }
7122
7123 virtual void invalidate()
7124 {
7125 verb.start = 1;
7126 verb.end = 0;
7127 url.invalidate();
7128 protocol.invalidate();
7129 parser::invalidate();
7130 }
7131
7132 public:
7134 http_url url;
7135 http_protocol protocol;
7136
7137 protected:
7138 http_line_break m_line_break;
7139 };
7140
7144 class http_header : public parser
7145 {
7146 public:
7147 virtual bool match(
7148 _In_reads_or_z_(end) const char* text,
7149 _In_ size_t start = 0,
7150 _In_ size_t end = SIZE_MAX,
7151 _In_ int flags = match_default)
7152 {
7153 _Assume_(text || start >= end);
7154 this->interval.end = start;
7155
7156 if (m_line_break.match(text, this->interval.end, end, flags) ||
7157 (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])))
7158 goto error;
7159 name.start = this->interval.end;
7160 for (;;) {
7161 if (m_line_break.match(text, this->interval.end, end, flags))
7162 goto error;
7163 else if (this->interval.end < end && text[this->interval.end]) {
7164 if (stdex::isspace(text[this->interval.end])) {
7165 name.end = this->interval.end;
7166 this->interval.end++;
7167 for (;;) {
7168 if (m_line_break.match(text, this->interval.end, end, flags))
7169 goto error;
7170 else if (this->interval.end < end && text[this->interval.end]) {
7171 if (stdex::isspace(text[this->interval.end]))
7172 this->interval.end++;
7173 else
7174 break;
7175 }
7176 else
7177 goto error;
7178 }
7179 if (this->interval.end < end && text[this->interval.end] == ':') {
7180 this->interval.end++;
7181 break;
7182 }
7183 else
7184 goto error;
7185 break;
7186 }
7187 else if (text[this->interval.end] == ':') {
7188 name.end = this->interval.end;
7189 this->interval.end++;
7190 break;
7191 }
7192 else
7193 this->interval.end++;
7194 }
7195 else
7196 goto error;
7197 }
7198 value.start = SIZE_MAX;
7199 value.end = 0;
7200 for (;;) {
7201 if (m_line_break.match(text, this->interval.end, end, flags)) {
7202 this->interval.end = m_line_break.interval.end;
7203 if (!m_line_break.match(text, this->interval.end, end, flags) &&
7204 this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end]))
7205 this->interval.end++;
7206 else
7207 break;
7208 }
7209 else if (this->interval.end < end && text[this->interval.end]) {
7210 if (stdex::isspace(text[this->interval.end]))
7211 this->interval.end++;
7212 else {
7213 if (value.start == SIZE_MAX) value.start = this->interval.end;
7214 value.end = ++this->interval.end;
7215 }
7216 }
7217 else
7218 break;
7219 }
7220 this->interval.start = start;
7221 return true;
7222
7223 error:
7224 name.start = 1;
7225 name.end = 0;
7226 value.start = 1;
7227 value.end = 0;
7228 this->interval.invalidate();
7229 return false;
7230 }
7231
7232 virtual void invalidate()
7233 {
7234 name.start = 1;
7235 name.end = 0;
7236 value.start = 1;
7237 value.end = 0;
7238 parser::invalidate();
7239 }
7240
7241 public:
7244
7245 protected:
7246 http_line_break m_line_break;
7247 };
7248
7252 template <class _Key, class T>
7253 class http_value_collection : public T
7254 {
7255 public:
7256 void insert(
7257 _In_reads_or_z_(end) const char* text,
7258 _In_ size_t start = 0,
7259 _In_ size_t end = SIZE_MAX,
7260 _In_ int flags = match_default)
7261 {
7262 while (start < end) {
7263 while (start < end && text[start] && stdex::isspace(text[start])) start++;
7264 if (start < end && text[start] == ',') {
7265 start++;
7266 while (start < end&& text[start] && stdex::isspace(text[start])) start++;
7267 }
7268 _Key el;
7269 if (el.match(text, start, end, flags)) {
7270 start = el.interval.end;
7271 T::insert(std::move(el));
7272 }
7273 else
7274 break;
7275 }
7276 }
7277 };
7278
7279 template <class T>
7281 constexpr bool operator()(const T& a, const T& b) const noexcept
7282 {
7283 return a.factor.value > b.factor.value;
7284 }
7285 };
7286
7290 template <class T, class _Alloc = std::allocator<T>>
7292
7296 template <class T>
7298 {
7299 public:
// Constructor parameter list and member initializers. Every supplied sub-parser is
// stored in the member of the matching name (quote -> m_quote, chr -> m_chr, ...)
// and `locale` is forwarded to the base parser.
// NOTE(review): the listing skips number 7300, where the constructor's opening line
// presumably is - confirm against upstream.
7301 _In_ const std::shared_ptr<basic_parser<T>>& quote,
7302 _In_ const std::shared_ptr<basic_parser<T>>& chr,
7303 _In_ const std::shared_ptr<basic_parser<T>>& escape,
7304 _In_ const std::shared_ptr<basic_parser<T>>& sol,
7305 _In_ const std::shared_ptr<basic_parser<T>>& bs,
7306 _In_ const std::shared_ptr<basic_parser<T>>& ff,
7307 _In_ const std::shared_ptr<basic_parser<T>>& lf,
7308 _In_ const std::shared_ptr<basic_parser<T>>& cr,
7309 _In_ const std::shared_ptr<basic_parser<T>>& htab,
7310 _In_ const std::shared_ptr<basic_parser<T>>& uni,
7311 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
7312 _In_ const std::locale& locale = std::locale()) :
7313 basic_parser<T>(locale),
7314 m_quote(quote),
7315 m_chr(chr),
7316 m_escape(escape),
7317 m_sol(sol),
7318 m_bs(bs),
7319 m_ff(ff),
7320 m_lf(lf),
7321 m_cr(cr),
7322 m_htab(htab),
7323 m_uni(uni),
7324 m_hex(hex)
7325 {}
7326
7327 virtual bool match(
7328 _In_reads_or_z_(end) const T* text,
7329 _In_ size_t start = 0,
7330 _In_ size_t end = SIZE_MAX,
7331 _In_ int flags = match_default)
7332 {
7333 _Assume_(text || start >= end);
7334 this->interval.end = start;
7335 if (m_quote->match(text, this->interval.end, end, flags)) {
7336 this->interval.end = m_quote->interval.end;
7337 value.clear();
7338 for (;;) {
7339 if (m_quote->match(text, this->interval.end, end, flags)) {
7340 this->interval.start = start;
7341 this->interval.end = m_quote->interval.end;
7342 return true;
7343 }
7344 if (m_escape->match(text, this->interval.end, end, flags)) {
7345 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
7346 value += '"'; this->interval.end = m_quote->interval.end;
7347 continue;
7348 }
7349 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
7350 value += '/'; this->interval.end = m_sol->interval.end;
7351 continue;
7352 }
7353 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
7354 value += '\b'; this->interval.end = m_bs->interval.end;
7355 continue;
7356 }
7357 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
7358 value += '\f'; this->interval.end = m_ff->interval.end;
7359 continue;
7360 }
7361 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
7362 value += '\n'; this->interval.end = m_lf->interval.end;
7363 continue;
7364 }
7365 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
7366 value += '\r'; this->interval.end = m_cr->interval.end;
7367 continue;
7368 }
7369 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
7370 value += '\t'; this->interval.end = m_htab->interval.end;
7371 continue;
7372 }
7373 if (
7374 m_uni->match(text, m_escape->interval.end, end, flags) &&
7375 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
7376 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
7377 {
7378 _Assume_(m_hex->value <= 0xffff);
7379 if (sizeof(T) == 1) {
7380 if (m_hex->value > 0x7ff) {
7381 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
7382 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
7383 value += (T)(0x80 | (m_hex->value & 0x3f));
7384 }
7385 else if (m_hex->value > 0x7f) {
7386 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
7387 value += (T)(0x80 | (m_hex->value & 0x3f));
7388 }
7389 else
7390 value += (T)(m_hex->value & 0x7f);
7391 }
7392 else
7393 value += (T)m_hex->value;
7394 this->interval.end = m_hex->interval.end;
7395 continue;
7396 }
7397 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
7398 value += '\\'; this->interval.end = m_escape->interval.end;
7399 continue;
7400 }
7401 }
7402 if (m_chr->match(text, this->interval.end, end, flags)) {
7403 value.append(text + m_chr->interval.start, m_chr->interval.size());
7404 this->interval.end = m_chr->interval.end;
7405 continue;
7406 }
7407 break;
7408 }
7409 }
7410 value.clear();
7411 this->interval.invalidate();
7412 return false;
7413 }
7414
// Clears the decoded string value.
// NOTE(review): the listing skips number 7418; presumably the base-class
// invalidate() call is on that line - confirm against upstream.
7415 virtual void invalidate()
7416 {
7417 value.clear();
7419 }
7420
7421 public:
7422 std::basic_string<T> value;
7423
7424 protected:
7425 std::shared_ptr<basic_parser<T>> m_quote;
7426 std::shared_ptr<basic_parser<T>> m_chr;
7427 std::shared_ptr<basic_parser<T>> m_escape;
7428 std::shared_ptr<basic_parser<T>> m_sol;
7429 std::shared_ptr<basic_parser<T>> m_bs;
7430 std::shared_ptr<basic_parser<T>> m_ff;
7431 std::shared_ptr<basic_parser<T>> m_lf;
7432 std::shared_ptr<basic_parser<T>> m_cr;
7433 std::shared_ptr<basic_parser<T>> m_htab;
7434 std::shared_ptr<basic_parser<T>> m_uni;
7435 std::shared_ptr<basic_integer16<T>> m_hex;
7436 };
7437
7440#ifdef _UNICODE
7441 using tjson_string = wjson_string;
7442#else
7443 using tjson_string = json_string;
7444#endif
7445
7449 template <class T>
7451 {
7452 public:
7453 virtual bool match(
7454 _In_reads_or_z_opt_(end) const T* text,
7455 _In_ size_t start = 0,
7456 _In_ size_t end = SIZE_MAX,
7457 _In_ int flags = match_multiline)
7458 {
7459 _Unreferenced_(flags);
7460 _Assume_(text || start + 1 >= end);
7461 if (start + 1 < end &&
7462 text[start] == '/' &&
7463 text[start + 1] == '*')
7464 {
7465 // /*
7466 this->content.start = this->interval.end = start + 2;
7467 for (;;) {
7468 if (this->interval.end >= end || !text[this->interval.end])
7469 break;
7470 if (this->interval.end + 1 < end &&
7471 text[this->interval.end] == '*' &&
7472 text[this->interval.end + 1] == '/')
7473 {
7474 // /*...*/
7475 this->content.end = this->interval.end;
7476 this->interval.start = start;
7477 this->interval.end = this->interval.end + 2;
7478 return true;
7479 }
7480 this->interval.end++;
7481 }
7482 }
7483 this->content.invalidate();
7484 this->interval.invalidate();
7485 return false;
7486 }
7487
7488 virtual void invalidate()
7489 {
7490 this->content.invalidate();
7491 basic_parser::invalidate();
7492 }
7493
7494 public:
7496 };
7497
7500#ifdef _UNICODE
7501 using tcss_comment = wcss_comment;
7502#else
7503 using tcss_comment = css_comment;
7504#endif
7505
7509 template <class T>
7510 class basic_css_cdo : public basic_parser<T>
7511 {
7512 public:
7513 virtual bool match(
7514 _In_reads_or_z_opt_(end) const T* text,
7515 _In_ size_t start = 0,
7516 _In_ size_t end = SIZE_MAX,
7517 _In_ int flags = match_multiline)
7518 {
7519 _Unreferenced_(flags);
7520 _Assume_(text || start + 3 >= end);
7521 if (start + 3 < end &&
7522 text[start] == '<' &&
7523 text[start + 1] == '!' &&
7524 text[start + 2] == '-' &&
7525 text[start + 3] == '-')
7526 {
7527 this->interval.start = start;
7528 this->interval.end = start + 4;
7529 return true;
7530 }
7531 this->interval.invalidate();
7532 return false;
7533 }
7534 };
7535
7538#ifdef _UNICODE
7539 using tcss_cdo = wcss_cdo;
7540#else
7541 using tcss_cdo = css_cdo;
7542#endif
7543
7547 template <class T>
7548 class basic_css_cdc : public basic_parser<T>
7549 {
7550 public:
7551 virtual bool match(
7552 _In_reads_or_z_opt_(end) const T* text,
7553 _In_ size_t start = 0,
7554 _In_ size_t end = SIZE_MAX,
7555 _In_ int flags = match_multiline)
7556 {
7557 _Unreferenced_(flags);
7558 _Assume_(text || start + 2 >= end);
7559 if (start + 2 < end &&
7560 text[start] == '-' &&
7561 text[start + 1] == '-' &&
7562 text[start + 2] == '>')
7563 {
7564 this->interval.start = start;
7565 this->interval.end = start + 3;
7566 return true;
7567 }
7568 this->interval.invalidate();
7569 return false;
7570 }
7571 };
7572
7575#ifdef _UNICODE
7576 using tcss_cdc = wcss_cdc;
7577#else
7578 using tcss_cdc = css_cdc;
7579#endif
7580
7584 template <class T>
7586 {
7587 public:
7588 virtual bool match(
7589 _In_reads_or_z_opt_(end) const T* text,
7590 _In_ size_t start = 0,
7591 _In_ size_t end = SIZE_MAX,
7592 _In_ int flags = match_multiline)
7593 {
7594 _Unreferenced_(flags);
7595 this->interval.end = start;
7596 _Assume_(text || this->interval.end >= end);
7597 if (this->interval.end < end &&
7598 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7599 {
7600 // "Quoted...
7601 T quote = text[this->interval.end];
7602 this->content.start = ++this->interval.end;
7603 for (;;) {
7604 if (this->interval.end >= end || !text[this->interval.end])
7605 break;
7606 if (text[this->interval.end] == quote) {
7607 // End quote"
7608 this->content.end = this->interval.end;
7609 this->interval.start = start;
7610 this->interval.end++;
7611 return true;
7612 }
7613 if (this->interval.end + 1 < end &&
7614 text[this->interval.end] == '\\' &&
7615 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7616 {
7617 // Escaped quote
7618 this->interval.end = this->interval.end + 2;
7619 }
7620 else
7621 this->interval.end++;
7622 }
7623 }
7624
7625 this->content.invalidate();
7626 this->interval.invalidate();
7627 return false;
7628 }
7629
7630 virtual void invalidate()
7631 {
7632 this->content.invalidate();
7633 basic_parser::invalidate();
7634 }
7635
7636 public:
7638 };
7639
7642#ifdef _UNICODE
7643 using tcss_string = wcss_string;
7644#else
7645 using tcss_string = css_string;
7646#endif
7647
7651 template <class T>
7652 class basic_css_uri : public basic_parser<T>
7653 {
7654 public:
7655 virtual bool match(
7656 _In_reads_or_z_opt_(end) const T* text,
7657 _In_ size_t start = 0,
7658 _In_ size_t end = SIZE_MAX,
7659 _In_ int flags = match_multiline)
7660 {
7661 _Unreferenced_(flags);
7662 this->interval.end = start;
7663 _Assume_(text || this->interval.end + 3 >= end);
7664 if (this->interval.end + 3 < end &&
7665 (text[this->interval.end] == 'u' || text[this->interval.end] == 'U') &&
7666 (text[this->interval.end + 1] == 'r' || text[this->interval.end + 1] == 'R') &&
7667 (text[this->interval.end + 2] == 'l' || text[this->interval.end + 2] == 'L') &&
7668 text[this->interval.end + 3] == '(')
7669 {
7670 // url(
7671 this->interval.end = this->interval.end + 4;
7672
7673 // Skip whitespace.
7674 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7675 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7676
7677 if (this->interval.end < end &&
7678 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7679 {
7680 // url("Quoted...
7681 T quote = text[this->interval.end];
7682 this->content.start = ++this->interval.end;
7683 for (;;) {
7684 if (this->interval.end >= end || !text[this->interval.end])
7685 goto error;
7686 if (text[this->interval.end] == quote) {
7687 // End quote"
7688 this->content.end = this->interval.end;
7689 this->interval.end++;
7690 break;
7691 }
7692 if (this->interval.end + 1 < end &&
7693 text[this->interval.end] == '\\' &&
7694 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7695 {
7696 // Escaped quote
7697 this->interval.end = this->interval.end + 2;
7698 }
7699 else
7700 this->interval.end++;
7701 }
7702
7703 // Skip whitespace.
7704 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7705
7706 if (this->interval.end < end &&
7707 text[this->interval.end] == ')')
7708 {
7709 // url("...")
7710 this->interval.start = start;
7711 this->interval.end++;
7712 return true;
7713 }
7714 }
7715 else {
7716 // url(...
7717 this->content.start = content.end = this->interval.end;
7718 for (;;) {
7719 if (this->interval.end >= end || !text[this->interval.end])
7720 goto error;
7721 if (text[this->interval.end] == ')') {
7722 // url(...)
7723 this->interval.start = start;
7724 this->interval.end++;
7725 return true;
7726 }
7727 if (ctype.is(ctype.space, text[this->interval.end]))
7728 this->interval.end++;
7729 else
7730 this->content.end = ++this->interval.end;
7731 }
7732 }
7733 }
7734
7735 error:
7736 this->content.invalidate();
7737 this->interval.invalidate();
7738 return false;
7739 }
7740
7741 virtual void invalidate()
7742 {
7743 this->content.invalidate();
7744 basic_parser::invalidate();
7745 }
7746
7747 public:
7749 };
7750
7753#ifdef _UNICODE
7754 using tcss_uri = wcss_uri;
7755#else
7756 using tcss_uri = css_uri;
7757#endif
7758
7762 template <class T>
7764 {
7765 public:
7766 virtual bool match(
7767 _In_reads_or_z_opt_(end) const T* text,
7768 _In_ size_t start = 0,
7769 _In_ size_t end = SIZE_MAX,
7770 _In_ int flags = match_multiline)
7771 {
7772 _Unreferenced_(flags);
7773 this->interval.end = start;
7774 _Assume_(text || this->interval.end + 6 >= end);
7775 if (this->interval.end + 6 < end &&
7776 text[this->interval.end] == '@' &&
7777 (text[this->interval.end + 1] == 'i' || text[this->interval.end + 1] == 'I') &&
7778 (text[this->interval.end + 2] == 'm' || text[this->interval.end + 2] == 'M') &&
7779 (text[this->interval.end + 3] == 'p' || text[this->interval.end + 3] == 'P') &&
7780 (text[this->interval.end + 4] == 'o' || text[this->interval.end + 4] == 'O') &&
7781 (text[this->interval.end + 5] == 'r' || text[this->interval.end + 5] == 'R') &&
7782 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T'))
7783 {
7784 // @import...
7785 this->interval.end = this->interval.end + 7;
7786
7787 // Skip whitespace.
7788 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7789 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7790
7791 if (this->interval.end < end &&
7792 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7793 {
7794 // @import "Quoted
7795 T quote = text[this->interval.end];
7796 this->content.start = ++this->interval.end;
7797 for (;;) {
7798 if (this->interval.end >= end || !text[this->interval.end])
7799 goto error;
7800 if (text[this->interval.end] == quote) {
7801 // End quote"
7802 this->content.end = this->interval.end;
7803 this->interval.start = start;
7804 this->interval.end++;
7805 return true;
7806 }
7807 if (this->interval.end + 1 < end &&
7808 text[this->interval.end] == '\\' &&
7809 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7810 {
7811 // Escaped quote
7812 this->interval.end = this->interval.end + 2;
7813 }
7814 else
7815 this->interval.end++;
7816 }
7817 }
7818 }
7819
7820 error:
7821 this->content.invalidate();
7822 this->interval.invalidate();
7823 return false;
7824 }
7825
7826 virtual void invalidate()
7827 {
7828 this->content.invalidate();
7829 basic_parser::invalidate();
7830 }
7831
7832 public:
7834 };
7835
7838#ifdef _UNICODE
7839 using tcss_import = wcss_import;
7840#else
7841 using tcss_import = css_import;
7842#endif
7843
7847 template <class T>
7849 {
7850 public:
7851 virtual bool match(
7852 _In_reads_or_z_opt_(end) const T* text,
7853 _In_ size_t start = 0,
7854 _In_ size_t end = SIZE_MAX,
7855 _In_ int flags = match_multiline)
7856 {
7857 _Unreferenced_(flags);
7858 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7859
7860 this->interval.end = start;
7861 this->base_type.start = this->interval.end;
7862 for (;;) {
7863 _Assume_(text || this->interval.end >= end);
7864 if (this->interval.end >= end || !text[this->interval.end])
7865 break;
7866 if (text[this->interval.end] == '/' ||
7867 text[this->interval.end] == ';' ||
7868 ctype.is(ctype.space, text[this->interval.end]))
7869 break;
7870 this->interval.end++;
7871 }
7872 if (this->interval.end <= this->base_type.start)
7873 goto error;
7874 this->base_type.end = this->interval.end;
7875
7876 if (end <= this->interval.end || text[this->interval.end] != '/')
7877 goto error;
7878
7879 this->interval.end++;
7880 this->sub_type.start = this->interval.end;
7881 for (;;) {
7882 if (this->interval.end >= end || !text[this->interval.end])
7883 break;
7884 if (text[this->interval.end] == '/' ||
7885 text[this->interval.end] == ';' ||
7886 ctype.is(ctype.space, text[this->interval.end]))
7887 break;
7888 this->interval.end++;
7889 }
7890 if (this->interval.end <= this->sub_type.start)
7891 goto error;
7892
7893 this->sub_type.end = this->interval.end;
7894 this->charset.invalidate();
7895 if (this->interval.end < end && text[this->interval.end] == ';') {
7896 this->interval.end++;
7897
7898 // Skip whitespace.
7899 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7900
7901 if (this->interval.end + 7 < end &&
7902 (text[this->interval.end] == 'c' || text[this->interval.end] == 'C') &&
7903 (text[this->interval.end + 1] == 'h' || text[this->interval.end + 1] == 'H') &&
7904 (text[this->interval.end + 2] == 'a' || text[this->interval.end + 2] == 'A') &&
7905 (text[this->interval.end + 3] == 'r' || text[this->interval.end + 3] == 'R') &&
7906 (text[this->interval.end + 4] == 's' || text[this->interval.end + 4] == 'S') &&
7907 (text[this->interval.end + 5] == 'e' || text[this->interval.end + 5] == 'E') &&
7908 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T') &&
7909 text[this->interval.end + 7] == '=')
7910 {
7911 this->interval.end = this->interval.end + 8;
7912 if (this->interval.end < end &&
7913 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7914 {
7915 // "Quoted...
7916 T quote = text[this->interval.end];
7917 this->charset.start = ++this->interval.end;
7918 for (;;) {
7919 if (this->interval.end >= end || !text[this->interval.end]) {
7920 // No end quote!
7921 this->charset.invalidate();
7922 break;
7923 }
7924 if (text[this->interval.end] == quote) {
7925 // End quote"
7926 this->charset.end = this->interval.end;
7927 this->interval.end++;
7928 break;
7929 }
7930 this->interval.end++;
7931 }
7932 }
7933 else {
7934 // Nonquoted
7935 this->charset.start = this->interval.end;
7936 for (;;) {
7937 if (this->interval.end >= end || !text[this->interval.end] ||
7938 ctype.is(ctype.space, text[this->interval.end])) {
7939 this->charset.end = this->interval.end;
7940 break;
7941 }
7942 this->interval.end++;
7943 }
7944 }
7945 }
7946 }
7947 this->interval.start = start;
7948 return true;
7949
7950 error:
7951 this->base_type.invalidate();
7952 this->sub_type.invalidate();
7953 this->charset.invalidate();
7954 this->interval.invalidate();
7955 return false;
7956 }
7957
// Resets the parser to the "no match" state: invalidates the base_type,
// sub_type and charset intervals, then delegates to basic_parser::invalidate().
7958 virtual void invalidate()
7959 {
7960 this->base_type.invalidate();
7961 this->sub_type.invalidate();
7962 this->charset.invalidate();
7963 basic_parser::invalidate();
7964 }
7965
7966 public:
7970 };
7971
// tmime_type resolves to wmime_type when _UNICODE is defined and to mime_type otherwise.
7974#ifdef _UNICODE
7975 using tmime_type = wmime_type;
7976#else
7977 using tmime_type = mime_type;
7978#endif
7979
7983 template <class T>
7985 {
7986 public:
// Matches a contiguous run of code units forming a name (of an element, attribute etc.).
//
// Scanning starts at `start` and stops at `end`, at a zero code unit, or at the
// first '>', '=', "/>" or whitespace (as classified by the parser's locale).
// `flags` is not used. On success interval covers [start, stop position).
7987 virtual bool match(
7988 _In_reads_or_z_opt_(end) const T* text,
7989 _In_ size_t start = 0,
7990 _In_ size_t end = SIZE_MAX,
7991 _In_ int flags = match_default)
7992 {
7993 _Unreferenced_(flags);
7994 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
7995 this->interval.end = start;
7996 for (;;) {
7997 _Assume_(text || this->interval.end >= end);
7998 if (this->interval.end >= end || !text[this->interval.end]) {
// NOTE(review): listing line 7999 is absent from this extract, which leaves the
// braces below unbalanced. Presumably a condition there guards the successful
// return that follows - confirm against the original header.
8000 this->interval.start = start;
8001 return true;
8002 }
8003 this->interval.invalidate();
8004 return false;
8005 }
// A delimiter ends the name. Note that this branch reports success even when
// nothing was consumed, i.e. when interval.end still equals start.
8006 if (text[this->interval.end] == '>' ||
8007 text[this->interval.end] == '=' ||
8008 text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>' ||
8009 ctype.is(ctype.space, text[this->interval.end]))
8010 {
8011 this->interval.start = start;
8012 return true;
8013 }
8014 this->interval.end++;
8015 }
8016 }
8017 };
8018
// thtml_ident resolves to whtml_ident when _UNICODE is defined and to html_ident otherwise.
8021#ifdef _UNICODE
8022 using thtml_ident = whtml_ident;
8023#else
8024 using thtml_ident = html_ident;
8025#endif
8026
8030 template <class T>
8032 {
8033 public:
8034 virtual bool match(
8035 _In_reads_or_z_opt_(end) const T* text,
8036 _In_ size_t start = 0,
8037 _In_ size_t end = SIZE_MAX,
8038 _In_ int flags = match_default)
8039 {
8040 _Unreferenced_(flags);
8041 this->interval.end = start;
8042 _Assume_(text || this->interval.end >= end);
8043 if (this->interval.end < end &&
8044 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
8045 {
8046 // "Quoted...
8047 T quote = text[this->interval.end];
8048 this->content.start = ++this->interval.end;
8049 for (;;) {
8050 if (this->interval.end >= end || !text[this->interval.end]) {
8051 // No end quote!
8052 this->content.invalidate();
8053 this->interval.invalidate();
8054 return false;
8055 }
8056 if (text[this->interval.end] == quote) {
8057 // End quote"
8058 this->content.end = this->interval.end;
8059 this->interval.start = start;
8060 this->interval.end++;
8061 return true;
8062 }
8063 this->interval.end++;
8064 }
8065 }
8066
8067 // Nonquoted
8068 this->content.start = this->interval.end;
8069 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8070 for (;;) {
8071 _Assume_(text || this->interval.end >= end);
8072 if (this->interval.end >= end || !text[this->interval.end]) {
8073 this->content.end = this->interval.end;
8074 this->interval.start = start;
8075 return true;
8076 }
8077 if (text[this->interval.end] == '>' ||
8078 text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>' ||
8079 ctype.is(ctype.space, text[this->interval.end]))
8080 {
8081 this->content.end = this->interval.end;
8082 this->interval.start = start;
8083 return true;
8084 }
8085 this->interval.end++;
8086 }
8087 }
8088
// Resets the parser to the "no match" state: invalidates the content interval,
// then delegates to basic_parser::invalidate().
8089 virtual void invalidate()
8090 {
8091 this->content.invalidate();
8092 basic_parser::invalidate();
8093 }
8094
8095 public:
8097 };
8098
// thtml_value resolves to whtml_value when _UNICODE is defined and to html_value otherwise.
8101#ifdef _UNICODE
8102 using thtml_value = whtml_value;
8103#else
8104 using thtml_value = html_value;
8105#endif
8106
// Kind of markup sequence. The notes below describe where basic_html_tag::match()
// assigns each value; enumerators without a note are not assigned by it.
8110 enum class html_sequence_t {
8111 text = 0,
8112 element, // "<tag .../>"
8113 element_start, // "<tag ...>"
8114 element_end, // "</tag ...>"
8115 declaration, // "<! ...>"
8116 comment, // "<!-- ... -->"
8117 instruction, // "<? ... >" or "<? ... ?>"
8118 PCDATA,
8119 CDATA,
8120
8121 unknown = -1, // initial value, and the value after a failed match or invalidate()
8122 };
8123
8131
8135 template <class T>
8137 {
8138 public:
// Constructs the tag parser: forwards `locale` to basic_parser and starts with
// type set to html_sequence_t::unknown.
8139 basic_html_tag(_In_ const std::locale& locale = std::locale()) :
8140 basic_parser(locale),
8141 type(html_sequence_t::unknown)
8142 {}
8143
8144 virtual bool match(
8145 _In_reads_or_z_opt_(end) const T* text,
8146 _In_ size_t start = 0,
8147 _In_ size_t end = SIZE_MAX,
8148 _In_ int flags = match_multiline)
8149 {
8150 _Assume_(text || start >= end);
8151 if (start >= end || text[start] != '<')
8152 goto error;
8153 this->interval.end = start + 1;
8154 if (this->interval.end >= end || !text[this->interval.end])
8155 goto error;
8156 if (text[this->interval.end] == '/' &&
8157 this->m_ident.match(text, this->interval.end + 1, end, flags))
8158 {
8159 // </...
8160 this->type = html_sequence_t::element_end;
8161 this->name = this->m_ident.interval;
8162 this->interval.end = this->m_ident.interval.end;
8163 }
8164 else if (text[this->interval.end] == '!') {
8165 // <!...
8166 this->interval.end++;
8167 if (this->interval.end + 1 < end &&
8168 text[this->interval.end] == '-' &&
8169 text[this->interval.end + 1] == '-')
8170 {
8171 // <!--...
8172 this->name.start = this->interval.end = this->interval.end + 2;
8173 for (;;) {
8174 if (this->interval.end >= end || !text[this->interval.end])
8175 goto error;
8176 if (this->interval.end + 2 < end &&
8177 text[this->interval.end] == '-' &&
8178 text[this->interval.end + 1] == '-' &&
8179 text[this->interval.end + 2] == '>')
8180 {
8181 // <!--...-->
8182 this->type = html_sequence_t::comment;
8183 this->name.end = this->interval.end;
8184 this->attributes.clear();
8185 this->interval.start = start;
8186 this->interval.end = this->interval.end + 3;
8187 return true;
8188 }
8189 this->interval.end++;
8190 }
8191 }
8192 this->type = html_sequence_t::declaration;
8193 this->name.start = this->name.end = this->interval.end;
8194 }
8195 else if (text[this->interval.end] == '?') {
8196 // <?...
8197 this->name.start = ++this->interval.end;
8198 for (;;) {
8199 if (this->interval.end >= end || !text[this->interval.end])
8200 goto error;
8201 if (text[this->interval.end] == '>') {
8202 // <?...>
8203 this->type = html_sequence_t::instruction;
8204 this->name.end = this->interval.end;
8205 this->attributes.clear();
8206 this->interval.start = start;
8207 this->interval.end++;
8208 return true;
8209 }
8210 if (this->interval.end + 1 < end &&
8211 text[this->interval.end] == '?' &&
8212 text[this->interval.end + 1] == '>')
8213 {
8214 // <?...?>
8215 this->type = html_sequence_t::instruction;
8216 this->name.end = this->interval.end;
8217 this->attributes.clear();
8218 this->interval.start = start;
8219 this->interval.end = this->interval.end + 2;
8220 return true;
8221 }
8222 this->interval.end++;
8223 }
8224 }
8225 else if (this->m_ident.match(text, this->interval.end, end, flags)) {
8226 // <tag...
8227 this->type = html_sequence_t::element_start;
8228 this->name = this->m_ident.interval;
8229 this->interval.end = this->m_ident.interval.end;
8230 }
8231 else
8232 goto error;
8233
8234 // Skip whitespace.
8235 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8236 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8237
8238 this->attributes.clear();
8239 for (;;) {
8240 if (this->type == html_sequence_t::element_start &&
8241 this->interval.end + 1 < end &&
8242 text[this->interval.end] == '/' &&
8243 text[this->interval.end + 1] == '>')
8244 {
8245 // <tag .../>
8246 this->type = html_sequence_t::element;
8247 this->interval.end = this->interval.end + 2;
8248 break;
8249 }
8250 if (this->interval.end < end &&
8251 text[this->interval.end] == '>')
8252 {
8253 // <tag ...>
8254 this->interval.end++;
8255 break;
8256 }
8257 if (this->type == html_sequence_t::declaration &&
8258 this->interval.end + 1 < end &&
8259 text[this->interval.end] == '!' &&
8260 text[this->interval.end + 1] == '>')
8261 {
8262 // "<!...!>".
8263 this->interval.end = this->interval.end + 2;
8264 break;
8265 }
8266 if (this->type == html_sequence_t::declaration &&
8267 this->interval.end + 1 < end &&
8268 text[this->interval.end] == '-' &&
8269 text[this->interval.end + 1] == '-')
8270 {
8271 // "<! ... --...".
8272 this->interval.end = this->interval.end + 2;
8273 for (;;) {
8274 if (this->interval.end >= end || !text[this->interval.end])
8275 goto error;
8276 if (this->interval.end + 1 < end &&
8277 text[this->interval.end] == '-' &&
8278 text[this->interval.end + 1] == '-')
8279 {
8280 // "<! ... --...--".
8281 this->interval.end = this->interval.end + 2;
8282 break;
8283 }
8284 this->interval.end++;
8285 }
8286
8287 // Skip whitespace.
8288 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8289 continue;
8290 }
8291
8292 if (this->interval.end >= end || !text[this->interval.end])
8293 goto error;
8294
8295 // Attributes follow...
8296 html_attribute* a = nullptr;
8297 if (this->m_ident.match(text, this->interval.end, end, flags)) {
8298 this->attributes.push_back(std::move(html_attribute{ this->m_ident.interval }));
8299 a = &this->attributes.back();
8300 _Assume_(a);
8301 this->interval.end = this->m_ident.interval.end;
8302 }
8303 else {
8304 // What was that?! Skip.
8305 this->interval.end++;
8306 continue;
8307 }
8308
8309 // Skip whitespace.
8310 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8311
8312 if (this->interval.end < end && text[this->interval.end] == '=') {
8313 this->interval.end++;
8314
8315 // Skip whitespace.
8316 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8317
8318 if (this->m_value.match(text, this->interval.end, end, flags)) {
8319 // This attribute has value.
8320 a->value = this->m_value.content;
8321 this->interval.end = this->m_value.interval.end;
8322
8323 // Skip whitespace.
8324 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8325 }
8326 }
8327 else {
8328 // This attribute has no value.
8329 a->value.invalidate();
8330 }
8331 }
8332
8333 this->interval.start = start;
8334 return true;
8335
8336 error:
8337 this->type = html_sequence_t::unknown;
8338 this->name.invalidate();
8339 this->attributes.clear();
8340 this->interval.invalidate();
8341 return false;
8342 }
8343
// Resets the parser to the "no match" state: type becomes html_sequence_t::unknown,
// name is invalidated, attributes are cleared, then basic_parser::invalidate() runs.
8344 virtual void invalidate()
8345 {
8346 this->type = html_sequence_t::unknown;
8347 this->name.invalidate();
8348 this->attributes.clear();
8349 basic_parser::invalidate();
8350 }
8351
8352 public:
8353 html_sequence_t type; // kind of tag found by the last match(); html_sequence_t::unknown when none
8355 std::vector<html_attribute> attributes; // attributes collected by the last match()
8356
8357 protected:
8358 basic_html_ident<T> m_ident; // sub-parser match() uses for tag and attribute names
8359 basic_html_value<T> m_value; // sub-parser match() uses for attribute values
8360 };
8361
// thtml_tag resolves to whtml_tag when _UNICODE is defined and to html_tag otherwise.
8364#ifdef _UNICODE
8365 using thtml_tag = whtml_tag;
8366#else
8367 using thtml_tag = html_tag;
8368#endif
8369
8373 template <class T>
8375 {
8376 public:
8377 virtual bool match(
8378 _In_reads_or_z_opt_(end) const T* text,
8379 _In_ size_t start = 0,
8380 _In_ size_t end = SIZE_MAX,
8381 _In_ int flags = match_multiline)
8382 {
8383 _Unreferenced_(flags);
8384 _Assume_(text || start + 2 >= end);
8385 if (start + 2 < end &&
8386 text[start] == '<' &&
8387 text[start + 1] == '!' &&
8388 text[start + 2] == '[')
8389 {
8390 this->interval.end = start + 3;
8391
8392 // Skip whitespace.
8393 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
8394 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8395
8396 this->condition.start = this->condition.end = this->interval.end;
8397
8398 for (;;) {
8399 if (this->interval.end >= end || !text[this->interval.end])
8400 break;
8401 if (text[this->interval.end] == '[') {
8402 this->interval.start = start;
8403 this->interval.end++;
8404 return true;
8405 }
8406 if (ctype.is(ctype.space, text[this->interval.end]))
8407 this->interval.end++;
8408 else
8409 this->condition.end = ++this->interval.end;
8410 }
8411 }
8412
8413 this->condition.invalidate();
8414 this->interval.invalidate();
8415 return false;
8416 }
8417
// Resets the parser to the "no match" state: invalidates the condition interval,
// then delegates to basic_parser::invalidate().
8418 virtual void invalidate()
8419 {
8420 this->condition.invalidate();
8421 basic_parser::invalidate();
8422 }
8423
8424 public:
8425 stdex::interval<size_t> condition; // position in source of the condition text found by the last match()
8426 };
8427
8430#ifdef _UNICODE
8432#else
8434#endif
8435
8439 template <class T>
8441 {
8442 public:
8443 virtual bool match(
8444 _In_reads_or_z_opt_(end) const T* text,
8445 _In_ size_t start = 0,
8446 _In_ size_t end = SIZE_MAX,
8447 _In_ int flags = match_multiline)
8448 {
8449 _Unreferenced_(flags);
8450 _Assume_(text || start + 2 >= end);
8451 if (start + 2 < end &&
8452 text[start] == ']' &&
8453 text[start + 1] == ']' &&
8454 text[start + 2] == '>')
8455 {
8456 this->interval.start = start;
8457 this->interval.end = start + 3;
8458 return true;
8459 }
8460 this->interval.invalidate();
8461 return false;
8462 }
8463 };
8464
8467#ifdef _UNICODE
8469#else
8471#endif
8472 }
8473}
8474
8475#undef ENUM_FLAG_OPERATOR
8476#undef ENUM_FLAGS
8477
8478#ifdef _MSC_VER
8479#pragma warning(pop)
8480#endif
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4396
Test for any code unit.
Definition parser.hpp:224
Test for beginning of line.
Definition parser.hpp:618
Test for any.
Definition parser.hpp:1060
Test for chemical formula.
Definition parser.hpp:5540
Test for Creditor Reference.
Definition parser.hpp:4966
T reference[22]
Normalized national reference number.
Definition parser.hpp:5095
T check_digits[3]
Two check digits.
Definition parser.hpp:5094
bool is_valid
Is reference valid per ISO 7064.
Definition parser.hpp:5096
Legacy CSS comment end -->
Definition parser.hpp:7549
Legacy CSS comment start <!--
Definition parser.hpp:7511
CSS comment.
Definition parser.hpp:7451
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7495
CSS import directive.
Definition parser.hpp:7764
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7833
CSS string.
Definition parser.hpp:7586
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7637
URI in CSS.
Definition parser.hpp:7653
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7748
Test for any code unit from a given string of code units.
Definition parser.hpp:723
Test for specific code unit.
Definition parser.hpp:294
Test for date.
Definition parser.hpp:4026
Test for valid DNS domain character.
Definition parser.hpp:2807
bool allow_on_edge
Is the character allowed at the beginning or the end of a DNS domain?
Definition parser.hpp:2845
Test for DNS domain/hostname.
Definition parser.hpp:2907
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2971
Test for e-mail address.
Definition parser.hpp:3795
Test for emoticon.
Definition parser.hpp:3903
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3992
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3993
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3995
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3994
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3991
Test for end of line.
Definition parser.hpp:656
Test for fraction.
Definition parser.hpp:1689
End of condition ...]]>
Definition parser.hpp:8441
Start of condition <![condition[...
Definition parser.hpp:8375
Contiguous sequence of characters representing name of element, attribute etc.
Definition parser.hpp:7985
Tag.
Definition parser.hpp:8137
std::vector< html_attribute > attributes
tag attributes
Definition parser.hpp:8355
html_sequence_t type
tag type
Definition parser.hpp:8353
stdex::interval< size_t > name
tag name position in source
Definition parser.hpp:8354
Optionally-quoted string representing value of an attribute.
Definition parser.hpp:8032
stdex::interval< size_t > content
content position in source
Definition parser.hpp:8096
Test for International Bank Account Number.
Definition parser.hpp:4672
T bban[31]
Normalized Basic Bank Account Number.
Definition parser.hpp:4943
T country[3]
ISO 3166-1 alpha-2 country code.
Definition parser.hpp:4941
T check_digits[3]
Two check digits.
Definition parser.hpp:4942
bool is_valid
Is IBAN valid per ISO 7064.
Definition parser.hpp:4944
Test for decimal integer.
Definition parser.hpp:1298
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1383
bool has_separators
Did integer have any separators?
Definition parser.hpp:1443
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1442
Test for hexadecimal integer.
Definition parser.hpp:1464
Base class for integer testing.
Definition parser.hpp:1276
size_t value
Calculated value of the numeral.
Definition parser.hpp:1290
Test for IPv4 address.
Definition parser.hpp:2349
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2464
struct in_addr value
IPv4 address value.
Definition parser.hpp:2465
Test for IPv6 address.
Definition parser.hpp:2568
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2770
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2768
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2769
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2496
Test for repeating.
Definition parser.hpp:913
bool m_greedy
try to match as long a sequence as possible
Definition parser.hpp:952
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:949
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:950
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:951
Test for JSON string.
Definition parser.hpp:7298
MIME content type.
Definition parser.hpp:7849
stdex::interval< size_t > base_type
basic type position in source
Definition parser.hpp:7967
stdex::interval< size_t > sub_type
sub-type position in source
Definition parser.hpp:7968
stdex::interval< size_t > charset
charset position in source
Definition parser.hpp:7969
Test for mixed numeral.
Definition parser.hpp:1925
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:2031
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2029
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2028
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2027
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2030
Test for monetary numeral.
Definition parser.hpp:2220
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2326
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2331
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2329
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2332
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2330
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2327
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2328
"No-op" match
Definition parser.hpp:192
Base template for all parsers.
Definition parser.hpp:74
stdex::interval< size_t > interval
Region of the last match.
Definition parser.hpp:172
Test for permutation.
Definition parser.hpp:1200
Test for phone number.
Definition parser.hpp:4519
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4645
Test for any punctuation code unit.
Definition parser.hpp:466
Test for Roman numeral.
Definition parser.hpp:1573
Test for scientific numeral.
Definition parser.hpp:2051
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2195
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2199
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2193
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2194
double value
Calculated value of the numeral.
Definition parser.hpp:2203
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2201
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2198
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2200
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2202
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2197
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2196
Test for match score.
Definition parser.hpp:1752
Test for sequence.
Definition parser.hpp:1009
Definition parser.hpp:691
Test for SI Reference delimiter.
Definition parser.hpp:5163
Test for SI Reference part.
Definition parser.hpp:5118
Test for SI Reference.
Definition parser.hpp:5201
basic_si_reference_part< T > part3
Reference data part 3 (P3)
Definition parser.hpp:5518
basic_si_reference_part< T > part1
Reference data part 1 (P1)
Definition parser.hpp:5516
bool is_valid
Is reference valid.
Definition parser.hpp:5519
T model[3]
Reference model.
Definition parser.hpp:5515
basic_si_reference_part< T > part2
Reference data part 2 (P2)
Definition parser.hpp:5517
Test for signed numeral.
Definition parser.hpp:1839
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1907
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1906
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1905
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1908
Test for any space code unit.
Definition parser.hpp:387
Test for any space or punctuation code unit.
Definition parser.hpp:540
Test for any string.
Definition parser.hpp:1128
Test for given string.
Definition parser.hpp:818
Test for time.
Definition parser.hpp:4293
Test for valid URL password character.
Definition parser.hpp:3089
Test for valid URL path character.
Definition parser.hpp:3189
Test for URL path.
Definition parser.hpp:3297
Test for valid URL username character.
Definition parser.hpp:2990
Test for URL.
Definition parser.hpp:3438
Test for HTTP agent.
Definition parser.hpp:6825
Test for HTTP any type.
Definition parser.hpp:5947
Test for HTTP asterisk.
Definition parser.hpp:6589
Test for HTTP header.
Definition parser.hpp:7145
Test for HTTP language (RFC1766)
Definition parser.hpp:6457
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:5621
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5979
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:6034
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5892
http_token name
Parameter name.
Definition parser.hpp:5936
http_value value
Parameter value.
Definition parser.hpp:5937
Test for HTTP protocol.
Definition parser.hpp:6900
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:7001
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:5782
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:5838
Test for HTTP request.
Definition parser.hpp:7008
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:5657
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:5694
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:5728
Test for HTTP URL parameter.
Definition parser.hpp:6274
Test for HTTP URL path segment.
Definition parser.hpp:6185
Test for HTTP URL path.
Definition parser.hpp:6218
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:6267
Test for HTTP URL port.
Definition parser.hpp:6129
Test for HTTP URL server.
Definition parser.hpp:6092
Test for HTTP URL.
Definition parser.hpp:6355
Collection of HTTP values.
Definition parser.hpp:7254
Test for HTTP value (RFC2616: value)
Definition parser.hpp:5848
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5884
http_token token
Value when matched as token.
Definition parser.hpp:5885
Test for HTTP weight factor.
Definition parser.hpp:6520
float value
Calculated value of the weight factor.
Definition parser.hpp:6582
Test for HTTP weighted value.
Definition parser.hpp:6612
Base template for collection-holding parsers.
Definition parser.hpp:969
Test for any SGML code point.
Definition parser.hpp:256
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:775
Test for specific SGML code point.
Definition parser.hpp:343
Test for valid DNS domain SGML character.
Definition parser.hpp:2863
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2534
Test for any SGML punctuation code point.
Definition parser.hpp:507
Test for any SGML space code point.
Definition parser.hpp:430
Test for any SGML space or punctuation code point.
Definition parser.hpp:583
Test for SGML given string.
Definition parser.hpp:865
Test for valid URL password SGML character.
Definition parser.hpp:3141
Test for valid URL path SGML character.
Definition parser.hpp:3245
Test for valid URL username SGML character.
Definition parser.hpp:3041
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
void invalidate()
Invalidates interval.
Definition interval.hpp:59
T start
interval start
Definition interval.hpp:19
Tag attribute.
Definition parser.hpp:8127
stdex::interval< size_t > name
attribute name position in source
Definition parser.hpp:8128
stdex::interval< size_t > value
attribute value position in source
Definition parser.hpp:8129
Definition parser.hpp:7280