stdex
Additional custom algorithms, or algorithms not covered by Standard C++
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "interval.hpp"
9#include "memory.hpp"
10#include "sal.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include "system.hpp"
14#include <assert.h>
15#include <stdarg.h>
16#include <stdint.h>
17#ifdef _WIN32
18#include <winsock2.h>
19#include <ws2ipdef.h>
20#else
21#include <inaddr.h>
22#include <in6addr.h>
23#endif
24#include <limits>
25#include <list>
26#include <locale>
27#include <memory>
28#include <set>
29#include <string>
30
31#ifdef _MSC_VER
32#pragma warning(push)
33#pragma warning(disable: 4100)
34#endif
35
// Defines, for scoped enum T, the binary operator X over the operand
// combinations T/T, T/underlying and underlying/T, plus the compound
// assignment X= with a T or an underlying-type right-hand side.
36#define ENUM_FLAG_OPERATOR(T,X) \
37inline T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
38inline T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
39inline T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
40inline T& operator X= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
41inline T& operator X= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }
// Forward-declares `enum class T : type`, defines ~, |, ^ and & (with their
// compound forms) for it, and ends with the enum head so that the caller
// continues with the enumerator list: ENUM_FLAGS(my_flags, int) { a = 1, b = 2 };
42#define ENUM_FLAGS(T, type) \
43enum class T : type; \
44inline T operator ~ (T t) { return (T) (~static_cast<std::underlying_type_t <T>>(t)); } \
45ENUM_FLAG_OPERATOR(T,|) \
46ENUM_FLAG_OPERATOR(T,^) \
47ENUM_FLAG_OPERATOR(T,&) \
48enum class T : type
49
50namespace stdex
51{
52 namespace parser
53 {
// Bit flags accepted by the `flags` argument of the parsers below.
constexpr int match_default          = 0;      // no special behaviour requested
constexpr int match_case_insensitive = 1 << 0; // compare characters ignoring case
constexpr int match_multiline        = 1 << 1; // let whitespace tests accept line breaks
60
64 template <class T>
66 {
67 public:
68 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
69 virtual ~basic_parser() {}
70
71 bool search(
72 _In_reads_or_z_(end) const T* text,
73 _In_ size_t start = 0,
74 _In_ size_t end = (size_t)-1,
75 _In_ int flags = match_default)
76 {
77 for (size_t i = start; i < end && text[i]; i++)
78 if (match(text, i, end, flags))
79 return true;
80 return false;
81 }
82
/// Tries to match the text at exactly position `start`; implemented by derived parsers.
///
/// The implementations visible in this file return true and store the matched
/// range in `interval` on success; on failure they return false and leave
/// `interval` with start > end.
83 virtual bool match(
84 _In_reads_or_z_(end) const T* text,
85 _In_ size_t start = 0,
86 _In_ size_t end = (size_t)-1,
87 _In_ int flags = match_default) = 0;
88
89 template<class _Traits, class _Ax>
90 inline bool match(
91 const std::basic_string<T, _Traits, _Ax>& text,
92 _In_ size_t start = 0,
93 _In_ size_t end = (size_t)-1,
94 _In_ int flags = match_default)
95 {
96 return match(text.c_str(), start, std::min<size_t>(end, text.size()), flags);
97 }
98
99 virtual void invalidate()
100 {
101 interval.start = 1;
102 interval.end = 0;
103 }
104
105 protected:
/// Decodes the character that begins at text[start] in SGML-encoded text.
///
/// If text[start] is '&', the code looks ahead for a terminating ';':
/// - "&#...;" / "&#x...;" are parsed as decimal / hexadecimal numbers with
///   strtou32() and written to `buf`;
/// - any other name is passed to sgml2uni(); a non-null result is returned as is.
/// An unterminated or unknown entity, or one interrupted by '&' or whitespace,
/// falls through to the plain case: text[start] alone is copied to `buf`.
///
/// \param[in]  text     SGML text
/// \param[in]  start    Position of the character to decode
/// \param[in]  end      Upper bound of the text (a zero code unit ends it earlier)
/// \param[out] chr_end  Receives the position just past the consumed input
/// \param[out] buf      Scratch buffer that may hold the returned zero-terminated string
///
/// \returns Zero-terminated wide string: either `buf` or the pointer sgml2uni() returned.
107 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
108 {
109 if (text[start] == '&') {
110 // Potential entity start
111 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
112 for (chr_end = start + 1;; chr_end++) {
113 if (chr_end >= end || text[chr_end] == 0) {
114 // Unterminated entity
115 break;
116 }
117 if (text[chr_end] == ';') {
118 // Entity end
// n = number of characters between '&' and ';'
119 size_t n = chr_end - start - 1;
120 if (n >= 2 && text[start + 1] == '#') {
121 // Numerical entity
122 char32_t unicode;
123 if (text[start + 2] == 'x' || text[start + 2] == 'X')
124 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
125 else
126 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
127#ifdef _WIN32
// On Windows, values of 0x10000 and above are split into a surrogate pair.
128 if (unicode < 0x10000) {
129 buf[0] = (wchar_t)unicode;
130 buf[1] = 0;
131 }
132 else {
133 ucs4_to_surrogate_pair(buf, unicode);
134 buf[2] = 0;
135 }
136#else
137 buf[0] = (wchar_t)unicode;
138 buf[1] = 0;
139#endif
// Step over the ';' so that chr_end points past the entity.
140 chr_end++;
141 return buf;
142 }
143 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
144 if (entity_w) {
145 chr_end++;
146 return entity_w;
147 }
148 // Unknown entity.
149 break;
150 }
151 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
152 // This char cannot possibly be a part of entity.
153 break;
154 }
155 }
156 }
// Plain character (or an '&' that did not start a usable entity).
// NOTE(review): a negative `char` value is converted to wchar_t as is - confirm this is intended for non-ASCII bytes.
157 buf[0] = text[start];
158 buf[1] = 0;
159 chr_end = start + 1;
160 return buf;
161 }
163
164 public:
166
167 protected:
// Locale given to the constructor; used for the std::ctype facet lookups.
168 std::locale m_locale;
169 };
170
173#ifdef _UNICODE
174 using tparser = wparser;
175#else
176 using tparser = parser;
177#endif
179
183 template <class T>
184 class basic_noop : public basic_parser<T>
185 {
186 public:
187 virtual bool match(
188 _In_reads_or_z_(end) const T* text,
189 _In_ size_t start = 0,
190 _In_ size_t end = (size_t)-1,
191 _In_ int flags = match_default)
192 {
193 assert(text || start >= end);
194 if (start < end && text[start]) {
195 interval.start = interval.end = start;
196 return true;
197 }
198 interval.start = (interval.end = start) + 1;
199 return false;
200 }
201 };
202
203 using noop = basic_noop<char>;
205#ifdef _UNICODE
206 using tnoop = wnoop;
207#else
208 using tnoop = noop;
209#endif
211
215 template <class T>
216 class basic_any_cu : public basic_parser<T>
217 {
218 public:
219 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
220
221 virtual bool match(
222 _In_reads_or_z_(end) const T* text,
223 _In_ size_t start = 0,
224 _In_ size_t end = (size_t)-1,
225 _In_ int flags = match_default)
226 {
227 assert(text || start >= end);
228 if (start < end && text[start]) {
229 interval.end = (interval.start = start) + 1;
230 return true;
231 }
232 interval.start = (interval.end = start) + 1;
233 return false;
234 }
235 };
236
239#ifdef _UNICODE
240 using tany_cu = wany_cu;
241#else
242 using tany_cu = any_cu;
243#endif
244
248 class sgml_any_cp : public basic_any_cu<char>
249 {
250 public:
251 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
252
253 virtual bool match(
254 _In_reads_or_z_(end) const char* text,
255 _In_ size_t start = 0,
256 _In_ size_t end = (size_t)-1,
257 _In_ int flags = match_default)
258 {
259 assert(text || start >= end);
260 if (start < end && text[start]) {
261 if (text[start] == '&') {
262 // SGML entity
263 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
264 for (interval.end = start + 1; interval.end < end && text[interval.end]; interval.end++)
265 if (text[interval.end] == ';') {
266 interval.end++;
267 interval.start = start;
268 return true;
269 }
270 else if (text[interval.end] == '&' || ctype.is(ctype.space, text[interval.end]))
271 break;
272 // Unterminated entity
273 }
274 interval.end = (interval.start = start) + 1;
275 return true;
276 }
277 interval.start = (interval.end = start) + 1;
278 return false;
279 }
280 };
281
285 template <class T>
286 class basic_cu : public basic_parser<T>
287 {
288 public:
289 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
290 basic_parser<T>(locale),
291 m_chr(chr),
292 m_invert(invert)
293 {}
294
295 virtual bool match(
296 _In_reads_or_z_(end) const T* text,
297 _In_ size_t start = 0,
298 _In_ size_t end = (size_t)-1,
299 _In_ int flags = match_default)
300 {
301 assert(text || start >= end);
302 if (start < end && text[start]) {
303 bool r;
304 if (flags & match_case_insensitive) {
305 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
306 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
307 }
308 else
309 r = text[start] == m_chr;
310 if (r && !m_invert || !r && m_invert) {
311 interval.end = (interval.start = start) + 1;
312 return true;
313 }
314 }
315 interval.start = (interval.end = start) + 1;
316 return false;
317 }
318
319 protected:
320 T m_chr;
321 bool m_invert;
322 };
323
// basic_cu for char and wchar_t; tcu picks one of them depending on _UNICODE.
324 using cu = basic_cu<char>;
325 using wcu = basic_cu<wchar_t>;
326#ifdef _UNICODE
327 using tcu = wcu;
328#else
329 using tcu = cu;
330#endif
331
335 class sgml_cp : public sgml_parser
336 {
337 public:
338 sgml_cp(const char* chr, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
339 sgml_parser(locale),
340 m_invert(invert)
341 {
342 assert(chr || !count);
343 wchar_t buf[3];
344 size_t chr_end;
345 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
346 }
347
348 virtual bool match(
349 _In_reads_or_z_(end) const char* text,
350 _In_ size_t start = 0,
351 _In_ size_t end = (size_t)-1,
352 _In_ int flags = match_default)
353 {
354 assert(text || start >= end);
355 if (start < end && text[start]) {
356 wchar_t buf[3];
357 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
358 bool r = ((flags & match_case_insensitive) ?
359 stdex::strnicmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size(), m_locale) :
360 stdex::strncmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size())) == 0;
361 if (r && !m_invert || !r && m_invert) {
362 interval.start = start;
363 return true;
364 }
365 }
366 interval.start = (interval.end = start) + 1;
367 return false;
368 }
369
370 protected:
371 std::wstring m_chr;
372 bool m_invert;
373 };
374
378 template <class T>
379 class basic_space_cu : public basic_parser<T>
380 {
381 public:
382 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
383 basic_parser<T>(locale),
384 m_invert(invert)
385 {}
386
387 virtual bool match(
388 _In_reads_or_z_(end) const T* text,
389 _In_ size_t start = 0,
390 _In_ size_t end = (size_t)-1,
391 _In_ int flags = match_default)
392 {
393 assert(text || start >= end);
394 if (start < end && text[start]) {
395 bool r =
396 ((flags & match_multiline) || !islbreak(text[start])) &&
397 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::space, text[start]);
398 if (r && !m_invert || !r && m_invert) {
399 interval.end = (interval.start = start) + 1;
400 return true;
401 }
402 }
403 interval.start = (interval.end = start) + 1;
404 return false;
405 }
406
407 protected:
408 bool m_invert;
409 };
410
413#ifdef _UNICODE
414 using tspace_cu = wspace_cu;
415#else
416 using tspace_cu = space_cu;
417#endif
418
422 class sgml_space_cp : public basic_space_cu<char>
423 {
424 public:
425 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
426 basic_space_cu<char>(invert, locale)
427 {}
428
429 virtual bool match(
430 _In_reads_or_z_(end) const char* text,
431 _In_ size_t start = 0,
432 _In_ size_t end = (size_t)-1,
433 _In_ int flags = match_default)
434 {
435 assert(text || start >= end);
436 if (start < end && text[start]) {
437 wchar_t buf[3];
438 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
439 const wchar_t* chr_end = chr + stdex::strlen(chr);
440 bool r =
441 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
442 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
443 if (r && !m_invert || !r && m_invert) {
444 interval.start = start;
445 return true;
446 }
447 }
448
449 interval.start = (interval.end = start) + 1;
450 return false;
451 }
452 };
453
457 template <class T>
458 class basic_punct_cu : public basic_parser<T>
459 {
460 public:
461 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
462 basic_parser<T>(locale),
463 m_invert(invert)
464 {}
465
466 virtual bool match(
467 _In_reads_or_z_(end) const T* text,
468 _In_ size_t start = 0,
469 _In_ size_t end = (size_t)-1,
470 _In_ int flags = match_default)
471 {
472 assert(text || start >= end);
473 if (start < end && text[start]) {
474 bool r = std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::punct, text[start]);
475 if (r && !m_invert || !r && m_invert) {
476 interval.end = (interval.start = start) + 1;
477 return true;
478 }
479 }
480 interval.start = (interval.end = start) + 1;
481 return false;
482 }
483
484 protected:
485 bool m_invert;
486 };
487
490#ifdef _UNICODE
491 using tpunct_cu = wpunct_cu;
492#else
493 using tpunct_cu = punct_cu;
494#endif
495
499 class sgml_punct_cp : public basic_punct_cu<char>
500 {
501 public:
502 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
503 basic_punct_cu<char>(invert, locale)
504 {}
505
506 virtual bool match(
507 _In_reads_or_z_(end) const char* text,
508 _In_ size_t start = 0,
509 _In_ size_t end = (size_t)-1,
510 _In_ int flags = match_default)
511 {
512 assert(text || start >= end);
513 if (start < end && text[start]) {
514 wchar_t buf[3];
515 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
516 const wchar_t* chr_end = chr + stdex::strlen(chr);
517 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
518 if (r && !m_invert || !r && m_invert) {
519 interval.start = start;
520 return true;
521 }
522 }
523 interval.start = (interval.end = start) + 1;
524 return false;
525 }
526 };
527
531 template <class T>
533 {
534 public:
535 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
536 basic_parser<T>(locale),
537 m_invert(invert)
538 {}
539
540 virtual bool match(
541 _In_reads_or_z_(end) const T* text,
542 _In_ size_t start = 0,
543 _In_ size_t end = (size_t)-1,
544 _In_ int flags = match_default)
545 {
546 assert(text || start >= end);
547 if (start < end && text[start]) {
548 bool r =
549 ((flags & match_multiline) || !islbreak(text[start])) &&
550 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
551 if (r && !m_invert || !r && m_invert) {
552 interval.end = (interval.start = start) + 1;
553 return true;
554 }
555 }
556 interval.start = (interval.end = start) + 1;
557 return false;
558 }
559
560 protected:
// When true, match() succeeds for code units that fail the space/punct test.
561 bool m_invert;
562 };
563
566#ifdef _UNICODE
568#else
570#endif
571
576 {
577 public:
578 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
579 basic_space_or_punct_cu<char>(invert, locale)
580 {}
581
582 virtual bool match(
583 _In_reads_or_z_(end) const char* text,
584 _In_ size_t start = 0,
585 _In_ size_t end = (size_t)-1,
586 _In_ int flags = match_default)
587 {
588 assert(text || start >= end);
589 if (start < end && text[start]) {
590 wchar_t buf[3];
591 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
592 const wchar_t* chr_end = chr + stdex::strlen(chr);
593 bool r =
594 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
595 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
596 if (r && !m_invert || !r && m_invert) {
597 interval.start = start;
598 return true;
599 }
600 }
601 interval.start = (interval.end = start) + 1;
602 return false;
603 }
604 };
605
609 template <class T>
610 class basic_bol : public basic_parser<T>
611 {
612 public:
613 basic_bol(bool invert = false) : m_invert(invert) {}
614
615 virtual bool match(
616 _In_reads_or_z_(end) const T* text,
617 _In_ size_t start = 0,
618 _In_ size_t end = (size_t)-1,
619 _In_ int flags = match_default)
620 {
621 assert(text || start >= end);
622 bool r = start == 0 || start <= end && islbreak(text[start - 1]);
623 if (r && !m_invert || !r && m_invert) {
624 interval.end = interval.start = start;
625 return true;
626 }
627 interval.start = (interval.end = start) + 1;
628 return false;
629 }
630
631 protected:
632 bool m_invert;
633 };
634
// basic_bol for char and wchar_t; tbol picks one of them depending on _UNICODE.
635 using bol = basic_bol<char>;
636 using wbol = basic_bol<wchar_t>;
637#ifdef _UNICODE
638 using tbol = wbol;
639#else
640 using tbol = bol;
641#endif
643
647 template <class T>
648 class basic_eol : public basic_parser<T>
649 {
650 public:
651 basic_eol(bool invert = false) : m_invert(invert) {}
652
653 virtual bool match(
654 _In_reads_or_z_(end) const T* text,
655 _In_ size_t start = 0,
656 _In_ size_t end = (size_t)-1,
657 _In_ int flags = match_default)
658 {
659 assert(text || start >= end);
660 bool r = islbreak(text[start]);
661 if (r && !m_invert || !r && m_invert) {
662 interval.end = interval.start = start;
663 return true;
664 }
665 interval.start = (interval.end = start) + 1;
666 return false;
667 }
668
669 protected:
670 bool m_invert;
671 };
672
// basic_eol for char and wchar_t; teol picks one of them depending on _UNICODE.
673 using eol = basic_eol<char>;
674 using weol = basic_eol<wchar_t>;
675#ifdef _UNICODE
676 using teol = weol;
677#else
678 using teol = eol;
679#endif
681
682 template <class T>
683 class basic_set : public basic_parser<T>
684 {
685 public:
686 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
687 basic_parser<T>(locale),
688 hit_offset((size_t)-1),
689 m_invert(invert)
690 {}
691
692 virtual bool match(
693 _In_reads_or_z_(end) const T* text,
694 _In_ size_t start = 0,
695 _In_ size_t end = (size_t)-1,
696 _In_ int flags = match_default) = 0;
697
698 virtual void invalidate()
699 {
700 hit_offset = (size_t)-1;
702 }
703
704 public:
705 size_t hit_offset;
706
707 protected:
708 bool m_invert;
709 };
710
714 template <class T>
715 class basic_cu_set : public basic_set<T>
716 {
717 public:
719 _In_reads_or_z_(count) const T* set,
720 _In_ size_t count = (size_t)-1,
721 _In_ bool invert = false,
722 _In_ const std::locale& locale = std::locale()) :
723 basic_set<T>(invert, locale)
724 {
725 if (set)
726 m_set.assign(set, set + stdex::strnlen(set, count));
727 }
728
729 virtual bool match(
730 _In_reads_or_z_(end) const T* text,
731 _In_ size_t start = 0,
732 _In_ size_t end = (size_t)-1,
733 _In_ int flags = match_default)
734 {
735 assert(text || start >= end);
736 if (start < end && text[start]) {
737 const T* set = m_set.c_str();
738 size_t r = (flags & match_case_insensitive) ?
739 stdex::strnichr(set, m_set.size(), text[start], m_locale) :
740 stdex::strnchr(set, m_set.size(), text[start]);
741 if (r != stdex::npos && !m_invert || r == stdex::npos && m_invert) {
742 hit_offset = r;
743 interval.end = (interval.start = start) + 1;
744 return true;
745 }
746 }
747 hit_offset = (size_t)-1;
748 interval.start = (interval.end = start) + 1;
749 return false;
750 }
751
752 protected:
// Code units forming the set that match() searches.
753 std::basic_string<T> m_set;
754 };
755
758#ifdef _UNICODE
759 using tcu_set = wcu_set;
760#else
761 using tcu_set = cu_set;
762#endif
763
767 class sgml_cp_set : public basic_set<char>
768 {
769 public:
770 sgml_cp_set(const char* set, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
771 basic_set<char>(invert, locale)
772 {
773 if (set)
774 m_set = sgml2wstr(set, count);
775 }
776
777 virtual bool match(
778 _In_reads_or_z_(end) const char* text,
779 _In_ size_t start = 0,
780 _In_ size_t end = (size_t)-1,
781 _In_ int flags = match_default)
782 {
783 assert(text || start >= end);
784 if (start < end && text[start]) {
785 wchar_t buf[3];
786 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
787 const wchar_t* set = m_set.c_str();
788 size_t r = (flags & match_case_insensitive) ?
789 stdex::strnistr(set, m_set.size(), chr, m_locale) :
790 stdex::strnstr(set, m_set.size(), chr);
791 if (r != stdex::npos && !m_invert || r == stdex::npos && m_invert) {
792 hit_offset = r;
793 interval.start = start;
794 return true;
795 }
796 }
797 hit_offset = (size_t)-1;
798 interval.start = (interval.end = start) + 1;
799 return false;
800 }
801
802 protected:
803 std::wstring m_set;
804 };
805
809 template <class T>
810 class basic_string : public basic_parser<T>
811 {
812 public:
814 _In_reads_or_z_(count) const T* str,
815 _In_ size_t count = (size_t)-1,
816 _In_ const std::locale& locale = std::locale()) :
817 basic_parser<T>(locale),
818 m_str(str, str + stdex::strnlen(str, count))
819 {}
820
821 virtual bool match(
822 _In_reads_or_z_(end) const T* text,
823 _In_ size_t start = 0,
824 _In_ size_t end = (size_t)-1,
825 _In_ int flags = match_default)
826 {
827 assert(text || start >= end);
828 size_t
829 m = m_str.size(),
830 n = std::min<size_t>(end - start, m);
831 bool r = ((flags & match_case_insensitive) ?
832 stdex::strnicmp(text + start, n, m_str.c_str(), m, m_locale) :
833 stdex::strncmp(text + start, n, m_str.c_str(), m)) == 0;
834 if (r) {
835 interval.end = (interval.start = start) + n;
836 return true;
837 }
838 interval.start = (interval.end = start) + 1;
839 return false;
840 }
841
842 protected:
// String that match() compares the text against.
843 std::basic_string<T> m_str;
844 };
845
848#ifdef _UNICODE
849 using tstring = wstring;
850#else
851 using tstring = string;
852#endif
853
858 {
859 public:
860 sgml_string(const char* str, size_t count = (size_t)-1, _In_ const std::locale& locale = std::locale()) :
861 sgml_parser(locale),
862 m_str(sgml2wstr(str, count))
863 {}
864
865 virtual bool match(
866 _In_reads_or_z_(end) const char* text,
867 _In_ size_t start = 0,
868 _In_ size_t end = (size_t)-1,
869 _In_ int flags = match_default)
870 {
871 assert(text || start >= end);
872 const wchar_t* str = m_str.c_str();
873 const bool case_insensitive = flags & match_case_insensitive ? true : false;
874 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
875 for (interval.end = start;;) {
876 if (!*str) {
877 interval.start = start;
878 return true;
879 }
880 if (interval.end >= end || !text[interval.end]) {
881 interval.start = (interval.end = start) + 1;
882 return false;
883 }
884 wchar_t buf[3];
885 const wchar_t* chr = next_sgml_cp(text, interval.end, end, interval.end, buf);
886 for (; *chr; ++str, ++chr) {
887 if (!*str ||
888 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
889 {
890 interval.start = (interval.end = start) + 1;
891 return false;
892 }
893 }
894 }
895 }
896
897 protected:
// Result of sgml2wstr() on the constructor argument; match() compares against it.
898 std::wstring m_str;
899 };
900
904 template <class T>
906 {
907 public:
908 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = (size_t)-1, bool greedy = true) :
909 m_el(el),
910 m_min_iterations(min_iterations),
911 m_max_iterations(max_iterations),
912 m_greedy(greedy)
913 {}
914
915 virtual bool match(
916 _In_reads_or_z_(end) const T* text,
917 _In_ size_t start = 0,
918 _In_ size_t end = (size_t)-1,
919 _In_ int flags = match_default)
920 {
921 assert(text || start >= end);
922 interval.start = interval.end = start;
923 for (size_t i = 0; ; i++) {
924 if (!m_greedy && i >= m_min_iterations || i >= m_max_iterations)
925 return true;
926 if (!m_el->match(text, interval.end, end, flags)) {
927 if (i >= m_min_iterations)
928 return true;
929 break;
930 }
931 if (m_el->interval.end == interval.end) {
932 // Element did match, but the matching interval was empty. Quit instead of spinning.
933 return true;
934 }
935 interval.end = m_el->interval.end;
936 }
937 interval.start = (interval.end = start) + 1;
938 return false;
939 }
940
941 protected:
// Parser that match() applies repeatedly.
942 std::shared_ptr<basic_parser<T>> m_el;
// When false, match() stops as soon as the minimum iteration count is reached.
945 bool m_greedy;
946 };
947
950#ifdef _UNICODE
951 using titerations = witerations;
952#else
953 using titerations = iterations;
954#endif
956
960 template <class T>
962 {
963 protected:
// Creates an empty collection; only forwards `locale` to basic_parser.
964 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
965
966 public:
968 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
969 _In_ size_t count,
970 _In_ const std::locale& locale = std::locale()) :
971 basic_parser<T>(locale)
972 {
973 assert(el || !count);
974 m_collection.reserve(count);
975 for (size_t i = 0; i < count; i++)
976 m_collection.push_back(el[i]);
977 }
978
980 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
981 _In_ const std::locale& locale = std::locale()) :
982 basic_parser<T>(locale),
983 m_collection(std::move(collection))
984 {}
985
986 virtual void invalidate()
987 {
988 for (auto& el: m_collection)
989 el->invalidate();
991 }
992
993 protected:
// Parsers held by the collection (shared ownership).
994 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
995 };
996
1000 template <class T>
1002 {
1003 public:
1005 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1006 _In_ size_t count = 0,
1007 _In_ const std::locale& locale = std::locale()) :
1008 parser_collection<T>(el, count, locale)
1009 {}
1010
1012 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1013 _In_ const std::locale& locale = std::locale()) :
1014 parser_collection<T>(std::move(collection), locale)
1015 {}
1016
1017 virtual bool match(
1018 _In_reads_or_z_(end) const T* text,
1019 _In_ size_t start = 0,
1020 _In_ size_t end = (size_t)-1,
1021 _In_ int flags = match_default)
1022 {
1023 assert(text || start >= end);
1024 interval.end = start;
1025 for (auto i = m_collection.begin(); i != m_collection.end(); ++i) {
1026 if (!(*i)->match(text, interval.end, end, flags)) {
1027 for (++i; i != m_collection.end(); ++i)
1028 (*i)->invalidate();
1029 interval.start = (interval.end = start) + 1;
1030 return false;
1031 }
1032 interval.end = (*i)->interval.end;
1033 }
1034 interval.start = start;
1035 return true;
1036 }
1037 };
1038
1041#ifdef _UNICODE
1042 using tsequence = wsequence;
1043#else
1044 using tsequence = sequence;
1045#endif
1047
1051 template <class T>
1053 {
1054 protected:
1055 basic_branch(_In_ const std::locale& locale) :
1056 parser_collection<T>(locale),
1057 hit_offset((size_t)-1)
1058 {}
1059
1060 public:
1062 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1063 _In_ size_t count = 0,
1064 _In_ const std::locale& locale = std::locale()) :
1065 parser_collection<T>(el, count, locale),
1066 hit_offset((size_t)-1)
1067 {}
1068
1070 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1071 _In_ const std::locale& locale = std::locale()) :
1072 parser_collection<T>(std::move(collection), locale),
1073 hit_offset((size_t)-1)
1074 {}
1075
1076 virtual bool match(
1077 _In_reads_or_z_(end) const T* text,
1078 _In_ size_t start = 0,
1079 _In_ size_t end = (size_t)-1,
1080 _In_ int flags = match_default)
1081 {
1082 assert(text || start >= end);
1083 hit_offset = 0;
1084 for (auto i = m_collection.begin(); i != m_collection.end(); ++i, ++hit_offset) {
1085 if ((*i)->match(text, start, end, flags)) {
1086 interval = (*i)->interval;
1087 for (++i; i != m_collection.end(); ++i)
1088 (*i)->invalidate();
1089 return true;
1090 }
1091 }
1092 hit_offset = (size_t)-1;
1093 interval.start = (interval.end = start) + 1;
1094 return false;
1095 }
1096
1097 virtual void invalidate()
1098 {
1099 hit_offset = (size_t)-1;
1101 }
1102
1103 public:
// Index of the element that matched in the last match(); (size_t)-1 when none did.
1104 size_t hit_offset;
1105 };
1106
1107 using branch = basic_branch<char>;
1109#ifdef _UNICODE
1110 using tbranch = wbranch;
1111#else
1112 using tbranch = branch;
1113#endif
1115
1119 template <class T, class T_parser = basic_string<T>>
1121 {
1122 public:
1123 inline basic_string_branch(
1124 _In_reads_(count) const T* str_z = nullptr,
1125 _In_ size_t count = 0,
1126 _In_ const std::locale& locale = std::locale()) :
1127 basic_branch<T>(locale)
1128 {
1129 build(str_z, count);
1130 }
1131
1132 inline basic_string_branch(_In_z_ const T* str, ...) :
1133 basic_branch<T>(std::locale())
1134 {
1135 va_list params;
1136 va_start(params, str);
1137 build(str, params);
1138 va_end(params);
1139 }
1140
1141 inline basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1142 basic_branch<T>(locale)
1143 {
1144 va_list params;
1145 va_start(params, str);
1146 build(str, params);
1147 va_end(params);
1148 }
1149
1150 protected:
1151 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1152 {
1153 assert(str_z || !count);
1154 if (count) {
1155 size_t offset, n;
1156 for (
1157 offset = n = 0;
1158 offset < count && str_z[offset];
1159 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1160 m_collection.reserve(n);
1161 for (
1162 offset = 0;
1163 offset < count && str_z[offset];
1164 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1165 m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, m_locale)));
1166 }
1167 }
1168
1169 void build(_In_z_ const T* str, _In_ va_list params)
1170 {
1171 const T* p;
1172 for (
1173 m_collection.push_back(std::move(std::make_shared<T_parser>(str, (size_t)-1, m_locale)));
1174 (p = va_arg(params, const T*)) != nullptr;
1175 m_collection.push_back(std::move(std::make_shared<T_parser>(p, (size_t)-1, m_locale))));
1176 }
1177 };
1178
1181#ifdef _UNICODE
1183#else
1185#endif
1187
1191 template <class T>
1193 {
1194 public:
1196 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1197 _In_ size_t count = 0,
1198 _In_ const std::locale& locale = std::locale()) :
1199 parser_collection<T>(el, count, locale)
1200 {}
1201
1203 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1204 _In_ const std::locale& locale = std::locale()) :
1205 parser_collection<T>(std::move(collection), locale)
1206 {}
1207
1208 virtual bool match(
1209 _In_reads_or_z_(end) const T* text,
1210 _In_ size_t start = 0,
1211 _In_ size_t end = (size_t)-1,
1212 _In_ int flags = match_default)
1213 {
1214 assert(text || start >= end);
1215 for (auto& el: m_collection)
1216 el->invalidate();
1217 if (match_recursively(text, start, end, flags)) {
1218 interval.start = start;
1219 return true;
1220 }
1221 interval.start = (interval.end = start) + 1;
1222 return false;
1223 }
1224
1225 protected:
1226 bool match_recursively(
1227 _In_reads_or_z_(end) const T* text,
1228 _In_ size_t start = 0,
1229 _In_ size_t end = (size_t)-1,
1230 _In_ int flags = match_default)
1231 {
1232 bool all_matched = true;
1233 for (auto& el: m_collection) {
1234 if (!el->interval) {
1235 // Element was not matched in permutatuion yet.
1236 all_matched = false;
1237 if (el->match(text, start, end, flags)) {
1238 // Element matched for the first time.
1239 if (match_recursively(text, el->interval.end, end, flags)) {
1240 // Rest of the elements matched too.
1241 return true;
1242 }
1243 el->invalidate();
1244 }
1245 }
1246 }
1247 if (all_matched) {
1248 interval.end = start;
1249 return true;
1250 }
1251 return false;
1252 }
1253 };
1254
1257#ifdef _UNICODE
1258 using tpermutation = wpermutation;
1259#else
1260 using tpermutation = permutation;
1261#endif
1263
1267 template <class T>
1268 class basic_integer : public basic_parser<T>
1269 {
1270 public:
1271 basic_integer(_In_ const std::locale& locale = std::locale()) :
1272 basic_parser<T>(locale),
1273 value(0)
1274 {}
1275
1276 virtual void invalidate()
1277 {
1278 value = 0;
1280 }
1281
1282 public:
1283 size_t value;
1284 };
1285
1289 template <class T>
1291 {
1292 public:
1294 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1295 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1296 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1297 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1298 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1299 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1300 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1301 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1302 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1303 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1304 _In_ const std::locale& locale = std::locale()) :
1305 basic_integer<T>(locale),
1306 m_digit_0(digit_0),
1307 m_digit_1(digit_1),
1308 m_digit_2(digit_2),
1309 m_digit_3(digit_3),
1310 m_digit_4(digit_4),
1311 m_digit_5(digit_5),
1312 m_digit_6(digit_6),
1313 m_digit_7(digit_7),
1314 m_digit_8(digit_8),
1315 m_digit_9(digit_9)
1316 {}
1317
1318 virtual bool match(
1319 _In_reads_or_z_(end) const T* text,
1320 _In_ size_t start = 0,
1321 _In_ size_t end = (size_t)-1,
1322 _In_ int flags = match_default)
1323 {
1324 assert(text || start >= end);
1325 for (interval.end = start, value = 0; interval.end < end && text[interval.end];) {
1326 size_t dig;
1327 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; interval.end = m_digit_0->interval.end; }
1328 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; interval.end = m_digit_1->interval.end; }
1329 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; interval.end = m_digit_2->interval.end; }
1330 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; interval.end = m_digit_3->interval.end; }
1331 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; interval.end = m_digit_4->interval.end; }
1332 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; interval.end = m_digit_5->interval.end; }
1333 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; interval.end = m_digit_6->interval.end; }
1334 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; interval.end = m_digit_7->interval.end; }
1335 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; interval.end = m_digit_8->interval.end; }
1336 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; interval.end = m_digit_9->interval.end; }
1337 else break;
1338 value = value * 10 + dig;
1339 }
1340 if (start < interval.end) {
1341 interval.start = start;
1342 return true;
1343 }
1344 interval.start = (interval.end = start) + 1;
1345 return false;
1346 }
1347
1348 protected:
1349 std::shared_ptr<basic_parser<T>>
1350 m_digit_0,
1351 m_digit_1,
1352 m_digit_2,
1353 m_digit_3,
1354 m_digit_4,
1355 m_digit_5,
1356 m_digit_6,
1357 m_digit_7,
1358 m_digit_8,
1359 m_digit_9;
1360 };
1361
// tinteger10 follows the build's character mode: winteger10 when _UNICODE is defined, integer10 otherwise.
1364#ifdef _UNICODE
1365 using tinteger10 = winteger10;
1366#else
1367 using tinteger10 = integer10;
1368#endif
1370
1374 template <class T>
1376 {
1377 public:
1379 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1380 _In_ const std::shared_ptr<basic_set<T>>& separator,
1381 _In_ const std::locale& locale = std::locale()) :
1382 basic_integer<T>(locale),
1383 digit_count(0),
1384 has_separators(false),
1385 m_digits(digits),
1386 m_separator(separator)
1387 {}
1388
1389 virtual bool match(
1390 _In_reads_or_z_(end) const T* text,
1391 _In_ size_t start = 0,
1392 _In_ size_t end = (size_t)-1,
1393 _In_ int flags = match_default)
1394 {
1395 assert(text || start >= end);
1396 if (m_digits->match(text, start, end, flags)) {
1397 // Leading part match.
1398 value = m_digits->value;
1399 digit_count = m_digits->interval.size();
1400 has_separators = false;
1401 interval.start = start;
1402 interval.end = m_digits->interval.end;
1403 if (m_digits->interval.size() <= 3) {
1404 // Maybe separated with thousand separators?
1405 size_t hit_offset = (size_t)-1;
1406 while (m_separator->match(text, interval.end, end, flags) &&
1407 (hit_offset == (size_t)-1 || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1408 m_digits->match(text, m_separator->interval.end, end, flags) &&
1409 m_digits->interval.size() == 3)
1410 {
1411 // Thousand separator and three-digit integer followed.
1412 value = value * 1000 + m_digits->value;
1413 digit_count += 3;
1414 has_separators = true;
1415 interval.end = m_digits->interval.end;
1416 hit_offset = m_separator->hit_offset;
1417 }
1418 }
1419
1420 return true;
1421 }
1422 value = 0;
1423 interval.start = (interval.end = start) + 1;
1424 return false;
1425 }
1426
/// Clears the digit count and the separator flag.
// NOTE(review): the embedded numbering skips 1431 inside this body - a statement may be missing from this listing; confirm against the original header.
1427 virtual void invalidate()
1428 {
1429 digit_count = 0;
1430 has_separators = false;
1432 }
1433
1434 public:
1437
1438 protected:
1439 std::shared_ptr<basic_integer10<T>> m_digits;
1440 std::shared_ptr<basic_set<T>> m_separator;
1441 };
1442
// tinteger10ts follows the build's character mode: winteger10ts when _UNICODE is defined, integer10ts otherwise.
1445#ifdef _UNICODE
1446 using tinteger10ts = winteger10ts;
1447#else
1448 using tinteger10ts = integer10ts;
1449#endif
1451
1455 template <class T>
1457 {
1458 public:
1460 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1461 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1462 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1463 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1464 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1465 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1466 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1467 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1468 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1469 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1470 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1471 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1472 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1473 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1474 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1475 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1476 _In_ const std::locale& locale = std::locale()) :
1477 basic_integer<T>(locale),
1478 m_digit_0(digit_0),
1479 m_digit_1(digit_1),
1480 m_digit_2(digit_2),
1481 m_digit_3(digit_3),
1482 m_digit_4(digit_4),
1483 m_digit_5(digit_5),
1484 m_digit_6(digit_6),
1485 m_digit_7(digit_7),
1486 m_digit_8(digit_8),
1487 m_digit_9(digit_9),
1488 m_digit_10(digit_10),
1489 m_digit_11(digit_11),
1490 m_digit_12(digit_12),
1491 m_digit_13(digit_13),
1492 m_digit_14(digit_14),
1493 m_digit_15(digit_15)
1494 {}
1495
1496 virtual bool match(
1497 _In_reads_or_z_(end) const T* text,
1498 _In_ size_t start = 0,
1499 _In_ size_t end = (size_t)-1,
1500 _In_ int flags = match_default)
1501 {
1502 assert(text || start >= end);
1503 for (interval.end = start, value = 0; interval.end < end && text[interval.end];) {
1504 size_t dig;
1505 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; interval.end = m_digit_0->interval.end; }
1506 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; interval.end = m_digit_1->interval.end; }
1507 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; interval.end = m_digit_2->interval.end; }
1508 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; interval.end = m_digit_3->interval.end; }
1509 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; interval.end = m_digit_4->interval.end; }
1510 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; interval.end = m_digit_5->interval.end; }
1511 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; interval.end = m_digit_6->interval.end; }
1512 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; interval.end = m_digit_7->interval.end; }
1513 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; interval.end = m_digit_8->interval.end; }
1514 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; interval.end = m_digit_9->interval.end; }
1515 else if (m_digit_10->match(text, interval.end, end, flags)) { dig = 10; interval.end = m_digit_10->interval.end; }
1516 else if (m_digit_11->match(text, interval.end, end, flags)) { dig = 11; interval.end = m_digit_11->interval.end; }
1517 else if (m_digit_12->match(text, interval.end, end, flags)) { dig = 12; interval.end = m_digit_12->interval.end; }
1518 else if (m_digit_13->match(text, interval.end, end, flags)) { dig = 13; interval.end = m_digit_13->interval.end; }
1519 else if (m_digit_14->match(text, interval.end, end, flags)) { dig = 14; interval.end = m_digit_14->interval.end; }
1520 else if (m_digit_15->match(text, interval.end, end, flags)) { dig = 15; interval.end = m_digit_15->interval.end; }
1521 else break;
1522 value = value * 16 + dig;
1523 }
1524 if (start < interval.end) {
1525 interval.start = start;
1526 return true;
1527 }
1528 interval.start = (interval.end = start) + 1;
1529 return false;
1530 }
1531
1532 protected:
1533 std::shared_ptr<basic_parser<T>>
1534 m_digit_0,
1535 m_digit_1,
1536 m_digit_2,
1537 m_digit_3,
1538 m_digit_4,
1539 m_digit_5,
1540 m_digit_6,
1541 m_digit_7,
1542 m_digit_8,
1543 m_digit_9,
1544 m_digit_10,
1545 m_digit_11,
1546 m_digit_12,
1547 m_digit_13,
1548 m_digit_14,
1549 m_digit_15;
1550 };
1551
// tinteger16 follows the build's character mode: winteger16 when _UNICODE is defined, integer16 otherwise.
1554#ifdef _UNICODE
1555 using tinteger16 = winteger16;
1556#else
1557 using tinteger16 = integer16;
1558#endif
1560
1564 template <class T>
1566 {
1567 public:
1569 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1570 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1571 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1572 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1573 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1574 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1575 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1576 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1577 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1578 _In_ const std::locale& locale = std::locale()) :
1579 basic_integer<T>(locale),
1580 m_digit_1(digit_1),
1581 m_digit_5(digit_5),
1582 m_digit_10(digit_10),
1583 m_digit_50(digit_50),
1584 m_digit_100(digit_100),
1585 m_digit_500(digit_500),
1586 m_digit_1000(digit_1000),
1587 m_digit_5000(digit_5000),
1588 m_digit_10000(digit_10000)
1589 {}
1590
1591 virtual bool match(
1592 _In_reads_or_z_(end) const T* text,
1593 _In_ size_t start = 0,
1594 _In_ size_t end = (size_t)-1,
1595 _In_ int flags = match_default)
1596 {
1597 assert(text || start >= end);
1598 size_t
1599 dig[5] = { (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1 },
1600 end2;
1601
1602 for (interval.end = start, value = 0; interval.end < end && text[interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], interval.end = end2) {
1603 if (m_digit_1 && m_digit_1->match(text, interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1604 else if (m_digit_5 && m_digit_5->match(text, interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1605 else if (m_digit_10 && m_digit_10->match(text, interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1606 else if (m_digit_50 && m_digit_50->match(text, interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1607 else if (m_digit_100 && m_digit_100->match(text, interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1608 else if (m_digit_500 && m_digit_500->match(text, interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1609 else if (m_digit_1000 && m_digit_1000->match(text, interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1610 else if (m_digit_5000 && m_digit_5000->match(text, interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1611 else if (m_digit_10000 && m_digit_10000->match(text, interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1612 else break;
1613
1614 // Store first digit.
1615 if (dig[4] == (size_t)-1) dig[4] = dig[0];
1616
1617 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1618 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1619 break;
1620 }
1621 if (dig[0] <= dig[1]) {
1622 // Digit is less or equal previous one: add.
1623 value += dig[0];
1624 }
1625 else if (
1626 dig[1] == 1 && (dig[0] == 5 || dig[0] == 10) ||
1627 dig[1] == 10 && (dig[0] == 50 || dig[0] == 100) ||
1628 dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000) ||
1629 dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000))
1630 {
1631 // Digit is up to two orders bigger than previous one: subtract. But...
1632 if (dig[2] < dig[0]) {
1633 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1634 break;
1635 }
1636 value -= dig[1]; // Cancel addition in the previous step.
1637 dig[0] -= dig[1]; // Combine last two digits.
1638 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1639 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1640 value += dig[0]; // Add combined value.
1641 }
1642 else {
1643 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1644 break;
1645 }
1646 }
1647 if (value) {
1648 interval.start = start;
1649 return true;
1650 }
1651 interval.start = (interval.end = start) + 1;
1652 return false;
1653 }
1654
1655 protected:
1656 std::shared_ptr<basic_parser<T>>
1657 m_digit_1,
1658 m_digit_5,
1659 m_digit_10,
1660 m_digit_50,
1661 m_digit_100,
1662 m_digit_500,
1663 m_digit_1000,
1664 m_digit_5000,
1665 m_digit_10000;
1666 };
1667
1670#ifdef _UNICODE
1672#else
1674#endif
1676
1680 template <class T>
1682 {
1683 public:
1685 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1686 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1687 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1688 _In_ const std::locale& locale = std::locale()) :
1689 basic_parser<T>(locale),
1690 numerator(_numerator),
1691 fraction_line(_fraction_line),
1692 denominator(_denominator)
1693 {}
1694
1695 virtual bool match(
1696 _In_reads_or_z_(end) const T* text,
1697 _In_ size_t start = 0,
1698 _In_ size_t end = (size_t)-1,
1699 _In_ int flags = match_default)
1700 {
1701 assert(text || start >= end);
1702 if (numerator->match(text, start, end, flags) &&
1703 fraction_line->match(text, numerator->interval.end, end, flags) &&
1704 denominator->match(text, fraction_line->interval.end, end, flags))
1705 {
1706 interval.start = start;
1707 interval.end = denominator->interval.end;
1708 return true;
1709 }
1710 numerator->invalidate();
1711 fraction_line->invalidate();
1712 denominator->invalidate();
1713 interval.start = (interval.end = start) + 1;
1714 return false;
1715 }
1716
/// Invalidates the numerator, fraction line and denominator sub-parsers.
// NOTE(review): the embedded numbering skips 1722 inside this body - a statement may be missing from this listing; confirm against the original header.
1717 virtual void invalidate()
1718 {
1719 numerator->invalidate();
1720 fraction_line->invalidate();
1721 denominator->invalidate();
1723 }
1724
1725 public:
1726 std::shared_ptr<basic_parser<T>> numerator;
1727 std::shared_ptr<basic_parser<T>> fraction_line;
1728 std::shared_ptr<basic_parser<T>> denominator;
1729 };
1730
// tfraction follows the build's character mode: wfraction when _UNICODE is defined, fraction otherwise.
1733#ifdef _UNICODE
1734 using tfraction = wfraction;
1735#else
1736 using tfraction = fraction;
1737#endif
1739
1743 template <class T>
1744 class basic_score : public basic_parser<T>
1745 {
1746 public:
1748 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1749 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1750 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1751 _In_ const std::shared_ptr<basic_parser<T>>& space,
1752 _In_ const std::locale& locale = std::locale()) :
1753 basic_parser<T>(locale),
1754 home(_home),
1755 separator(_separator),
1756 guest(_guest),
1757 m_space(space)
1758 {}
1759
1760 virtual bool match(
1761 _In_reads_or_z_(end) const T* text,
1762 _In_ size_t start = 0,
1763 _In_ size_t end = (size_t)-1,
1764 _In_ int flags = match_default)
1765 {
1766 assert(text || start >= end);
1767 interval.end = start;
1768
1769 if (home->match(text, interval.end, end, flags))
1770 interval.end = home->interval.end;
1771 else
1772 goto end;
1773
1774 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1775 for (; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1776
1777 if (separator->match(text, interval.end, end, flags))
1778 interval.end = separator->interval.end;
1779 else
1780 goto end;
1781
1782 for (; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1783
1784 if (guest->match(text, interval.end, end, flags))
1785 interval.end = guest->interval.end;
1786 else
1787 goto end;
1788
1789 interval.start = start;
1790 return true;
1791
1792 end:
1793 home->invalidate();
1794 separator->invalidate();
1795 guest->invalidate();
1796 interval.start = (interval.end = start) + 1;
1797 return false;
1798 }
1799
/// Invalidates the home, separator and guest sub-parsers.
// NOTE(review): the embedded numbering skips 1805 inside this body - a statement may be missing from this listing; confirm against the original header.
1800 virtual void invalidate()
1801 {
1802 home->invalidate();
1803 separator->invalidate();
1804 guest->invalidate();
1806 }
1807
1808 public:
1809 std::shared_ptr<basic_parser<T>> home;
1810 std::shared_ptr<basic_parser<T>> separator;
1811 std::shared_ptr<basic_parser<T>> guest;
1812
1813 protected:
1814 std::shared_ptr<basic_parser<T>> m_space;
1815 };
1816
// score is the char instantiation of basic_score.
1817 using score = basic_score<char>;
// tscore follows the build's character mode: wscore when _UNICODE is defined, score otherwise.
1819#ifdef _UNICODE
1820 using tscore = wscore;
1821#else
1822 using tscore = score;
1823#endif
1825
1829 template <class T>
1831 {
1832 public:
1834 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1835 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1836 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1837 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1838 _In_ const std::locale& locale = std::locale()) :
1839 basic_parser<T>(locale),
1840 positive_sign(_positive_sign),
1841 negative_sign(_negative_sign),
1842 special_sign(_special_sign),
1843 number(_number)
1844 {}
1845
1846 virtual bool match(
1847 _In_reads_or_z_(end) const T* text,
1848 _In_ size_t start = 0,
1849 _In_ size_t end = (size_t)-1,
1850 _In_ int flags = match_default)
1851 {
1852 assert(text || start >= end);
1853 interval.end = start;
1854 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
1855 interval.end = positive_sign->interval.end;
1856 if (negative_sign) negative_sign->invalidate();
1857 if (special_sign) special_sign->invalidate();
1858 }
1859 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
1860 interval.end = negative_sign->interval.end;
1861 if (positive_sign) positive_sign->invalidate();
1862 if (special_sign) special_sign->invalidate();
1863 }
1864 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
1865 interval.end = special_sign->interval.end;
1866 if (positive_sign) positive_sign->invalidate();
1867 if (negative_sign) negative_sign->invalidate();
1868 }
1869 else {
1870 if (positive_sign) positive_sign->invalidate();
1871 if (negative_sign) negative_sign->invalidate();
1872 if (special_sign) special_sign->invalidate();
1873 }
1874 if (number->match(text, interval.end, end, flags)) {
1875 interval.start = start;
1876 interval.end = number->interval.end;
1877 return true;
1878 }
1879 if (positive_sign) positive_sign->invalidate();
1880 if (negative_sign) negative_sign->invalidate();
1881 if (special_sign) special_sign->invalidate();
1882 number->invalidate();
1883 interval.start = (interval.end = start) + 1;
1884 return false;
1885 }
1886
/// Invalidates the sign sub-parsers that are set and the number sub-parser.
// NOTE(review): the embedded numbering skips 1893 inside this body - a statement may be missing from this listing; confirm against the original header.
1887 virtual void invalidate()
1888 {
1889 if (positive_sign) positive_sign->invalidate();
1890 if (negative_sign) negative_sign->invalidate();
1891 if (special_sign) special_sign->invalidate();
1892 number->invalidate();
1894 }
1895
1896 public:
1897 std::shared_ptr<basic_parser<T>> positive_sign;
1898 std::shared_ptr<basic_parser<T>> negative_sign;
1899 std::shared_ptr<basic_parser<T>> special_sign;
1900 std::shared_ptr<basic_parser<T>> number;
1901 };
1902
1905#ifdef _UNICODE
1907#else
1909#endif
1911
1915 template <class T>
1917 {
1918 public:
1920 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1921 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1922 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1923 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1924 _In_ const std::shared_ptr<basic_parser<T>>& space,
1925 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1926 _In_ const std::locale& locale = std::locale()) :
1927 basic_parser<T>(locale),
1928 positive_sign(_positive_sign),
1929 negative_sign(_negative_sign),
1930 special_sign(_special_sign),
1931 integer(_integer),
1932 fraction(_fraction),
1933 m_space(space)
1934 {}
1935
/// Matches an optionally signed mixed numeral: "<integer> <fraction>", a lone <fraction>, or a lone <integer>.
///
/// The sign parsers are optional (may be null) and are tried in the order positive, negative,
/// special; the ones that did not match are invalidated. The three shapes are then tried in the
/// order listed above, starting right after the sign.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in text to start parsing at
/// \param[in] end    Index in text to stop parsing at
/// \param[in] flags  Match flags; passed to the space parser with match_multiline cleared
///
/// \returns \c true when one of the three shapes matched; \c false otherwise, in which case all
///          sub-parsers are invalidated and interval is left with start greater than end.
// NOTE(review): the embedded numbering skips 1974 and 1987 inside this body - statements may be missing from this listing (presumably the interval.end updates after a fraction match); confirm against the original header.
1936 virtual bool match(
1937 _In_reads_or_z_(end) const T* text,
1938 _In_ size_t start = 0,
1939 _In_ size_t end = (size_t)-1,
1940 _In_ int flags = match_default)
1941 {
1942 assert(text || start >= end);
1943 interval.end = start;
1944
// Optional sign: keep the first one that matches, invalidate the rest.
1945 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
1946 interval.end = positive_sign->interval.end;
1947 if (negative_sign) negative_sign->invalidate();
1948 if (special_sign) special_sign->invalidate();
1949 }
1950 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
1951 interval.end = negative_sign->interval.end;
1952 if (positive_sign) positive_sign->invalidate();
1953 if (special_sign) special_sign->invalidate();
1954 }
1955 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
1956 interval.end = special_sign->interval.end;
1957 if (positive_sign) positive_sign->invalidate();
1958 if (negative_sign) negative_sign->invalidate();
1959 }
1960 else {
1961 if (positive_sign) positive_sign->invalidate();
1962 if (negative_sign) negative_sign->invalidate();
1963 if (special_sign) special_sign->invalidate();
1964 }
1965
1966 // Check for <integer> <fraction>
1967 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1968 if (integer->match(text, interval.end, end, flags) &&
1969 m_space->match(text, integer->interval.end, end, space_match_flags))
1970 {
// Consume any further spaces before trying the fraction.
1971 for (interval.end = m_space->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1972 if (fraction->match(text, interval.end, end, flags)) {
1973 interval.start = start;
1975 return true;
1976 }
// Integer and space matched but no fraction follows: accept the integer alone.
1977 fraction->invalidate();
1978 interval.start = start;
1979 interval.end = integer->interval.end;
1980 return true;
1981 }
1982
1983 // Check for <fraction>
1984 if (fraction->match(text, interval.end, end, flags)) {
1985 integer->invalidate();
1986 interval.start = start;
1988 return true;
1989 }
1990
1991 // Check for <integer>
1992 if (integer->match(text, interval.end, end, flags)) {
1993 fraction->invalidate();
1994 interval.start = start;
1995 interval.end = integer->interval.end;
1996 return true;
1997 }
1998
// Nothing matched: invalidate every sub-parser, including any sign that did match.
1999 if (positive_sign) positive_sign->invalidate();
2000 if (negative_sign) negative_sign->invalidate();
2001 if (special_sign) special_sign->invalidate();
2002 integer->invalidate();
2003 fraction->invalidate();
2004 interval.start = (interval.end = start) + 1;
2005 return false;
2006 }
2007
/// Invalidates the sign sub-parsers that are set, and the integer and fraction sub-parsers.
// NOTE(review): the embedded numbering skips 2015 inside this body - a statement may be missing from this listing; confirm against the original header.
2008 virtual void invalidate()
2009 {
2010 if (positive_sign) positive_sign->invalidate();
2011 if (negative_sign) negative_sign->invalidate();
2012 if (special_sign) special_sign->invalidate();
2013 integer->invalidate();
2014 fraction->invalidate();
2016 }
2017
2018 public:
2019 std::shared_ptr<basic_parser<T>> positive_sign;
2020 std::shared_ptr<basic_parser<T>> negative_sign;
2021 std::shared_ptr<basic_parser<T>> special_sign;
2022 std::shared_ptr<basic_parser<T>> integer;
2023 std::shared_ptr<basic_parser<T>> fraction;
2024
2025 protected:
2026 std::shared_ptr<basic_parser<T>> m_space;
2027 };
2028
2031#ifdef _UNICODE
2033#else
2035#endif
2037
2041 template <class T>
2043 {
2044 public:
2046 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2047 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2048 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2049 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2050 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2051 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2052 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2053 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2054 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2055 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2056 _In_ const std::locale& locale = std::locale()) :
2057 basic_parser<T>(locale),
2058 positive_sign(_positive_sign),
2059 negative_sign(_negative_sign),
2060 special_sign(_special_sign),
2061 integer(_integer),
2062 decimal_separator(_decimal_separator),
2063 decimal(_decimal),
2064 exponent_symbol(_exponent_symbol),
2065 positive_exp_sign(_positive_exp_sign),
2066 negative_exp_sign(_negative_exp_sign),
2067 exponent(_exponent),
2068 value(std::numeric_limits<double>::quiet_NaN())
2069 {}
2070
2071 virtual bool match(
2072 _In_reads_or_z_(end) const T* text,
2073 _In_ size_t start = 0,
2074 _In_ size_t end = (size_t)-1,
2075 _In_ int flags = match_default)
2076 {
2077 assert(text || start >= end);
2078 interval.end = start;
2079
2080 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
2081 interval.end = positive_sign->interval.end;
2082 if (negative_sign) negative_sign->invalidate();
2083 if (special_sign) special_sign->invalidate();
2084 }
2085 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
2086 interval.end = negative_sign->interval.end;
2087 if (positive_sign) positive_sign->invalidate();
2088 if (special_sign) special_sign->invalidate();
2089 }
2090 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
2091 interval.end = special_sign->interval.end;
2092 if (positive_sign) positive_sign->invalidate();
2093 if (negative_sign) negative_sign->invalidate();
2094 }
2095 else {
2096 if (positive_sign) positive_sign->invalidate();
2097 if (negative_sign) negative_sign->invalidate();
2098 if (special_sign) special_sign->invalidate();
2099 }
2100
2101 if (integer->match(text, interval.end, end, flags))
2102 interval.end = integer->interval.end;
2103
2104 if (decimal_separator->match(text, interval.end, end, flags) &&
2105 decimal->match(text, decimal_separator->interval.end, end, flags))
2106 interval.end = decimal->interval.end;
2107 else {
2108 decimal_separator->invalidate();
2109 decimal->invalidate();
2110 }
2111
2112 if (integer->interval.empty() &&
2113 decimal->interval.empty())
2114 {
2115 // No integer part, no decimal part.
2116 if (positive_sign) positive_sign->invalidate();
2117 if (negative_sign) negative_sign->invalidate();
2118 if (special_sign) special_sign->invalidate();
2119 integer->invalidate();
2120 decimal_separator->invalidate();
2121 decimal->invalidate();
2122 if (exponent_symbol) exponent_symbol->invalidate();
2123 if (positive_exp_sign) positive_exp_sign->invalidate();
2124 if (negative_exp_sign) negative_exp_sign->invalidate();
2125 if (exponent) exponent->invalidate();
2126 interval.start = (interval.end = start) + 1;
2127 return false;
2128 }
2129
2130 if (exponent_symbol && exponent_symbol->match(text, interval.end, end, flags) &&
2131 (positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2132 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags) ||
2133 exponent && exponent->match(text, exponent_symbol->interval.end, end, flags)))
2134 {
2135 interval.end = exponent->interval.end;
2136 if (negative_exp_sign) negative_exp_sign->invalidate();
2137 }
2138 else if (exponent_symbol && exponent_symbol->match(text, interval.end, end, flags) &&
2139 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2140 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2141 {
2142 interval.end = exponent->interval.end;
2143 if (positive_exp_sign) positive_exp_sign->invalidate();
2144 }
2145 else {
2146 if (exponent_symbol) exponent_symbol->invalidate();
2147 if (positive_exp_sign) positive_exp_sign->invalidate();
2148 if (negative_exp_sign) negative_exp_sign->invalidate();
2149 if (exponent) exponent->invalidate();
2150 }
2151
2152 value = (double)integer->value;
2153 if (decimal->interval)
2154 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2155 if (negative_sign && negative_sign->interval)
2156 value = -value;
2157 if (exponent && exponent->interval) {
2158 double e = (double)exponent->value;
2159 if (negative_exp_sign && negative_exp_sign->interval)
2160 e = -e;
2161 value *= pow(10.0, e);
2162 }
2163
2164 interval.start = start;
2165 return true;
2166 }
2167
/// Invalidates every sub-parser (the optional ones only when set) and resets value to a quiet NaN.
// NOTE(review): the embedded numbering skips 2181 inside this body - a statement may be missing from this listing; confirm against the original header.
2168 virtual void invalidate()
2169 {
2170 if (positive_sign) positive_sign->invalidate();
2171 if (negative_sign) negative_sign->invalidate();
2172 if (special_sign) special_sign->invalidate();
2173 integer->invalidate();
2174 decimal_separator->invalidate();
2175 decimal->invalidate();
2176 if (exponent_symbol) exponent_symbol->invalidate();
2177 if (positive_exp_sign) positive_exp_sign->invalidate();
2178 if (negative_exp_sign) negative_exp_sign->invalidate();
2179 if (exponent) exponent->invalidate();
2180 value = std::numeric_limits<double>::quiet_NaN();
2182 }
2183
2184 public:
2185 std::shared_ptr<basic_parser<T>> positive_sign;
2186 std::shared_ptr<basic_parser<T>> negative_sign;
2187 std::shared_ptr<basic_parser<T>> special_sign;
2188 std::shared_ptr<basic_integer<T>> integer;
2189 std::shared_ptr<basic_parser<T>> decimal_separator;
2190 std::shared_ptr<basic_integer<T>> decimal;
2191 std::shared_ptr<basic_parser<T>> exponent_symbol;
2192 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2193 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2194 std::shared_ptr<basic_integer<T>> exponent;
2195 double value;
2196 };
2197
2200#ifdef _UNICODE
2202#else
2204#endif
2206
2210 template <class T>
2212 {
2213 public:
2215 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2216 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2217 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2218 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2219 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2220 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2221 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2222 _In_ const std::locale& locale = std::locale()) :
2223 basic_parser<T>(locale),
2224 positive_sign(_positive_sign),
2225 negative_sign(_negative_sign),
2226 special_sign(_special_sign),
2227 currency(_currency),
2228 integer(_integer),
2229 decimal_separator(_decimal_separator),
2230 decimal(_decimal)
2231 {}
2232
2233 virtual bool match(
2234 _In_reads_or_z_(end) const T* text,
2235 _In_ size_t start = 0,
2236 _In_ size_t end = (size_t)-1,
2237 _In_ int flags = match_default)
2238 {
2239 assert(text || start >= end);
2240 interval.end = start;
2241
2242 if (positive_sign->match(text, interval.end, end, flags)) {
2243 interval.end = positive_sign->interval.end;
2244 if (negative_sign) negative_sign->invalidate();
2245 if (special_sign) special_sign->invalidate();
2246 }
2247 else if (negative_sign->match(text, interval.end, end, flags)) {
2248 interval.end = negative_sign->interval.end;
2249 if (positive_sign) positive_sign->invalidate();
2250 if (special_sign) special_sign->invalidate();
2251 }
2252 else if (special_sign->match(text, interval.end, end, flags)) {
2253 interval.end = special_sign->interval.end;
2254 if (positive_sign) positive_sign->invalidate();
2255 if (negative_sign) negative_sign->invalidate();
2256 }
2257 else {
2258 if (positive_sign) positive_sign->invalidate();
2259 if (negative_sign) negative_sign->invalidate();
2260 if (special_sign) special_sign->invalidate();
2261 }
2262
2263 if (currency->match(text, interval.end, end, flags))
2264 interval.end = currency->interval.end;
2265 else {
2266 if (positive_sign) positive_sign->invalidate();
2267 if (negative_sign) negative_sign->invalidate();
2268 if (special_sign) special_sign->invalidate();
2269 integer->invalidate();
2270 decimal_separator->invalidate();
2271 decimal->invalidate();
2272 interval.start = (interval.end = start) + 1;
2273 return false;
2274 }
2275
2276 if (integer->match(text, interval.end, end, flags))
2277 interval.end = integer->interval.end;
2278 if (decimal_separator->match(text, interval.end, end, flags) &&
2279 decimal->match(text, decimal_separator->interval.end, end, flags))
2280 interval.end = decimal->interval.end;
2281 else {
2282 decimal_separator->invalidate();
2283 decimal->invalidate();
2284 }
2285
2286 if (integer->interval.empty() &&
2287 decimal->interval.empty())
2288 {
2289 // No integer part, no decimal part.
2290 if (positive_sign) positive_sign->invalidate();
2291 if (negative_sign) negative_sign->invalidate();
2292 if (special_sign) special_sign->invalidate();
2293 currency->invalidate();
2294 integer->invalidate();
2295 decimal_separator->invalidate();
2296 decimal->invalidate();
2297 interval.start = (interval.end = start) + 1;
2298 return false;
2299 }
2300
2301 interval.start = start;
2302 return true;
2303 }
2304
/// Invalidates the sign sub-parsers that are set, and the currency, integer, decimal separator and decimal sub-parsers.
// NOTE(review): the embedded numbering skips 2314 inside this body - a statement may be missing from this listing; confirm against the original header.
2305 virtual void invalidate()
2306 {
2307 if (positive_sign) positive_sign->invalidate();
2308 if (negative_sign) negative_sign->invalidate();
2309 if (special_sign) special_sign->invalidate();
2310 currency->invalidate();
2311 integer->invalidate();
2312 decimal_separator->invalidate();
2313 decimal->invalidate();
2315 }
2316
2317 public:
2318 std::shared_ptr<basic_parser<T>> positive_sign;
2319 std::shared_ptr<basic_parser<T>> negative_sign;
2320 std::shared_ptr<basic_parser<T>> special_sign;
2321 std::shared_ptr<basic_parser<T>> currency;
2322 std::shared_ptr<basic_parser<T>> integer;
2323 std::shared_ptr<basic_parser<T>> decimal_separator;
2324 std::shared_ptr<basic_parser<T>> decimal;
2325 };
2326
2329#ifdef _UNICODE
2331#else
2333#endif
2335
2339 template <class T>
2341 {
2342 public:
// Constructor parameter list, member initializers and body.
// NOTE(review): the line naming the constructor (embedded line 2343) is not shown in this listing.
// digit_0 .. digit_9: parsers used by match() to recognize the decimal digits 0 .. 9.
// separator: parser used by match() to recognize the delimiter between two components.
// locale: forwarded to basic_parser<T>.
2344 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2345 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2346 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2347 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2348 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2349 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2350 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2351 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2352 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2353 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2354 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2355 _In_ const std::locale& locale = std::locale()) :
2356 basic_parser<T>(locale),
2357 m_digit_0(digit_0),
2358 m_digit_1(digit_1),
2359 m_digit_2(digit_2),
2360 m_digit_3(digit_3),
2361 m_digit_4(digit_4),
2362 m_digit_5(digit_5),
2363 m_digit_6(digit_6),
2364 m_digit_7(digit_7),
2365 m_digit_8(digit_8),
2366 m_digit_9(digit_9),
2367 m_separator(separator)
2368 {
// Start with an all-zero address.
2369 value.s_addr = 0;
2370 }
2371
// Matches an address made of four decimal components, each in 0..255, with
// m_separator required between consecutive components.
//
// text:  text to parse (may be null only when start >= end, see the assert)
// start: index at which matching begins
// end:   index one past the last character that may be examined
// flags: passed through unchanged to the digit and separator parsers
//
// On success interval is set to [start, end of the last component), value.s_addr
// holds the components packed as (value << 8) | component, and true is returned.
// On failure components[0..3] and interval are put into the inverted (start > end)
// state, value.s_addr is zeroed and false is returned.
// NOTE(review): the embedded listing numbers skip 2391 and 2418, so two original
// lines are not shown - presumably the ones recording components[i]; confirm.
// NOTE(review): interval is inherited from the dependent base basic_parser<T>;
// compilers with strict two-phase lookup need it spelled this->interval.
2372 virtual bool match(
2373 _In_reads_or_z_(end) const T* text,
2374 _In_ size_t start = 0,
2375 _In_ size_t end = (size_t)-1,
2376 _In_ int flags = match_default)
2377 {
2378 assert(text || start >= end);
2379 interval.end = start;
2380 value.s_addr = 0;
2381
2382 size_t i;
2383 for (i = 0; i < 4; i++) {
// Every component but the first must be preceded by a separator.
2384 if (i) {
2385 if (m_separator->match(text, interval.end, end, flags))
2386 interval.end = m_separator->interval.end;
2387 else
2388 goto error;
2389 }
2390
// Accumulate decimal digits while the running value stays within 0..255.
// A digit that would push the value above 255 is left unconsumed.
2392 bool is_empty = true;
2393 size_t x;
2394 for (x = 0; interval.end < end && text[interval.end];) {
2395 size_t dig, digit_end;
2396 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2397 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2398 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2399 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2400 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2401 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2402 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2403 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2404 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2405 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2406 else break;
2407 size_t x_n = x * 10 + dig;
2408 if (x_n <= 255) {
2409 x = x_n;
2410 interval.end = digit_end;
2411 is_empty = false;
2412 }
2413 else
2414 break;
2415 }
// A component without a single digit makes the whole address invalid.
2416 if (is_empty)
2417 goto error;
// Shift the previous components up by one octet and append this one.
2419 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2420 }
// The outer loop has no break of its own (both breaks above leave the inner loop),
// so it only finishes with i == 4; this check is a safety net.
2421 if (i < 4)
2422 goto error;
2423
2424 interval.start = start;
2425 return true;
2426
2427 error:
// start = 1, end = 0 is the inverted state used for "no match".
2428 components[0].start = 1;
2429 components[0].end = 0;
2430 components[1].start = 1;
2431 components[1].end = 0;
2432 components[2].start = 1;
2433 components[2].end = 0;
2434 components[3].start = 1;
2435 components[3].end = 0;
2436 value.s_addr = 0;
2437 interval.start = (interval.end = start) + 1;
2438 return false;
2439 }
2440
// Puts all four component intervals into the inverted state (start = 1, end = 0)
// and zeroes the parsed address value.
// NOTE(review): the embedded listing numbers skip from 2451 to 2453, so one
// original line is not shown here - presumably the reset of this parser's own
// interval; confirm against the original header.
2441 virtual void invalidate()
2442 {
2443 components[0].start = 1;
2444 components[0].end = 0;
2445 components[1].start = 1;
2446 components[1].end = 0;
2447 components[2].start = 1;
2448 components[2].end = 0;
2449 components[3].start = 1;
2450 components[3].end = 0;
2451 value.s_addr = 0;
2453 }
2454
2455 public:
2457 struct in_addr value;
2458
2459 protected:
2460 std::shared_ptr<basic_parser<T>>
2461 m_digit_0,
2462 m_digit_1,
2463 m_digit_2,
2464 m_digit_3,
2465 m_digit_4,
2466 m_digit_5,
2467 m_digit_6,
2468 m_digit_7,
2469 m_digit_8,
2470 m_digit_9;
2471 std::shared_ptr<basic_parser<T>> m_separator;
2472 };
2473
2476#ifdef _UNICODE
2478#else
2480#endif
2482
2486 template <class T>
2488 {
2489 public:
2490 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2491
2492 virtual bool match(
2493 _In_reads_or_z_(end) const T* text,
2494 _In_ size_t start = 0,
2495 _In_ size_t end = (size_t)-1,
2496 _In_ int flags = match_default)
2497 {
2498 assert(text || start >= end);
2499 if (start < end && text[start]) {
2500 if (text[start] == '-' ||
2501 text[start] == '_' ||
2502 text[start] == ':' ||
2503 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
2504 {
2505 interval.end = (interval.start = start) + 1;
2506 return true;
2507 }
2508 }
2509 interval.start = (interval.end = start) + 1;
2510 return false;
2511 }
2512 };
2513
2516#ifdef _UNICODE
2518#else
2520#endif
2521
2526 {
2527 public:
2528 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2529
2530 virtual bool match(
2531 _In_reads_or_z_(end) const char* text,
2532 _In_ size_t start = 0,
2533 _In_ size_t end = (size_t)-1,
2534 _In_ int flags = match_default)
2535 {
2536 assert(text || start >= end);
2537 if (start < end && text[start]) {
2538 wchar_t buf[3];
2539 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
2540 const wchar_t* chr_end = chr + stdex::strlen(chr);
2541 if ((chr[0] == L'-' ||
2542 chr[0] == L'_' ||
2543 chr[0] == L':') && chr[1] == 0 ||
2544 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2545 {
2546 interval.start = start;
2547 return true;
2548 }
2549 }
2550 interval.start = (interval.end = start) + 1;
2551 return false;
2552 }
2553 };
2554
2558 template <class T>
2560 {
2561 public:
// Constructor parameter list, member initializers and body.
// NOTE(review): the line naming the constructor (embedded line 2562) is not shown in this listing.
// digit_0 .. digit_15: parsers used by match() to recognize the hexadecimal digits 0 .. 15.
// separator: parser used by match() to recognize the delimiter between two components.
// scope_id_separator, _scope_id: optional (default null) parsers for a trailing scope ID;
// match() null-checks both before using them.
// locale: forwarded to basic_parser<T>.
2563 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2564 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2565 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2566 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2567 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2568 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2569 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2570 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2571 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2572 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2573 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2574 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2575 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2576 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2577 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2578 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2579 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2580 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2581 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2582 _In_ const std::locale& locale = std::locale()) :
2583 basic_parser<T>(locale),
2584 m_digit_0(digit_0),
2585 m_digit_1(digit_1),
2586 m_digit_2(digit_2),
2587 m_digit_3(digit_3),
2588 m_digit_4(digit_4),
2589 m_digit_5(digit_5),
2590 m_digit_6(digit_6),
2591 m_digit_7(digit_7),
2592 m_digit_8(digit_8),
2593 m_digit_9(digit_9),
2594 m_digit_10(digit_10),
2595 m_digit_11(digit_11),
2596 m_digit_12(digit_12),
2597 m_digit_13(digit_13),
2598 m_digit_14(digit_14),
2599 m_digit_15(digit_15),
2600 m_separator(separator),
2601 m_scope_id_separator(scope_id_separator),
2602 scope_id(_scope_id)
2603 {
// Start with an all-zero address.
2604 memset(&value, 0, sizeof(value));
2605 }
2606
// Matches an address of up to eight hexadecimal components (each 0..0xffff)
// delimited by m_separator, with at most one doubled separator ("zero compaction")
// standing in for a run of zero components, optionally followed by a scope ID.
//
// text:  text to parse (may be null only when start >= end, see the assert)
// start: index at which matching begins
// end:   index one past the last character that may be examined
// flags: passed through unchanged to the sub-parsers
//
// On success interval is set to [start, end of the address or scope ID),
// value.s6_words holds the eight components and true is returned.
// On failure components[0..7] and interval are put into the inverted (start > end)
// state, value is zeroed, scope_id (if any) is invalidated and false is returned.
// NOTE(review): the embedded listing numbers skip 2649 and 2686, so two original
// lines are not shown - presumably the ones recording components[i]; confirm.
// NOTE(review): interval is inherited from the dependent base basic_parser<T>;
// compilers with strict two-phase lookup need it spelled this->interval.
2607 virtual bool match(
2608 _In_reads_or_z_(end) const T* text,
2609 _In_ size_t start = 0,
2610 _In_ size_t end = (size_t)-1,
2611 _In_ int flags = match_default)
2612 {
2613 assert(text || start >= end);
2614 interval.end = start;
2615 memset(&value, 0, sizeof(value));
2616
// compaction_i == (size_t)-1 means no doubled separator has been seen yet.
2617 size_t i, compaction_i = (size_t)-1, compaction_start = start;
2618 for (i = 0; i < 8; i++) {
2619 bool is_empty = true;
2620
2621 if (m_separator->match(text, interval.end, end, flags)) {
// Probe for a second separator immediately after the first one.
2622 if (m_separator->match(text, m_separator->interval.end, end, flags)) {
2623 // :: found
2624 if (compaction_i == (size_t)-1) {
2625 // Zero compaction start
2626 compaction_i = i;
2627 compaction_start = m_separator->interval.start;
2628 interval.end = m_separator->interval.end;
2629 }
2630 else {
2631 // More than one zero compaction
2632 break;
2633 }
2634 }
2635 else if (i) {
2636 // Inner : found
2637 interval.end = m_separator->interval.end;
2638 }
2639 else {
2640 // Leading : found
2641 goto error;
2642 }
2643 }
2644 else if (i) {
2645 // : missing
2646 break;
2647 }
2648
// Accumulate hexadecimal digits while the running value stays within 0..0xffff.
// A digit that would push the value above 0xffff is left unconsumed.
2650 size_t x;
2651 for (x = 0; interval.end < end && text[interval.end];) {
2652 size_t dig, digit_end;
2653 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2654 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2655 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2656 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2657 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2658 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2659 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2660 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2661 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2662 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2663 else if (m_digit_10->match(text, interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2664 else if (m_digit_11->match(text, interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2665 else if (m_digit_12->match(text, interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2666 else if (m_digit_13->match(text, interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2667 else if (m_digit_14->match(text, interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2668 else if (m_digit_15->match(text, interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2669 else break;
2670 size_t x_n = x * 16 + dig;
2671 if (x_n <= 0xffff) {
2672 x = x_n;
2673 interval.end = digit_end;
2674 is_empty = false;
2675 }
2676 else
2677 break;
2678 }
2679 if (is_empty) {
2680 if (compaction_i != (size_t)-1) {
2681 // Zero compaction active: no sweat.
2682 break;
2683 }
2684 goto error;
2685 }
2687 value.s6_words[i] = (uint16_t)x;
2688 }
2689
2690 if (compaction_i != (size_t)-1) {
2691 // Align components right due to zero compaction.
2692 size_t j, k;
// Move the components parsed after the compaction point to the tail of the address...
2693 for (j = 8, k = i; k > compaction_i;) {
2694 value.s6_words[--j] = value.s6_words[--k];
2695 components[j] = components[k];
2696 }
// ...and fill the gap with zero words whose component intervals are empty at compaction_start.
2697 for (; j > compaction_i;) {
2698 value.s6_words[--j] = 0;
2699 components[j].start =
2700 components[j].end = compaction_start;
2701 }
2702 }
2703 else if (i < 8)
2704 goto error;
2705
// Optional scope ID: consumed only when both the separator and the ID match.
2706 if (m_scope_id_separator && m_scope_id_separator->match(text, interval.end, end, flags) &&
2707 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2708 interval.end = scope_id->interval.end;
2709 else if (scope_id)
2710 scope_id->invalidate();
2711
2712 interval.start = start;
2713 return true;
2714
2715 error:
// start = 1, end = 0 is the inverted state used for "no match".
2716 components[0].start = 1;
2717 components[0].end = 0;
2718 components[1].start = 1;
2719 components[1].end = 0;
2720 components[2].start = 1;
2721 components[2].end = 0;
2722 components[3].start = 1;
2723 components[3].end = 0;
2724 components[4].start = 1;
2725 components[4].end = 0;
2726 components[5].start = 1;
2727 components[5].end = 0;
2728 components[6].start = 1;
2729 components[6].end = 0;
2730 components[7].start = 1;
2731 components[7].end = 0;
2732 memset(&value, 0, sizeof(value));
2733 if (scope_id) scope_id->invalidate();
2734 interval.start = (interval.end = start) + 1;
2735 return false;
2736 }
2737
// Puts all eight component intervals into the inverted state (start = 1, end = 0),
// zeroes the parsed address value and invalidates the scope ID parser when one is set.
// NOTE(review): the embedded listing numbers skip from 2757 to 2759, so one
// original line is not shown here - presumably the reset of this parser's own
// interval; confirm against the original header.
2738 virtual void invalidate()
2739 {
2740 components[0].start = 1;
2741 components[0].end = 0;
2742 components[1].start = 1;
2743 components[1].end = 0;
2744 components[2].start = 1;
2745 components[2].end = 0;
2746 components[3].start = 1;
2747 components[3].end = 0;
2748 components[4].start = 1;
2749 components[4].end = 0;
2750 components[5].start = 1;
2751 components[5].end = 0;
2752 components[6].start = 1;
2753 components[6].end = 0;
2754 components[7].start = 1;
2755 components[7].end = 0;
2756 memset(&value, 0, sizeof(value));
2757 if (scope_id) scope_id->invalidate();
2759 }
2760
2761 public:
2763 struct in6_addr value;
2764 std::shared_ptr<basic_parser<T>> scope_id;
2765
2766 protected:
2767 std::shared_ptr<basic_parser<T>>
2768 m_digit_0,
2769 m_digit_1,
2770 m_digit_2,
2771 m_digit_3,
2772 m_digit_4,
2773 m_digit_5,
2774 m_digit_6,
2775 m_digit_7,
2776 m_digit_8,
2777 m_digit_9,
2778 m_digit_10,
2779 m_digit_11,
2780 m_digit_12,
2781 m_digit_13,
2782 m_digit_14,
2783 m_digit_15;
2784 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2785 };
2786
2789#ifdef _UNICODE
2791#else
2793#endif
2795
2799 template <class T>
2801 {
2802 public:
// Constructor parameter list and member initializers.
// NOTE(review): the line naming the constructor (embedded line 2803) is not shown in this listing.
// allow_idn: when true, match() also accepts any character the locale classifies as alphanumeric.
// locale: forwarded to basic_parser<T>.
// allow_on_edge starts out as true and is updated by every successful match().
2804 _In_ bool allow_idn,
2805 _In_ const std::locale& locale = std::locale()) :
2806 basic_parser<T>(locale),
2807 m_allow_idn(allow_idn),
2808 allow_on_edge(true)
2809 {}
2810
2811 virtual bool match(
2812 _In_reads_or_z_(end) const T* text,
2813 _In_ size_t start = 0,
2814 _In_ size_t end = (size_t)-1,
2815 _In_ int flags = match_default)
2816 {
2817 assert(text || start >= end);
2818 if (start < end && text[start]) {
2819 if (('A' <= text[start] && text[start] <= 'Z') ||
2820 ('a' <= text[start] && text[start] <= 'z') ||
2821 ('0' <= text[start] && text[start] <= '9'))
2822 allow_on_edge = true;
2823 else if (text[start] == '-')
2824 allow_on_edge = false;
2825 else if (m_allow_idn && std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
2826 allow_on_edge = true;
2827 else {
2828 interval.start = (interval.end = start) + 1;
2829 return false;
2830 }
2831 interval.end = (interval.start = start) + 1;
2832 return true;
2833 }
2834 interval.start = (interval.end = start) + 1;
2835 return false;
2836 }
2837
2838 public:
2840
2841 protected:
2842 bool m_allow_idn;
2843 };
2844
2847#ifdef _UNICODE
2849#else
2851#endif
2852
2857 {
2858 public:
// Constructor parameter list and base initializer.
// NOTE(review): the line naming the constructor (embedded line 2859) is not shown in this listing.
// Both arguments are forwarded unchanged to basic_dns_domain_char<char>.
2860 _In_ bool allow_idn,
2861 _In_ const std::locale& locale = std::locale()) :
2862 basic_dns_domain_char<char>(allow_idn, locale)
2863 {}
2864
2865 virtual bool match(
2866 _In_reads_or_z_(end) const char* text,
2867 _In_ size_t start = 0,
2868 _In_ size_t end = (size_t)-1,
2869 _In_ int flags = match_default)
2870 {
2871 assert(text || start >= end);
2872 if (start < end && text[start]) {
2873 wchar_t buf[3];
2874 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
2875 const wchar_t* chr_end = chr + stdex::strlen(chr);
2876 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2877 ('a' <= chr[0] && chr[0] <= 'z') ||
2878 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2879 allow_on_edge = true;
2880 else if (chr[0] == '-' && chr[1] == 0)
2881 allow_on_edge = false;
2882 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2883 allow_on_edge = true;
2884 else {
2885 interval.start = (interval.end = start) + 1;
2886 return false;
2887 }
2888 interval.start = start;
2889 return true;
2890 }
2891 interval.start = (interval.end = start) + 1;
2892 return false;
2893 }
2894 };
2895
2899 template <class T>
2901 {
2902 public:
// Constructor parameter list and member initializers.
// NOTE(review): the line naming the constructor (embedded line 2903) is not shown in this listing.
// allow_absolute: when true, match() includes a separator that follows a label in the matched interval.
// domain_char: parser for a single domain character; match() also reads its allow_on_edge flag.
// separator: parser for the delimiter between labels.
// locale: forwarded to basic_parser<T>.
2904 _In_ bool allow_absolute,
2905 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2906 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2907 _In_ const std::locale& locale = std::locale()) :
2908 basic_parser<T>(locale),
2909 m_allow_absolute(allow_absolute),
2910 m_domain_char(domain_char),
2911 m_separator(separator)
2912 {}
2913
2914 virtual bool match(
2915 _In_reads_or_z_(end) const T* text,
2916 _In_ size_t start = 0,
2917 _In_ size_t end = (size_t)-1,
2918 _In_ int flags = match_default)
2919 {
2920 assert(text || start >= end);
2921 size_t i = start, count;
2922 for (count = 0; i < end && text[i] && count < 127; count++) {
2923 if (m_domain_char->match(text, i, end, flags) &&
2924 m_domain_char->allow_on_edge)
2925 {
2926 // Domain start
2927 interval.end = i = m_domain_char->interval.end;
2928 while (i < end && text[i]) {
2929 if (m_domain_char->allow_on_edge &&
2930 m_separator->match(text, i, end, flags))
2931 {
2932 // Domain end
2933 if (m_allow_absolute)
2934 interval.end = i = m_separator->interval.end;
2935 else {
2936 interval.end = i;
2937 i = m_separator->interval.end;
2938 }
2939 break;
2940 }
2941 if (m_domain_char->match(text, i, end, flags)) {
2942 if (m_domain_char->allow_on_edge)
2943 interval.end = i = m_domain_char->interval.end;
2944 else
2945 i = m_domain_char->interval.end;
2946 }
2947 else {
2948 interval.start = start;
2949 return true;
2950 }
2951 }
2952 }
2953 else
2954 break;
2955 }
2956 if (count) {
2957 interval.start = start;
2958 return true;
2959 }
2960 interval.start = (interval.end = start) + 1;
2961 return false;
2962 }
2963
2964 protected:
2966 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2967 std::shared_ptr<basic_parser<T>> m_separator;
2968 };
2969
2972#ifdef _UNICODE
2973 using tdns_name = wdns_name;
2974#else
2975 using tdns_name = dns_name;
2976#endif
2978
2982 template <class T>
2984 {
2985 public:
2986 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2987
2988 virtual bool match(
2989 _In_reads_or_z_(end) const T* text,
2990 _In_ size_t start = 0,
2991 _In_ size_t end = (size_t)-1,
2992 _In_ int flags = match_default)
2993 {
2994 assert(text || start >= end);
2995 if (start < end && text[start]) {
2996 if (text[start] == '-' ||
2997 text[start] == '.' ||
2998 text[start] == '_' ||
2999 text[start] == '~' ||
3000 text[start] == '%' ||
3001 text[start] == '!' ||
3002 text[start] == '$' ||
3003 text[start] == '&' ||
3004 text[start] == '\'' ||
3005 //text[start] == '(' ||
3006 //text[start] == ')' ||
3007 text[start] == '*' ||
3008 text[start] == '+' ||
3009 text[start] == ',' ||
3010 text[start] == ';' ||
3011 text[start] == '=' ||
3012 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3013 {
3014 interval.end = (interval.start = start) + 1;
3015 return true;
3016 }
3017 }
3018 interval.start = (interval.end = start) + 1;
3019 return false;
3020 }
3021 };
3022
3025#ifdef _UNICODE
3027#else
3029#endif
3030
3035 {
3036 public:
3037 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3038
3039 virtual bool match(
3040 _In_reads_or_z_(end) const char* text,
3041 _In_ size_t start = 0,
3042 _In_ size_t end = (size_t)-1,
3043 _In_ int flags = match_default)
3044 {
3045 assert(text || start >= end);
3046 if (start < end && text[start]) {
3047 wchar_t buf[3];
3048 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3049 const wchar_t* chr_end = chr + stdex::strlen(chr);
3050 if ((chr[0] == L'-' ||
3051 chr[0] == L'.' ||
3052 chr[0] == L'_' ||
3053 chr[0] == L'~' ||
3054 chr[0] == L'%' ||
3055 chr[0] == L'!' ||
3056 chr[0] == L'$' ||
3057 chr[0] == L'&' ||
3058 chr[0] == L'\'' ||
3059 //chr[0] == L'(' ||
3060 //chr[0] == L')' ||
3061 chr[0] == L'*' ||
3062 chr[0] == L'+' ||
3063 chr[0] == L',' ||
3064 chr[0] == L';' ||
3065 chr[0] == L'=') && chr[1] == 0 ||
3066 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3067 {
3068 interval.start = start;
3069 return true;
3070 }
3071 }
3072
3073 interval.start = (interval.end = start) + 1;
3074 return false;
3075 }
3076 };
3077
3081 template <class T>
3083 {
3084 public:
3085 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3086
3087 virtual bool match(
3088 _In_reads_or_z_(end) const T* text,
3089 _In_ size_t start = 0,
3090 _In_ size_t end = (size_t)-1,
3091 _In_ int flags = match_default)
3092 {
3093 assert(text || start >= end);
3094 if (start < end && text[start]) {
3095 if (text[start] == '-' ||
3096 text[start] == '.' ||
3097 text[start] == '_' ||
3098 text[start] == '~' ||
3099 text[start] == '%' ||
3100 text[start] == '!' ||
3101 text[start] == '$' ||
3102 text[start] == '&' ||
3103 text[start] == '\'' ||
3104 text[start] == '(' ||
3105 text[start] == ')' ||
3106 text[start] == '*' ||
3107 text[start] == '+' ||
3108 text[start] == ',' ||
3109 text[start] == ';' ||
3110 text[start] == '=' ||
3111 text[start] == ':' ||
3112 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3113 {
3114 interval.end = (interval.start = start) + 1;
3115 return true;
3116 }
3117 }
3118 interval.start = (interval.end = start) + 1;
3119 return false;
3120 }
3121 };
3122
3125#ifdef _UNICODE
3127#else
3129#endif
3130
3135 {
3136 public:
3137 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3138
3139 virtual bool match(
3140 _In_reads_or_z_(end) const char* text,
3141 _In_ size_t start = 0,
3142 _In_ size_t end = (size_t)-1,
3143 _In_ int flags = match_default)
3144 {
3145 assert(text || start >= end);
3146 if (start < end && text[start]) {
3147 wchar_t buf[3];
3148 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3149 const wchar_t* chr_end = chr + stdex::strlen(chr);
3150 if ((chr[0] == L'-' ||
3151 chr[0] == L'.' ||
3152 chr[0] == L'_' ||
3153 chr[0] == L'~' ||
3154 chr[0] == L'%' ||
3155 chr[0] == L'!' ||
3156 chr[0] == L'$' ||
3157 chr[0] == L'&' ||
3158 chr[0] == L'\'' ||
3159 chr[0] == L'(' ||
3160 chr[0] == L')' ||
3161 chr[0] == L'*' ||
3162 chr[0] == L'+' ||
3163 chr[0] == L',' ||
3164 chr[0] == L';' ||
3165 chr[0] == L'=' ||
3166 chr[0] == L':') && chr[1] == 0 ||
3167 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3168 {
3169 interval.start = start;
3170 return true;
3171 }
3172 }
3173 interval.start = (interval.end = start) + 1;
3174 return false;
3175 }
3176 };
3177
3181 template <class T>
3183 {
3184 public:
3185 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3186
3187 virtual bool match(
3188 _In_reads_or_z_(end) const T* text,
3189 _In_ size_t start = 0,
3190 _In_ size_t end = (size_t)-1,
3191 _In_ int flags = match_default)
3192 {
3193 assert(text || start >= end);
3194 if (start < end && text[start]) {
3195 if (text[start] == '/' ||
3196 text[start] == '-' ||
3197 text[start] == '.' ||
3198 text[start] == '_' ||
3199 text[start] == '~' ||
3200 text[start] == '%' ||
3201 text[start] == '!' ||
3202 text[start] == '$' ||
3203 text[start] == '&' ||
3204 text[start] == '\'' ||
3205 text[start] == '(' ||
3206 text[start] == ')' ||
3207 text[start] == '*' ||
3208 text[start] == '+' ||
3209 text[start] == ',' ||
3210 text[start] == ';' ||
3211 text[start] == '=' ||
3212 text[start] == ':' ||
3213 text[start] == '@' ||
3214 text[start] == '?' ||
3215 text[start] == '#' ||
3216 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3217 {
3218 interval.end = (interval.start = start) + 1;
3219 return true;
3220 }
3221 }
3222 interval.start = (interval.end = start) + 1;
3223 return false;
3224 }
3225 };
3226
3229#ifdef _UNICODE
3231#else
3233#endif
3234
3239 {
3240 public:
3241 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3242
3243 virtual bool match(
3244 _In_reads_or_z_(end) const char* text,
3245 _In_ size_t start = 0,
3246 _In_ size_t end = (size_t)-1,
3247 _In_ int flags = match_default)
3248 {
3249 assert(text || start >= end);
3250 if (start < end && text[start]) {
3251 wchar_t buf[3];
3252 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3253 const wchar_t* chr_end = chr + stdex::strlen(chr);
3254 if ((chr[0] == L'/' ||
3255 chr[0] == L'-' ||
3256 chr[0] == L'.' ||
3257 chr[0] == L'_' ||
3258 chr[0] == L'~' ||
3259 chr[0] == L'%' ||
3260 chr[0] == L'!' ||
3261 chr[0] == L'$' ||
3262 chr[0] == L'&' ||
3263 chr[0] == L'\'' ||
3264 chr[0] == L'(' ||
3265 chr[0] == L')' ||
3266 chr[0] == L'*' ||
3267 chr[0] == L'+' ||
3268 chr[0] == L',' ||
3269 chr[0] == L';' ||
3270 chr[0] == L'=' ||
3271 chr[0] == L':' ||
3272 chr[0] == L'@' ||
3273 chr[0] == L'?' ||
3274 chr[0] == L'#') && chr[1] == 0 ||
3275 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3276 {
3277 interval.start = start;
3278 return true;
3279 }
3280 }
3281 interval.start = (interval.end = start) + 1;
3282 return false;
3283 }
3284 };
3285
3289 template <class T>
3291 {
3292 public:
// Constructor parameter list and member initializers.
// NOTE(review): the line naming the constructor (embedded line 3293) is not shown in this listing.
// path_char: parser for a single character of the path, query or bookmark.
// query_start: parser for the delimiter that begins the query part.
// bookmark_start: parser for the delimiter that begins the bookmark part.
// locale: forwarded to basic_parser<T>.
3294 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3295 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3296 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3297 _In_ const std::locale& locale = std::locale()) :
3298 basic_parser<T>(locale),
3299 m_path_char(path_char),
3300 m_query_start(query_start),
3301 m_bookmark_start(bookmark_start)
3302 {}
3303
3304 virtual bool match(
3305 _In_reads_or_z_(end) const T* text,
3306 _In_ size_t start = 0,
3307 _In_ size_t end = (size_t)-1,
3308 _In_ int flags = match_default)
3309 {
3310 assert(text || start >= end);
3311
3312 interval.end = start;
3313 path.start = start;
3314 query.start = 1;
3315 query.end = 0;
3316 bookmark.start = 1;
3317 bookmark.end = 0;
3318
3319 for (;;) {
3320 if (interval.end >= end || !text[interval.end])
3321 break;
3322 if (m_query_start->match(text, interval.end, end, flags)) {
3323 path.end = interval.end;
3324 query.start = interval.end = m_query_start->interval.end;
3325 for (;;) {
3326 if (interval.end >= end || !text[interval.end]) {
3327 query.end = interval.end;
3328 break;
3329 }
3330 if (m_bookmark_start->match(text, interval.end, end, flags)) {
3331 query.end = interval.end;
3332 bookmark.start = interval.end = m_bookmark_start->interval.end;
3333 for (;;) {
3334 if (interval.end >= end || !text[interval.end]) {
3335 bookmark.end = interval.end;
3336 break;
3337 }
3338 if (m_path_char->match(text, interval.end, end, flags))
3339 interval.end = m_path_char->interval.end;
3340 else {
3341 bookmark.end = interval.end;
3342 break;
3343 }
3344 }
3345 interval.start = start;
3346 return true;
3347 }
3348 if (m_path_char->match(text, interval.end, end, flags))
3349 interval.end = m_path_char->interval.end;
3350 else {
3351 query.end = interval.end;
3352 break;
3353 }
3354 }
3355 interval.start = start;
3356 return true;
3357 }
3358 if (m_bookmark_start->match(text, interval.end, end, flags)) {
3359 path.end = interval.end;
3360 bookmark.start = interval.end = m_bookmark_start->interval.end;
3361 for (;;) {
3362 if (interval.end >= end || !text[interval.end]) {
3363 bookmark.end = interval.end;
3364 break;
3365 }
3366 if (m_path_char->match(text, interval.end, end, flags))
3367 interval.end = m_path_char->interval.end;
3368 else {
3369 bookmark.end = interval.end;
3370 break;
3371 }
3372 }
3373 interval.start = start;
3374 return true;
3375 }
3376 if (m_path_char->match(text, interval.end, end, flags))
3377 interval.end = m_path_char->interval.end;
3378 else
3379 break;
3380 }
3381
3382 if (start < interval.end) {
3383 path.end = interval.end;
3384 interval.start = start;
3385 return true;
3386 }
3387
3388 path.start = 1;
3389 path.end = 0;
3390 bookmark.start = 1;
3391 bookmark.end = 0;
3392 interval.start = (interval.end = start) + 1;
3393 return false;
3394 }
3395
// Puts the path, query and bookmark intervals into the inverted state (start = 1, end = 0).
// NOTE(review): the embedded listing numbers skip from 3403 to 3405, so one
// original line is not shown here - presumably the reset of this parser's own
// interval; confirm against the original header.
3396 virtual void invalidate()
3397 {
3398 path.start = 1;
3399 path.end = 0;
3400 query.start = 1;
3401 query.end = 0;
3402 bookmark.start = 1;
3403 bookmark.end = 0;
3405 }
3406
3407 public:
3410 stdex::interval<size_t> bookmark;
3411
3412 protected:
3413 std::shared_ptr<basic_parser<T>> m_path_char;
3414 std::shared_ptr<basic_parser<T>> m_query_start;
3415 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3416 };
3417
3420#ifdef _UNICODE
3421 using turl_path = wurl_path;
3422#else
3423 using turl_path = url_path;
3424#endif
3426
3430 template <class T>
3431 class basic_url : public basic_parser<T>
3432 {
3433 public:
3434 basic_url(
3435 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3436 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3437 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3438 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3439 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3440 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3441 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3442 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3443 _In_ const std::shared_ptr<basic_parser<T>>& at,
3444 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3445 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3446 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3447 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3448 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3449 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3450 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3451 _In_ const std::locale& locale = std::locale()) :
3452 basic_parser<T>(locale),
3453 http_scheme(_http_scheme),
3454 ftp_scheme(_ftp_scheme),
3455 mailto_scheme(_mailto_scheme),
3456 file_scheme(_file_scheme),
3457 m_colon(colon),
3458 m_slash(slash),
3459 username(_username),
3460 password(_password),
3461 m_at(at),
3462 m_ip_lbracket(ip_lbracket),
3463 m_ip_rbracket(ip_rbracket),
3464 ipv4_host(_ipv4_host),
3465 ipv6_host(_ipv6_host),
3466 dns_host(_dns_host),
3467 port(_port),
3468 path(_path)
3469 {}
3470
/// Tries to match a URL beginning at position `start` of `text`.
///
/// Scheme detection is attempted in this order: http_scheme + colon + two slashes,
/// ftp_scheme + colon + two slashes, mailto_scheme + colon, file_scheme + colon + two slashes.
/// When none of them matches, all four scheme sub-parsers are invalidated and the text is
/// parsed the same way as after "http://", except that no username/password is looked for.
///
/// Per scheme, the remainder is parsed as:
/// - ftp / http: optional username[:password]@, required host (IPv4, bracketed IPv6 or DNS name),
///   optional :port, optional path.
/// - mailto: required username@, required host (bracketed IPv4, bracketed IPv6 or DNS name).
/// - file: optional path only.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` at which matching starts
/// \param[in] end    Index in `text` at which matching must stop
/// \param[in] flags  Match flags, forwarded unchanged to every sub-parser
///
/// \returns true when a URL was matched (interval.start is set to `start`, interval.end to the
///          end of the last accepted component); false when a required component is missing,
///          in which case invalidate() has been called.
3471 virtual bool match(
3472 _In_reads_or_z_(end) const T* text,
3473 _In_ size_t start = 0,
3474 _In_ size_t end = (size_t)-1,
3475 _In_ int flags = match_default)
3476 {
3477 assert(text || start >= end);
3478
// interval.end is used as the running cursor: it always points just past the last accepted component.
3479 interval.end = start;
3480
3481 if (http_scheme->match(text, interval.end, end, flags) &&
3482 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3483 m_slash->match(text, m_colon->interval.end, end, flags) &&
3484 m_slash->match(text, m_slash->interval.end, end, flags))
3485 {
3486 // http://
3487 interval.end = m_slash->interval.end;
3488 ftp_scheme->invalidate();
3489 mailto_scheme->invalidate();
3490 file_scheme->invalidate();
3491 }
3492 else if (ftp_scheme->match(text, interval.end, end, flags) &&
3493 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3494 m_slash->match(text, m_colon->interval.end, end, flags) &&
3495 m_slash->match(text, m_slash->interval.end, end, flags))
3496 {
3497 // ftp://
3498 interval.end = m_slash->interval.end;
3499 http_scheme->invalidate();
3500 mailto_scheme->invalidate();
3501 file_scheme->invalidate();
3502 }
3503 else if (mailto_scheme->match(text, interval.end, end, flags) &&
3504 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3505 {
3506 // mailto:
3507 interval.end = m_colon->interval.end;
3508 http_scheme->invalidate();
3509 ftp_scheme->invalidate();
3510 file_scheme->invalidate();
3511 }
3512 else if (file_scheme->match(text, interval.end, end, flags) &&
3513 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3514 m_slash->match(text, m_colon->interval.end, end, flags) &&
3515 m_slash->match(text, m_slash->interval.end, end, flags))
3516 {
3517 // file://
3518 interval.end = m_slash->interval.end;
3519 http_scheme->invalidate();
3520 ftp_scheme->invalidate();
3521 mailto_scheme->invalidate();
3522 }
3523 else {
3524 // Default to http:
// No scheme sub-parser is left matched, so none of the scheme-specific branches below is taken
// and parsing falls through to the trailing http section.
3525 http_scheme->invalidate();
3526 ftp_scheme->invalidate();
3527 mailto_scheme->invalidate();
3528 file_scheme->invalidate();
3529 }
3530
3531 if (ftp_scheme->interval) {
3532 if (username->match(text, interval.end, end, flags)) {
3533 if (m_colon->match(text, username->interval.end, end, flags) &&
3534 password->match(text, m_colon->interval.end, end, flags) &&
3535 m_at->match(text, password->interval.end, end, flags))
3536 {
3537 // Username and password
3538 interval.end = m_at->interval.end;
3539 }
// The '@' has to be looked for right after the username, exactly as the http section does.
// (Testing at interval.end would probe the position where the username itself begins.)
3540 else if (m_at->match(text, username->interval.end, end, flags)) {
3541 // Username only
3542 interval.end = m_at->interval.end;
3543 password->invalidate();
3544 }
3545 else {
// No '@' follows: what looked like a username is not one. Drop it and let the host parsers
// retry from the same cursor position.
3546 username->invalidate();
3547 password->invalidate();
3548 }
3549 }
3550 else {
3551 username->invalidate();
3552 password->invalidate();
3553 }
3554
3555 if (ipv4_host->match(text, interval.end, end, flags)) {
3556 // Host is IPv4
3557 interval.end = ipv4_host->interval.end;
3558 ipv6_host->invalidate();
3559 dns_host->invalidate();
3560 }
3561 else if (
3562 m_ip_lbracket->match(text, interval.end, end, flags) &&
3563 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3564 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3565 {
3566 // Host is IPv6
3567 interval.end = m_ip_rbracket->interval.end;
3568 ipv4_host->invalidate();
3569 dns_host->invalidate();
3570 }
3571 else if (dns_host->match(text, interval.end, end, flags)) {
3572 // Host is hostname
3573 interval.end = dns_host->interval.end;
3574 ipv4_host->invalidate();
3575 ipv6_host->invalidate();
3576 }
3577 else {
// Host is mandatory for ftp.
3578 invalidate();
3579 return false;
3580 }
3581
3582 if (m_colon->match(text, interval.end, end, flags) &&
3583 port->match(text, m_colon->interval.end, end, flags))
3584 {
3585 // Port
3586 interval.end = port->interval.end;
3587 }
3588 else
3589 port->invalidate();
3590
3591 if (path->match(text, interval.end, end, flags)) {
3592 // Path
3593 interval.end = path->interval.end;
3594 }
3595
3596 interval.start = start;
3597 return true;
3598 }
3599
3600 if (mailto_scheme->interval) {
// mailto requires "username@"; there is no password, port or path in this form.
3601 if (username->match(text, interval.end, end, flags) &&
3602 m_at->match(text, username->interval.end, end, flags))
3603 {
3604 // Username
3605 interval.end = m_at->interval.end;
3606 }
3607 else {
3608 invalidate();
3609 return false;
3610 }
3611
// Unlike the ftp/http sections, an IPv4 host is only accepted inside brackets here.
3612 if (m_ip_lbracket->match(text, interval.end, end, flags) &&
3613 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3614 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3615 {
3616 // Host is IPv4
3617 interval.end = m_ip_rbracket->interval.end;
3618 ipv6_host->invalidate();
3619 dns_host->invalidate();
3620 }
3621 else if (
3622 m_ip_lbracket->match(text, interval.end, end, flags) &&
3623 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3624 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3625 {
3626 // Host is IPv6
3627 interval.end = m_ip_rbracket->interval.end;
3628 ipv4_host->invalidate();
3629 dns_host->invalidate();
3630 }
3631 else if (dns_host->match(text, interval.end, end, flags)) {
3632 // Host is hostname
3633 interval.end = dns_host->interval.end;
3634 ipv4_host->invalidate();
3635 ipv6_host->invalidate();
3636 }
3637 else {
3638 invalidate();
3639 return false;
3640 }
3641
3642 password->invalidate();
3643 port->invalidate();
3644 path->invalidate();
3645 interval.start = start;
3646 return true;
3647 }
3648
3649 if (file_scheme->interval) {
// file:// carries a path only; the path itself is optional, so this branch always succeeds.
3650 if (path->match(text, interval.end, end, flags)) {
3651 // Path
3652 interval.end = path->interval.end;
3653 }
3654
3655 username->invalidate();
3656 password->invalidate();
3657 ipv4_host->invalidate();
3658 ipv6_host->invalidate();
3659 dns_host->invalidate();
3660 port->invalidate();
3661 interval.start = start;
3662 return true;
3663 }
3664
3665 // "http://" found or defaulted to
3666
3667 // If "http://" explicit, test for username&password.
3668 if (http_scheme->interval &&
3669 username->match(text, interval.end, end, flags))
3670 {
3671 if (m_colon->match(text, username->interval.end, end, flags) &&
3672 password->match(text, m_colon->interval.end, end, flags) &&
3673 m_at->match(text, password->interval.end, end, flags))
3674 {
3675 // Username and password
3676 interval.end = m_at->interval.end;
3677 }
3678 else if (m_at->match(text, username->interval.end, end, flags)) {
3679 // Username only
3680 interval.end = m_at->interval.end;
3681 password->invalidate();
3682 }
3683 else {
3684 username->invalidate();
3685 password->invalidate();
3686 }
3687 }
3688 else {
3689 username->invalidate();
3690 password->invalidate();
3691 }
3692
3693 if (ipv4_host->match(text, interval.end, end, flags)) {
3694 // Host is IPv4
3695 interval.end = ipv4_host->interval.end;
3696 ipv6_host->invalidate();
3697 dns_host->invalidate();
3698 }
3699 else if (
3700 m_ip_lbracket->match(text, interval.end, end, flags) &&
3701 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3702 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3703 {
3704 // Host is IPv6
3705 interval.end = m_ip_rbracket->interval.end;
3706 ipv4_host->invalidate();
3707 dns_host->invalidate();
3708 }
3709 else if (dns_host->match(text, interval.end, end, flags)) {
3710 // Host is hostname
3711 interval.end = dns_host->interval.end;
3712 ipv4_host->invalidate();
3713 ipv6_host->invalidate();
3714 }
3715 else {
// Host is mandatory for http (explicit or defaulted).
3716 invalidate();
3717 return false;
3718 }
3719
3720 if (m_colon->match(text, interval.end, end, flags) &&
3721 port->match(text, m_colon->interval.end, end, flags))
3722 {
3723 // Port
3724 interval.end = port->interval.end;
3725 }
3726 else
3727 port->invalidate();
3728
3729 if (path->match(text, interval.end, end, flags)) {
3730 // Path
3731 interval.end = path->interval.end;
3732 }
3733
3734 interval.start = start;
3735 return true;
3736 }
3737
/// Invalidates every public component sub-parser (schemes, credentials, hosts, port and path).
///
/// The punctuation helpers (m_colon, m_slash, m_at, m_ip_lbracket, m_ip_rbracket) are not touched.
// NOTE(review): listing line 3751 is absent from this rendering - presumably the call that resets
// this parser's own interval via the base class; confirm against the original header.
3738 virtual void invalidate()
3739 {
3740 http_scheme->invalidate();
3741 ftp_scheme->invalidate();
3742 mailto_scheme->invalidate();
3743 file_scheme->invalidate();
3744 username->invalidate();
3745 password->invalidate();
3746 ipv4_host->invalidate();
3747 ipv6_host->invalidate();
3748 dns_host->invalidate();
3749 port->invalidate();
3750 path->invalidate();
3752 }
3753
// Component sub-parsers. After match() they stay matched only for the components that were
// accepted as part of the URL; all others are invalidated.
3754 public:
3755 std::shared_ptr<basic_parser<T>> http_scheme; // scheme tried first; also the form assumed when no scheme matches
3756 std::shared_ptr<basic_parser<T>> ftp_scheme;
3757 std::shared_ptr<basic_parser<T>> mailto_scheme;
3758 std::shared_ptr<basic_parser<T>> file_scheme;
3759 std::shared_ptr<basic_parser<T>> username;
3760 std::shared_ptr<basic_parser<T>> password;
3761 std::shared_ptr<basic_parser<T>> ipv4_host;
3762 std::shared_ptr<basic_parser<T>> ipv6_host; // matched between m_ip_lbracket and m_ip_rbracket
3763 std::shared_ptr<basic_parser<T>> dns_host;
3764 std::shared_ptr<basic_parser<T>> port; // matched after m_colon following the host
3765 std::shared_ptr<basic_parser<T>> path;
3766
// Punctuation sub-parsers. They are re-used at several positions during match(), so their
// intervals only reflect the most recent attempt.
3767 protected:
3768 std::shared_ptr<basic_parser<T>> m_colon;
3769 std::shared_ptr<basic_parser<T>> m_slash;
3770 std::shared_ptr<basic_parser<T>> m_at;
3771 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3772 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3773 };
3774
// Character-type specific aliases of basic_url. turl selects the wchar_t variant when _UNICODE is defined.
3775 using url = basic_url<char>;
3776 using wurl = basic_url<wchar_t>;
3777#ifdef _UNICODE
3778 using turl = wurl;
3779#else
3780 using turl = url;
3781#endif
// Same instantiation as url (char).
3782 using sgml_url = basic_url<char>;
3783
// Parser for text of the form username@host, where host is a bracketed IPv4 address,
// a bracketed IPv6 address or a DNS name.
// NOTE(review): the class declaration line (listing line 3788), the constructor name line (3791)
// and listing line 3868 are absent from this rendering; confirm them against the original header.
3787 template <class T>
3789 {
3790 public:
// Constructor parameters: the sub-parsers for each component. They are stored as shared
// pointers and dereferenced without a null check in match() and invalidate().
3792 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3793 _In_ const std::shared_ptr<basic_parser<T>>& at,
3794 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3795 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3796 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3797 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3798 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3799 _In_ const std::locale& locale = std::locale()) :
3800 basic_parser<T>(locale),
3801 username(_username),
3802 m_at(at),
3803 m_ip_lbracket(ip_lbracket),
3804 m_ip_rbracket(ip_rbracket),
3805 ipv4_host(_ipv4_host),
3806 ipv6_host(_ipv6_host),
3807 dns_host(_dns_host)
3808 {}
3809
/// Tries to match username@host at position `start` of `text`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` at which matching starts
/// \param[in] end    Index in `text` at which matching must stop
/// \param[in] flags  Match flags, forwarded unchanged to every sub-parser
///
/// \returns true on match, with interval covering the username through the end of the host
///          (including the closing bracket for IP hosts); false otherwise, with all component
///          sub-parsers invalidated.
3810 virtual bool match(
3811 _In_reads_or_z_(end) const T* text,
3812 _In_ size_t start = 0,
3813 _In_ size_t end = (size_t)-1,
3814 _In_ int flags = match_default)
3815 {
3816 assert(text || start >= end);
3817
3818 if (username->match(text, start, end, flags) &&
3819 m_at->match(text, username->interval.end, end, flags))
3820 {
3821 // Username@
// Host alternatives are tried in order: [IPv4], [IPv6], DNS name. Exactly one of the three
// host sub-parsers is left matched on success.
3822 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3823 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3824 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3825 {
3826 // Host is IPv4
3827 interval.end = m_ip_rbracket->interval.end;
3828 ipv6_host->invalidate();
3829 dns_host->invalidate();
3830 }
3831 else if (
3832 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3833 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3834 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3835 {
3836 // Host is IPv6
3837 interval.end = m_ip_rbracket->interval.end;
3838 ipv4_host->invalidate();
3839 dns_host->invalidate();
3840 }
3841 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3842 // Host is hostname
3843 interval.end = dns_host->interval.end;
3844 ipv4_host->invalidate();
3845 ipv6_host->invalidate();
3846 }
3847 else
3848 goto error;
3849 interval.start = start;
3850 return true;
3851 }
3852
// Failure path, reached when username@ is missing or no host alternative matched.
3853 error:
3854 username->invalidate();
3855 ipv4_host->invalidate();
3856 ipv6_host->invalidate();
3857 dns_host->invalidate();
// Leaves interval with start == end + 1, i.e. an empty (not matched) range.
3858 interval.start = (interval.end = start) + 1;
3859 return false;
3860 }
3861
/// Invalidates the username and all three host sub-parsers.
3862 virtual void invalidate()
3863 {
3864 username->invalidate();
3865 ipv4_host->invalidate();
3866 ipv6_host->invalidate();
3867 dns_host->invalidate();
3869 }
3870
// Component sub-parsers; after a successful match() username and exactly one host parser are matched.
3871 public:
3872 std::shared_ptr<basic_parser<T>> username;
3873 std::shared_ptr<basic_parser<T>> ipv4_host;
3874 std::shared_ptr<basic_parser<T>> ipv6_host;
3875 std::shared_ptr<basic_parser<T>> dns_host;
3876
// Punctuation sub-parsers used between the components.
3877 protected:
3878 std::shared_ptr<basic_parser<T>> m_at;
3879 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3880 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3881 };
3882
3885#ifdef _UNICODE
3887#else
3889#endif
3891
// Parser for emoticons: either a whole emoticon recognized by the `emoticon` sub-parser, or a
// composed one of the form [apex] eyes [nose] mouth, where the mouth may repeat.
// NOTE(review): the class declaration line (listing line 3896), the constructor name line (3899)
// and listing lines 3928 and 3981 are absent from this rendering; confirm against the original header.
3895 template <class T>
3897 {
3898 public:
// Constructor parameters: emoticon, apex and nose may be null (they are null-checked before
// use); eyes and mouth are dereferenced unconditionally.
3900 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3901 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3902 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3903 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3904 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3905 _In_ const std::locale& locale = std::locale()) :
3906 basic_parser<T>(locale),
3907 emoticon(_emoticon),
3908 apex(_apex),
3909 eyes(_eyes),
3910 nose(_nose),
3911 mouth(_mouth)
3912 {}
3913
/// Tries to match an emoticon at position `start` of `text`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` at which matching starts
/// \param[in] end    Index in `text` at which matching must stop
/// \param[in] flags  Match flags, forwarded unchanged to every sub-parser
///
/// \returns true on match; false otherwise, with all sub-parsers invalidated and interval left
///          empty (start == end + 1).
3914 virtual bool match(
3915 _In_reads_or_z_(end) const T* text,
3916 _In_ size_t start = 0,
3917 _In_ size_t end = (size_t)-1,
3918 _In_ int flags = match_default)
3919 {
3920 assert(text || start >= end);
3921
// A whole-emoticon match takes precedence; the component sub-parsers are then left unmatched.
3922 if (emoticon && emoticon->match(text, start, end, flags)) {
3923 if (apex) apex->invalidate();
3924 eyes->invalidate();
3925 if (nose) nose->invalidate();
3926 mouth->invalidate();
3927 interval.start = start;
// NOTE(review): listing line 3928 is missing here - presumably it sets interval.end from the
// emoticon sub-parser; confirm against the original header.
3929 return true;
3930 }
3931
3932 interval.end = start;
3933
// The apex is optional: when present, the eyes are searched for after it.
3934 if (apex && apex->match(text, interval.end, end, flags))
3935 interval.end = apex->interval.end;
3936
3937 if (eyes->match(text, interval.end, end, flags)) {
// First alternative: eyes, nose, mouth.
3938 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3939 mouth->match(text, nose->interval.end, end, flags))
3940 {
3941 size_t
3942 start_mouth = mouth->interval.start,
3943 hit_offset = mouth->hit_offset;
3944 // Mouth may repeat :-)))))))
// Repeats are accepted only while the mouth set reports the same hit_offset as the first hit.
3945 for (interval.end = mouth->interval.end; mouth->match(text, interval.end, end, flags) && mouth->hit_offset == hit_offset; interval.end = mouth->interval.end);
// The loop's last (failed or different) attempt overwrote mouth->interval; restore it to span all repeats.
3946 mouth->interval.start = start_mouth;
3947 mouth->interval.end = interval.end;
3948 interval.start = start;
3949 return true;
3950 }
// Second alternative: eyes directly followed by the mouth (no nose).
3951 if (mouth->match(text, eyes->interval.end, end, flags)) {
3952 size_t
3953 start_mouth = mouth->interval.start,
3954 hit_offset = mouth->hit_offset;
3955 // Mouth may repeat :-)))))))
3956 for (interval.end = mouth->interval.end; mouth->match(text, interval.end, end, flags) && mouth->hit_offset == hit_offset; interval.end = mouth->interval.end);
3957 if (nose) nose->invalidate();
3958 mouth->interval.start = start_mouth;
3959 mouth->interval.end = interval.end;
3960 interval.start = start;
3961 return true;
3962 }
3963 }
3964
// Nothing matched: reset every sub-parser and mark this parser as not matched.
3965 if (emoticon) emoticon->invalidate();
3966 if (apex) apex->invalidate();
3967 eyes->invalidate();
3968 if (nose) nose->invalidate();
3969 mouth->invalidate();
3970 interval.start = (interval.end = start) + 1;
3971 return false;
3972 }
3973
/// Invalidates all sub-parsers (the optional ones only when set).
3974 virtual void invalidate()
3975 {
3976 if (emoticon) emoticon->invalidate();
3977 if (apex) apex->invalidate();
3978 eyes->invalidate();
3979 if (nose) nose->invalidate();
3980 mouth->invalidate();
3982 }
3983
3984 public:
3985 std::shared_ptr<basic_parser<T>> emoticon; // whole-emoticon parser; may be null
3986 std::shared_ptr<basic_parser<T>> apex; // optional leading part before the eyes; may be null
3987 std::shared_ptr<basic_parser<T>> eyes; // required
3988 std::shared_ptr<basic_parser<T>> nose; // may be null
3989 std::shared_ptr<basic_set<T>> mouth; // required; a set, so that repeats can be compared by hit_offset
3990 };
3991
// temoticon selects the wide-character emoticon parser when _UNICODE is defined.
// NOTE(review): the declarations of emoticon and wemoticon (listing lines 3992-3993) are absent
// from this rendering.
3994#ifdef _UNICODE
3995 using temoticon = wemoticon;
3996#else
3997 using temoticon = emoticon;
3998#endif
4000
// Bit flags naming the date component orders. ENUM_FLAGS declares the scoped enum together
// with its |, ^, & and ~ operators, so the values can be combined into a mask.
4004 ENUM_FLAGS(date_format_t, int) {
4005 none = 0,
4006 dmy = 0x1, // day, month, year
4007 mdy = 0x2, // month, day, year
4008 ymd = 0x4, // year, month, day
4009 ym = 0x8, // year, month
4010 my = 0x10, // month, year
4011 dm = 0x20, // day, month
4012 md = 0x40, // month, day
4013 };
4014
/// Parser for dates written as two or three integer components divided by a separator.
///
/// Which component orders are accepted is selected by a mask of date_format_t bits given
/// to the constructor; the order that matched is reported in `format`.
4018 template <class T>
4019 class basic_date : public basic_parser<T>
4020 {
4021 public:
/// \param[in] format_mask  Combination of date_format_t bits selecting the accepted formats
/// \param[in] _day         Day parser
/// \param[in] _month       Month parser
/// \param[in] _year        Year parser
/// \param[in] separator    Set of accepted component separators
/// \param[in] space        Parser for whitespace allowed around separators
/// \param[in] locale       Locale passed to the base parser
4022 basic_date(
4023 _In_ int format_mask,
4024 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4025 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4026 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4027 _In_ const std::shared_ptr<basic_set<T>>& separator,
4028 _In_ const std::shared_ptr<basic_parser<T>>& space,
4029 _In_ const std::locale& locale = std::locale()) :
4030 basic_parser<T>(locale),
4031 format(date_format_t::none),
4032 m_format_mask(format_mask),
4033 day(_day),
4034 month(_month),
4035 year(_year),
4036 m_separator(separator),
4037 m_space(space)
4038 {}
4039
/// Tries to match a date at position `start` of `text`.
///
/// Enabled formats are tried in this fixed order: dmy, mdy, ymd, ym, my, dm, md. The first one
/// that matches wins. Spaces are skipped between components and separators. In the
/// three-component formats the second separator must report the same hit_offset as the first.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` at which matching starts
/// \param[in] end    Index in `text` at which matching must stop
/// \param[in] flags  Match flags
///
/// \returns true on match, with interval and `format` set; false otherwise, with the component
///          parsers invalidated, `format` set to none and interval left empty (start == end + 1).
4040 virtual bool match(
4041 _In_reads_or_z_(end) const T* text,
4042 _In_ size_t start = 0,
4043 _In_ size_t end = (size_t)-1,
4044 _In_ int flags = match_default)
4045 {
4046 assert(text || start >= end);
4047
4048 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
// day <sep> month <sep> year
4049 if ((m_format_mask & date_format_t::dmy) == date_format_t::dmy) {
4050 if (day->match(text, start, end, flags)) {
// Each of the empty-bodied for loops below advances interval.end over any run of spaces.
4051 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4052 if (m_separator->match(text, interval.end, end, flags)) {
// Remember which separator was used; m_separator is re-matched below and its state overwritten.
4053 size_t hit_offset = m_separator->hit_offset;
4054 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4055 if (month->match(text, interval.end, end, flags)) {
4056 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4057 if (m_separator->match(text, interval.end, end, flags) &&
4058 m_separator->hit_offset == hit_offset) // Both separators must match.
4059 {
4060 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4061 if (year->match(text, interval.end, end, flags) &&
4062 is_valid(day->value, month->value))
4063 {
4064 interval.start = start;
4065 interval.end = year->interval.end;
4066 format = date_format_t::dmy;
4067 return true;
4068 }
4069 }
4070 }
4071 }
4072 }
4073 }
4074
// month <sep> day <sep> year
4075 if ((m_format_mask & date_format_t::mdy) == date_format_t::mdy) {
4076 if (month->match(text, start, end, flags)) {
4077 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4078 if (m_separator->match(text, interval.end, end, flags)) {
4079 size_t hit_offset = m_separator->hit_offset;
4080 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4081 if (day->match(text, interval.end, end, flags)) {
4082 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4083 if (m_separator->match(text, interval.end, end, flags) &&
4084 m_separator->hit_offset == hit_offset) // Both separators must match.
4085 {
4086 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4087 if (year->match(text, interval.end, end, flags) &&
4088 is_valid(day->value, month->value))
4089 {
4090 interval.start = start;
4091 interval.end = year->interval.end;
4092 format = date_format_t::mdy;
4093 return true;
4094 }
4095 }
4096 }
4097 }
4098 }
4099 }
4100
// year <sep> month <sep> day
4101 if ((m_format_mask & date_format_t::ymd) == date_format_t::ymd) {
4102 if (year->match(text, start, end, flags)) {
4103 for (interval.end = year->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4104 if (m_separator->match(text, interval.end, end, flags)) {
4105 size_t hit_offset = m_separator->hit_offset;
4106 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4107 if (month->match(text, interval.end, end, flags)) {
4108 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4109 if (m_separator->match(text, interval.end, end, flags) &&
4110 m_separator->hit_offset == hit_offset) // Both separators must match.
4111 {
4112 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4113 if (day->match(text, interval.end, end, flags) &&
4114 is_valid(day->value, month->value))
4115 {
4116 interval.start = start;
4117 interval.end = day->interval.end;
4118 format = date_format_t::ymd;
4119 return true;
4120 }
4121 }
4122 }
4123 }
4124 }
4125 }
4126
// year <sep> month (no day; only the month is validated)
4127 if ((m_format_mask & date_format_t::ym) == date_format_t::ym) {
4128 if (year->match(text, start, end, flags)) {
4129 for (interval.end = year->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4130 if (m_separator->match(text, interval.end, end, flags)) {
4131 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4132 if (month->match(text, interval.end, end, flags) &&
4133 is_valid((size_t)-1, month->value))
4134 {
4135 if (day) day->invalidate();
4136 interval.start = start;
4137 interval.end = month->interval.end;
4138 format = date_format_t::ym;
4139 return true;
4140 }
4141 }
4142 }
4143 }
4144
// month <sep> year (no day; only the month is validated)
4145 if ((m_format_mask & date_format_t::my) == date_format_t::my) {
4146 if (month->match(text, start, end, flags)) {
4147 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4148 if (m_separator->match(text, interval.end, end, flags)) {
4149 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4150 if (year->match(text, interval.end, end, flags) &&
4151 is_valid((size_t)-1, month->value))
4152 {
4153 if (day) day->invalidate();
4154 interval.start = start;
4155 interval.end = year->interval.end;
4156 format = date_format_t::my;
4157 return true;
4158 }
4159 }
4160 }
4161 }
4162
// day <sep> month, optionally followed by one more separator of the same kind
4163 if ((m_format_mask & date_format_t::dm) == date_format_t::dm) {
4164 if (day->match(text, start, end, flags)) {
4165 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4166 if (m_separator->match(text, interval.end, end, flags)) {
4167 size_t hit_offset = m_separator->hit_offset;
4168 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4169 if (month->match(text, interval.end, end, flags) &&
4170 is_valid(day->value, month->value))
4171 {
4172 if (year) year->invalidate();
4173 interval.start = start;
4174 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
// A trailing separator identical to the first one is included in the match; otherwise
// the match ends right after the month (skipped spaces are given back).
4175 if (m_separator->match(text, interval.end, end, flags) &&
4176 m_separator->hit_offset == hit_offset) // Both separators must match.
4177 interval.end = m_separator->interval.end;
4178 else
4179 interval.end = month->interval.end;
4180 format = date_format_t::dm;
4181 return true;
4182 }
4183 }
4184 }
4185 }
4186
// month <sep> day, optionally followed by one more separator of the same kind
4187 if ((m_format_mask & date_format_t::md) == date_format_t::md) {
4188 if (month->match(text, start, end, flags)) {
4189 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4190 if (m_separator->match(text, interval.end, end, flags)) {
4191 size_t hit_offset = m_separator->hit_offset;
4192 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4193 if (day->match(text, interval.end, end, flags) &&
4194 is_valid(day->value, month->value))
4195 {
4196 if (year) year->invalidate();
4197 interval.start = start;
4198 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4199 if (m_separator->match(text, interval.end, end, flags) &&
4200 m_separator->hit_offset == hit_offset) // Both separators must match.
4201 interval.end = m_separator->interval.end;
4202 else
4203 interval.end = day->interval.end;
4204 format = date_format_t::md;
4205 return true;
4206 }
4207 }
4208 }
4209 }
4210
// No enabled format matched.
4211 if (day) day->invalidate();
4212 if (month) month->invalidate();
4213 if (year) year->invalidate();
4214 format = date_format_t::none;
4215 interval.start = (interval.end = start) + 1;
4216 return false;
4217 }
4218
/// Invalidates the component parsers that are set and resets `format` to none.
// NOTE(review): listing line 4225 is absent from this rendering - presumably the base-class
// invalidate call; confirm against the original header.
4219 virtual void invalidate()
4220 {
4221 if (day) day->invalidate();
4222 if (month) month->invalidate();
4223 if (year) year->invalidate();
4224 format = date_format_t::none;
4226 }
4227
4228 protected:
/// Checks that `day` exists in `month`.
///
/// Passing (size_t)-1 for either argument skips the check of that component. February accepts
/// days 1-29 regardless of year, as the year is not an input.
///
/// \returns true when month is 1-12 and day is within that month's length.
4229 static inline bool is_valid(size_t day, size_t month)
4230 {
4231 if (month == (size_t)-1) {
4232 // Default to January. This allows validating day only, as January has all 31 days.
4233 month = 1;
4234 }
4235 if (day == (size_t)-1) {
4236 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4237 day = 1;
4238 }
4239
4240 switch (month) {
4241 case 1:
4242 case 3:
4243 case 5:
4244 case 7:
4245 case 8:
4246 case 10:
4247 case 12:
4248 return 1 <= day && day <= 31;
4249 case 2:
4250 return 1 <= day && day <= 29;
4251 case 4:
4252 case 6:
4253 case 9:
4254 case 11:
4255 return 1 <= day && day <= 30;
4256 default:
4257 return false;
4258 }
4259 }
4260
4261 public:
4262 date_format_t format; // format of the last successful match; none otherwise
4263 std::shared_ptr<basic_integer<T>> day;
4264 std::shared_ptr<basic_integer<T>> month;
4265 std::shared_ptr<basic_integer<T>> year;
4266
4267 protected:
4268 int m_format_mask; // date_format_t bits of the formats match() may try
4269 std::shared_ptr<basic_set<T>> m_separator;
4270 std::shared_ptr<basic_parser<T>> m_space;
4271 };
4272
// Character-type specific aliases of basic_date. tdate selects the wchar_t variant when _UNICODE is defined.
4273 using date = basic_date<char>;
4274 using wdate = basic_date<wchar_t>;
4275#ifdef _UNICODE
4276 using tdate = wdate;
4277#else
4278 using tdate = date;
4279#endif
4281
/// Parser for times of day written as hour, minute and optional second and millisecond.
4285 template <class T>
4286 class basic_time : public basic_parser<T>
4287 {
4288 public:
/// \param[in] _hour                  Hour parser (required)
/// \param[in] _minute                Minute parser (required)
/// \param[in] _second                Second parser; may be null
/// \param[in] _millisecond           Millisecond parser; may be null
/// \param[in] separator              Set of accepted hour/minute/second separators (required)
/// \param[in] millisecond_separator  Separator between seconds and milliseconds; may be null
/// \param[in] locale                 Locale passed to the base parser
4289 basic_time(
4290 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4291 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4292 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4293 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4294 _In_ const std::shared_ptr<basic_set<T>>& separator,
4295 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4296 _In_ const std::locale& locale = std::locale()) :
4297 basic_parser<T>(locale),
4298 hour(_hour),
4299 minute(_minute),
4300 second(_second),
4301 millisecond(_millisecond),
4302 m_separator(separator),
4303 m_millisecond_separator(millisecond_separator)
4304 {}
4305
/// Tries to match a time at position `start` of `text`.
///
/// Hour, separator and minute (< 60) are required. Seconds (< 60) are accepted only after a
/// separator reporting the same hit_offset as the first one; milliseconds (< 1000) only after
/// seconds and the millisecond separator. The hour value is not range-checked here.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` at which matching starts
/// \param[in] end    Index in `text` at which matching must stop
/// \param[in] flags  Match flags, forwarded unchanged to every sub-parser
///
/// \returns true on match, with interval ending after the last accepted component; false
///          otherwise, with all component parsers invalidated and interval left empty.
4306 virtual bool match(
4307 _In_reads_or_z_(end) const T* text,
4308 _In_ size_t start = 0,
4309 _In_ size_t end = (size_t)-1,
4310 _In_ int flags = match_default)
4311 {
4312 assert(text || start >= end);
4313
4314 if (hour->match(text, start, end, flags) &&
4315 m_separator->match(text, hour->interval.end, end, flags) &&
4316 minute->match(text, m_separator->interval.end, end, flags) &&
4317 minute->value < 60)
4318 {
4319 // hh:mm
// Remember which separator was used; m_separator is re-matched below and its state overwritten.
4320 size_t hit_offset = m_separator->hit_offset;
4321 if (m_separator->match(text, minute->interval.end, end, flags) &&
4322 m_separator->hit_offset == hit_offset && // Both separators must match.
4323 second && second->match(text, m_separator->interval.end, end, flags) &&
4324 second->value < 60)
4325 {
4326 // hh:mm:ss
4327 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4328 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4329 millisecond->value < 1000)
4330 {
4331 // hh:mm:ss.mmm
4332 interval.end = millisecond->interval.end;
4333 }
4334 else {
4335 if (millisecond) millisecond->invalidate();
4336 interval.end = second->interval.end;
4337 }
4338 }
4339 else {
// No valid seconds: the match ends after the minutes, a trailing separator is not consumed.
4340 if (second) second->invalidate();
4341 if (millisecond) millisecond->invalidate();
4342 interval.end = minute->interval.end;
4343 }
4344 interval.start = start;
4345 return true;
4346 }
4347
4348 hour->invalidate();
4349 minute->invalidate();
4350 if (second) second->invalidate();
4351 if (millisecond) millisecond->invalidate();
// Leaves interval with start == end + 1, i.e. an empty (not matched) range.
4352 interval.start = (interval.end = start) + 1;
4353 return false;
4354 }
4355
/// Invalidates all component parsers (the optional ones only when set).
// NOTE(review): listing line 4362 is absent from this rendering - presumably the base-class
// invalidate call; confirm against the original header.
4356 virtual void invalidate()
4357 {
4358 hour->invalidate();
4359 minute->invalidate();
4360 if (second) second->invalidate();
4361 if (millisecond) millisecond->invalidate();
4363 }
4364
4365 public:
4366 std::shared_ptr<basic_integer10<T>> hour;
4367 std::shared_ptr<basic_integer10<T>> minute;
4368 std::shared_ptr<basic_integer10<T>> second; // may be null
4369 std::shared_ptr<basic_integer10<T>> millisecond; // may be null
4370
4371 protected:
4372 std::shared_ptr<basic_set<T>> m_separator;
4373 std::shared_ptr<basic_parser<T>> m_millisecond_separator; // may be null
4374 };
4375
// Character-type specific aliases of basic_time. ttime selects the wchar_t variant when _UNICODE is defined.
4376 using time = basic_time<char>;
4377 using wtime = basic_time<wchar_t>;
4378#ifdef _UNICODE
4379 using ttime = wtime;
4380#else
4381 using ttime = time;
4382#endif
4384
/// Parser for angles written as degrees, minutes and seconds, each component optional,
/// optionally followed by a decimal part.
// NOTE(review): the constructor name line (listing line 4392) and listing line 4486 are absent
// from this rendering; confirm against the original header.
4388 template <class T>
4389 class basic_angle : public basic_parser<T>
4390 {
4391 public:
// Constructor parameters: degree, minute and their separators are dereferenced unconditionally;
// second, second separator and decimal are null-checked before use and may be null.
4393 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4394 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4395 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4396 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4397 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4398 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4399 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4400 _In_ const std::locale& locale = std::locale()) :
4401 basic_parser<T>(locale),
4402 degree(_degree),
4403 degree_separator(_degree_separator),
4404 minute(_minute),
4405 minute_separator(_minute_separator),
4406 second(_second),
4407 second_separator(_second_separator),
4408 decimal(_decimal)
4409 {}
4410
/// Tries to match an angle at position `start` of `text`.
///
/// Degrees and minutes each count only together with their separator; seconds count with or
/// without theirs. Minutes and seconds must be below 60. At least one of the three components
/// has to match; the decimal part is then accepted after the last one.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in `text` at which matching starts
/// \param[in] end    Index in `text` at which matching must stop
/// \param[in] flags  Match flags, forwarded unchanged to every sub-parser
///
/// \returns true when at least one component matched; false otherwise, with interval left empty.
4411 virtual bool match(
4412 _In_reads_or_z_(end) const T* text,
4413 _In_ size_t start = 0,
4414 _In_ size_t end = (size_t)-1,
4415 _In_ int flags = match_default)
4416 {
4417 assert(text || start >= end);
4418
// interval.end is the running cursor; a component that fails leaves it where it was, so the
// next component is tried at the same position.
4419 interval.end = start;
4420
4421 if (degree->match(text, interval.end, end, flags) &&
4422 degree_separator->match(text, degree->interval.end, end, flags))
4423 {
4424 // Degrees
4425 interval.end = degree_separator->interval.end;
4426 }
4427 else {
4428 degree->invalidate();
4429 degree_separator->invalidate();
4430 }
4431
4432 if (minute->match(text, interval.end, end, flags) &&
4433 minute->value < 60 &&
4434 minute_separator->match(text, minute->interval.end, end, flags))
4435 {
4436 // Minutes
4437 interval.end = minute_separator->interval.end;
4438 }
4439 else {
4440 minute->invalidate();
4441 minute_separator->invalidate();
4442 }
4443
4444 if (second && second->match(text, interval.end, end, flags) &&
4445 second->value < 60)
4446 {
4447 // Seconds
4448 interval.end = second->interval.end;
// The seconds separator is optional even when a parser for it is supplied.
4449 if (second_separator && second_separator->match(text, interval.end, end, flags))
4450 interval.end = second_separator->interval.end;
4451 else
4452 if (second_separator) second_separator->invalidate();
4453 }
4454 else {
4455 if (second) second->invalidate();
4456 if (second_separator) second_separator->invalidate();
4457 }
4458
// A component counts as matched when its interval is non-empty (start < end).
4459 if (degree->interval.start < degree->interval.end ||
4460 minute->interval.start < minute->interval.end ||
4461 second && second->interval.start < second->interval.end)
4462 {
4463 if (decimal && decimal->match(text, interval.end, end, flags)) {
4464 // Decimals
4465 interval.end = decimal->interval.end;
4466 }
4467 else if (decimal)
4468 decimal->invalidate();
4469 interval.start = start;
4470 return true;
4471 }
4472 if (decimal) decimal->invalidate();
// Leaves interval with start == end + 1, i.e. an empty (not matched) range.
4473 interval.start = (interval.end = start) + 1;
4474 return false;
4475 }
4476
/// Invalidates all sub-parsers (the optional ones only when set).
4477 virtual void invalidate()
4478 {
4479 degree->invalidate();
4480 degree_separator->invalidate();
4481 minute->invalidate();
4482 minute_separator->invalidate();
4483 if (second) second->invalidate();
4484 if (second_separator) second_separator->invalidate();
4485 if (decimal) decimal->invalidate();
4487 }
4488
4489 public:
4490 std::shared_ptr<basic_integer10<T>> degree;
4491 std::shared_ptr<basic_parser<T>> degree_separator;
4492 std::shared_ptr<basic_integer10<T>> minute;
4493 std::shared_ptr<basic_parser<T>> minute_separator;
4494 std::shared_ptr<basic_integer10<T>> second; // may be null
4495 std::shared_ptr<basic_parser<T>> second_separator; // may be null
4496 std::shared_ptr<basic_parser<T>> decimal; // may be null
4497 };
4498
// Character-type specific aliases of basic_angle.
// NOTE(review): the declaration of wangle (listing line 4500) is absent from this rendering.
4499 using angle = basic_angle<char>;
4501#ifdef _UNICODE
// NOTE(review): RRegElKot plays the role the t-prefixed aliases play for the other parsers
// (turl, tdate, ttime); the name looks like a leftover - confirm whether callers rely on it before renaming.
4502 using RRegElKot = wangle;
4503#else
4504 using RRegElKot = angle;
4505#endif
4507
4511 template <class T>
4513 {
4514 public:
4516 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4517 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4518 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4519 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4520 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4521 _In_ const std::shared_ptr<basic_parser<T>>& space,
4522 _In_ const std::locale& locale = std::locale()) :
4523 basic_parser<T>(locale),
4524 m_digit(digit),
4525 m_plus_sign(plus_sign),
4526 m_lparenthesis(lparenthesis),
4527 m_rparenthesis(rparenthesis),
4528 m_separator(separator),
4529 m_space(space)
4530 {}
4531
4532 virtual bool match(
4533 _In_reads_or_z_(end) const T* text,
4534 _In_ size_t start = 0,
4535 _In_ size_t end = (size_t)-1,
4536 _In_ int flags = match_default)
4537 {
4538 assert(text || start >= end);
4539
4540 size_t safe_digit_end = start, safe_value_size = 0;
4541 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4542 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4543
4544 interval.end = start;
4545 value.clear();
4546 m_lparenthesis->invalidate();
4547 m_rparenthesis->invalidate();
4548
4549 if (m_plus_sign && m_plus_sign->match(text, interval.end, end, flags)) {
4550 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4551 safe_value_size = value.size();
4552 interval.end = m_plus_sign->interval.end;
4553 }
4554
4555 for (;;) {
4556 assert(text || interval.end >= end);
4557 if (interval.end >= end || !text[interval.end])
4558 break;
4559 if (m_digit->match(text, interval.end, end, flags)) {
4560 // Digit
4561 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4562 interval.end = m_digit->interval.end;
4563 if (!in_parentheses) {
4564 safe_digit_end = interval.end;
4565 safe_value_size = value.size();
4566 has_digits = true;
4567 }
4568 after_digit = true;
4569 after_parentheses = false;
4570 }
4571 else if (
4572 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4573 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4574 m_lparenthesis->match(text, interval.end, end, flags))
4575 {
4576 // Left parenthesis
4577 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4578 interval.end = m_lparenthesis->interval.end;
4579 in_parentheses = true;
4580 after_digit = false;
4581 after_parentheses = false;
4582 }
4583 else if (
4584 in_parentheses && // After left parenthesis
4585 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4586 m_rparenthesis->match(text, interval.end, end, flags) &&
4587 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4588 {
4589 // Right parenthesis
4590 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4591 interval.end = m_rparenthesis->interval.end;
4592 safe_digit_end = interval.end;
4593 safe_value_size = value.size();
4594 in_parentheses = false;
4595 after_digit = false;
4596 after_parentheses = true;
4597 }
4598 else if (
4599 after_digit &&
4600 !in_parentheses && // No separators inside parentheses
4601 !after_parentheses && // No separators following right parenthesis
4602 m_separator && m_separator->match(text, interval.end, end, flags))
4603 {
4604 // Separator
4605 interval.end = m_separator->interval.end;
4606 after_digit = false;
4607 after_parentheses = false;
4608 }
4609 else if (
4610 (after_digit || after_parentheses) &&
4611 m_space && m_space->match(text, interval.end, end, space_match_flags))
4612 {
4613 // Space
4614 interval.end = m_space->interval.end;
4615 after_digit = false;
4616 after_parentheses = false;
4617 }
4618 else
4619 break;
4620 }
4621 if (has_digits) {
4622 value.erase(safe_value_size);
4623 interval.start = start;
4624 interval.end = safe_digit_end;
4625 return true;
4626 }
4627 value.clear();
4628 interval.start = (interval.end = start) + 1;
4629 return false;
4630 }
4631
4632 virtual void invalidate()
4633 {
4634 value.clear();
4636 }
4637
4638 public:
4639 std::basic_string<T> value;
4640
4641 protected:
4642 std::shared_ptr<basic_parser<T>> m_digit;
4643 std::shared_ptr<basic_parser<T>> m_plus_sign;
4644 std::shared_ptr<basic_set<T>> m_lparenthesis;
4645 std::shared_ptr<basic_set<T>> m_rparenthesis;
4646 std::shared_ptr<basic_parser<T>> m_separator;
4647 std::shared_ptr<basic_parser<T>> m_space;
4648 };
4649
4652#ifdef _UNICODE
4654#else
4656#endif
4658
4662 template <class T>
4664 {
4665 public:
4667 _In_ const std::shared_ptr<basic_parser<T>>& element,
4668 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4669 _In_ const std::shared_ptr<basic_parser<T>>& sign,
4670 _In_ const std::locale& locale = std::locale()) :
4671 basic_parser<T>(locale),
4672 m_element(element),
4673 m_digit(digit),
4674 m_sign(sign),
4675 has_digits(false),
4676 has_charge(false)
4677 {}
4678
4679 virtual bool match(
4680 _In_reads_or_z_(end) const T* text,
4681 _In_ size_t start = 0,
4682 _In_ size_t end = (size_t)-1,
4683 _In_ int flags = match_default)
4684 {
4685 assert(text || start >= end);
4686
4687 has_digits = false;
4688 has_charge = false;
4689 interval.end = start;
4690
4691 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
4692 for (;;) {
4693 if (m_element->match(text, interval.end, end, element_match_flags)) {
4694 interval.end = m_element->interval.end;
4695 while (m_digit->match(text, interval.end, end, flags)) {
4696 interval.end = m_digit->interval.end;
4697 has_digits = true;
4698 }
4699 }
4700 else if (start < interval.end) {
4701 if (m_sign->match(text, interval.end, end, flags)) {
4702 interval.end = m_sign->interval.end;
4703 has_charge = true;
4704 }
4705 interval.start = start;
4706 return true;
4707 }
4708 else {
4709 interval.start = (interval.end = start) + 1;
4710 return false;
4711 }
4712 }
4713 }
4714
4715 virtual void invalidate()
4716 {
4717 has_digits = false;
4718 has_charge = false;
4720 }
4721
4722 public:
4723 bool has_digits;
4724 bool has_charge;
4725
4726 protected:
4727 std::shared_ptr<basic_parser<T>> m_element;
4728 std::shared_ptr<basic_parser<T>> m_digit;
4729 std::shared_ptr<basic_parser<T>> m_sign;
4730 };
4731
4734#ifdef _UNICODE
4736#else
4738#endif
4740
4745 {
4746 public:
4747 virtual bool match(
4748 _In_reads_or_z_(end) const char* text,
4749 _In_ size_t start = 0,
4750 _In_ size_t end = (size_t)-1,
4751 _In_ int flags = match_default)
4752 {
4753 assert(text || start >= end);
4754 interval.end = start;
4755
4756 assert(text || interval.end >= end);
4757 if (interval.end < end && text[interval.end]) {
4758 if (text[interval.end] == '\r') {
4759 interval.end++;
4760 if (interval.end < end && text[interval.end] == '\n') {
4761 interval.start = start;
4762 interval.end++;
4763 return true;
4764 }
4765 }
4766 else if (text[interval.end] == '\n') {
4767 interval.start = start;
4768 interval.end++;
4769 return true;
4770 }
4771 }
4772 interval.start = (interval.end = start) + 1;
4773 return false;
4774 }
4775 };
4776
4780 class http_space : public parser
4781 {
4782 public:
4783 virtual bool match(
4784 _In_reads_or_z_(end) const char* text,
4785 _In_ size_t start = 0,
4786 _In_ size_t end = (size_t)-1,
4787 _In_ int flags = match_default)
4788 {
4789 assert(text || start >= end);
4790 interval.end = start;
4791 if (m_line_break.match(text, interval.end, end, flags)) {
4792 interval.end = m_line_break.interval.end;
4793 if (interval.end < end && text[interval.end] && isspace(text[interval.end])) {
4794 interval.start = start;
4795 interval.end++;
4796 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
4797 return true;
4798 }
4799 }
4800 else if (interval.end < end && text[interval.end] && isspace(text[interval.end])) {
4801 interval.start = start;
4802 interval.end++;
4803 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
4804 return true;
4805 }
4806 interval.start = (interval.end = start) + 1;
4807 return false;
4808 }
4809
4810 protected:
4811 http_line_break m_line_break;
4812 };
4813
4817 class http_text_char : public parser
4818 {
4819 public:
4820 virtual bool match(
4821 _In_reads_or_z_(end) const char* text,
4822 _In_ size_t start = 0,
4823 _In_ size_t end = (size_t)-1,
4824 _In_ int flags = match_default)
4825 {
4826 assert(text || start >= end);
4827 interval.end = start;
4828
4829 assert(text || interval.end >= end);
4830 if (m_space.match(text, interval.end, end, flags)) {
4831 interval.start = start;
4832 interval.end = m_space.interval.end;
4833 return true;
4834 }
4835 else if (interval.end < end && text[interval.end] && text[interval.end] >= 0x20) {
4836 interval.start = start;
4837 interval.end++;
4838 return true;
4839 }
4840 interval.start = (interval.end = start) + 1;
4841 return false;
4842 }
4843
4844 protected:
4845 http_space m_space;
4846 };
4847
4851 class http_token : public parser
4852 {
4853 public:
4854 virtual bool match(
4855 _In_reads_or_z_(end) const char* text,
4856 _In_ size_t start = 0,
4857 _In_ size_t end = (size_t)-1,
4858 _In_ int flags = match_default)
4859 {
4860 assert(text || start >= end);
4861 interval.end = start;
4862 for (;;) {
4863 if (interval.end < end && text[interval.end]) {
4864 if ((unsigned int)text[interval.end] < 0x20 ||
4865 (unsigned int)text[interval.end] == 0x7f ||
4866 text[interval.end] == '(' ||
4867 text[interval.end] == ')' ||
4868 text[interval.end] == '<' ||
4869 text[interval.end] == '>' ||
4870 text[interval.end] == '@' ||
4871 text[interval.end] == ',' ||
4872 text[interval.end] == ';' ||
4873 text[interval.end] == ':' ||
4874 text[interval.end] == '\\' ||
4875 text[interval.end] == '\"' ||
4876 text[interval.end] == '/' ||
4877 text[interval.end] == '[' ||
4878 text[interval.end] == ']' ||
4879 text[interval.end] == '?' ||
4880 text[interval.end] == '=' ||
4881 text[interval.end] == '{' ||
4882 text[interval.end] == '}' ||
4883 isspace(text[interval.end]))
4884 break;
4885 else
4886 interval.end++;
4887 }
4888 else
4889 break;
4890 }
4891 if (start < interval.end) {
4892 interval.start = start;
4893 return true;
4894 }
4895 else {
4896 interval.start = (interval.end = start) + 1;
4897 return false;
4898 }
4899 }
4900 };
4901
4906 {
4907 public:
4908 virtual bool match(
4909 _In_reads_or_z_(end) const char* text,
4910 _In_ size_t start = 0,
4911 _In_ size_t end = (size_t)-1,
4912 _In_ int flags = match_default)
4913 {
4914 assert(text || start >= end);
4915 interval.end = start;
4916 if (interval.end < end && text[interval.end] != '"')
4917 goto error;
4918 interval.end++;
4920 for (;;) {
4921 assert(text || interval.end >= end);
4922 if (interval.end < end && text[interval.end]) {
4923 if (text[interval.end] == '"') {
4925 interval.end++;
4926 break;
4927 }
4928 else if (text[interval.end] == '\\') {
4929 interval.end++;
4930 if (interval.end < end && text[interval.end]) {
4931 interval.end++;
4932 }
4933 else
4934 goto error;
4935 }
4936 else if (m_chr.match(text, interval.end, end, flags))
4937 interval.end++;
4938 else
4939 goto error;
4940 }
4941 else
4942 goto error;
4943 }
4944 interval.start = start;
4945 return true;
4946
4947 error:
4948 content.start = 1;
4949 content.end = 0;
4950 interval.start = (interval.end = start) + 1;
4951 return false;
4952 }
4953
/// Marks the parser as not matched.
///
/// `content` is set to start 1 / end 0 (start greater than end), the same values
/// match() stores in it on failure, before the base parser is invalidated.
4954 virtual void invalidate()
4955 {
4956 content.start = 1;
4957 content.end = 0;
4958 parser::invalidate();
4959 }
4960
4961 public:
4963
4964 protected:
4965 http_text_char m_chr;
4966 };
4967
4971 class http_value : public parser
4972 {
4973 public:
4974 virtual bool match(
4975 _In_reads_or_z_(end) const char* text,
4976 _In_ size_t start = 0,
4977 _In_ size_t end = (size_t)-1,
4978 _In_ int flags = match_default)
4979 {
4980 assert(text || start >= end);
4981 interval.end = start;
4982 if (string.match(text, interval.end, end, flags)) {
4983 token.invalidate();
4984 interval.end = string.interval.end;
4985 interval.start = start;
4986 return true;
4987 }
4988 else if (token.match(text, interval.end, end, flags)) {
4989 string.invalidate();
4991 interval.start = start;
4992 return true;
4993 }
4994 else {
4995 interval.start = (interval.end = start) + 1;
4996 return false;
4997 }
4998 }
4999
/// Marks the parser as not matched: invalidates both alternative sub-parsers
/// (`string` and `token`) and then the base parser.
5000 virtual void invalidate()
5001 {
5002 string.invalidate();
5003 token.invalidate();
5004 parser::invalidate();
5005 }
5006
5007 public:
5010 };
5011
5015 class http_parameter : public parser
5016 {
5017 public:
5018 virtual bool match(
5019 _In_reads_or_z_(end) const char* text,
5020 _In_ size_t start = 0,
5021 _In_ size_t end = (size_t)-1,
5022 _In_ int flags = match_default)
5023 {
5024 assert(text || start >= end);
5025 interval.end = start;
5026 if (name.match(text, interval.end, end, flags))
5028 else
5029 goto error;
5030 while (m_space.match(text, interval.end, end, flags))
5031 interval.end = m_space.interval.end;
5032 assert(text || interval.end >= end);
5033 if (interval.end < end && text[interval.end] == '=')
5034 interval.end++;
5035 else
5036 while (m_space.match(text, interval.end, end, flags))
5037 interval.end = m_space.interval.end;
5038 if (value.match(text, interval.end, end, flags))
5040 else
5041 goto error;
5042 interval.start = start;
5043 return true;
5044
5045 error:
5046 name.invalidate();
5047 value.invalidate();
5048 interval.start = (interval.end = start) + 1;
5049 return false;
5050 }
5051
/// Marks the parser as not matched: invalidates the `name` and `value`
/// sub-parsers and then the base parser.
5052 virtual void invalidate()
5053 {
5054 name.invalidate();
5055 value.invalidate();
5056 parser::invalidate();
5057 }
5058
5059 public:
5062
5063 protected:
5064 http_space m_space;
5065 };
5066
5070 class http_any_type : public parser
5071 {
5072 public:
5073 virtual bool match(
5074 _In_reads_or_z_(end) const char* text,
5075 _In_ size_t start = 0,
5076 _In_ size_t end = (size_t)-1,
5077 _In_ int flags = match_default)
5078 {
5079 assert(text || start >= end);
5080 if (start + 2 < end &&
5081 text[start] == '*' &&
5082 text[start + 1] == '/' &&
5083 text[start + 2] == '*')
5084 {
5085 interval.end = (interval.start = start) + 3;
5086 return true;
5087 }
5088 else if (start < end && text[start] == '*') {
5089 interval.end = (interval.start = start) + 1;
5090 return true;
5091 }
5092 else {
5093 interval.start = (interval.end = start) + 1;
5094 return false;
5095 }
5096 }
5097 };
5098
5103 {
5104 public:
5105 virtual bool match(
5106 _In_reads_or_z_(end) const char* text,
5107 _In_ size_t start = 0,
5108 _In_ size_t end = (size_t)-1,
5109 _In_ int flags = match_default)
5110 {
5111 assert(text || start >= end);
5112 interval.end = start;
5113 if (type.match(text, interval.end, end, flags))
5114 interval.end = type.interval.end;
5115 else
5116 goto error;
5117 while (m_space.match(text, interval.end, end, flags))
5118 interval.end = m_space.interval.end;
5119 if (interval.end < end && text[interval.end] == '/')
5120 interval.end++;
5121 else
5122 goto error;
5123 while (m_space.match(text, interval.end, end, flags))
5124 interval.end = m_space.interval.end;
5125 if (subtype.match(text, interval.end, end, flags))
5126 interval.end = subtype.interval.end;
5127 else
5128 goto error;
5129 interval.start = start;
5130 return true;
5131
5132 error:
5133 type.invalidate();
5134 subtype.invalidate();
5135 interval.start = (interval.end = start) + 1;
5136 return false;
5137 }
5138
/// Marks the parser as not matched: invalidates the `type` and `subtype`
/// sub-parsers and then the base parser.
5139 virtual void invalidate()
5140 {
5141 type.invalidate();
5142 subtype.invalidate();
5143 parser::invalidate();
5144 }
5145
5146 public:
5147 http_token type;
5148 http_token subtype;
5149
5150 protected:
5151 http_space m_space;
5152 };
5153
5158 {
5159 public:
5160 virtual bool match(
5161 _In_reads_or_z_(end) const char* text,
5162 _In_ size_t start = 0,
5163 _In_ size_t end = (size_t)-1,
5164 _In_ int flags = match_default)
5165 {
5166 assert(text || start >= end);
5167 if (!http_media_range::match(text, start, end, flags))
5168 goto error;
5169 params.clear();
5170 for (;;) {
5171 if (interval.end < end && text[interval.end]) {
5172 if (m_space.match(text, interval.end, end, flags))
5173 interval.end = m_space.interval.end;
5174 else if (text[interval.end] == ';') {
5175 interval.end++;
5176 while (m_space.match(text, interval.end, end, flags))
5177 interval.end = m_space.interval.end;
5178 http_parameter param;
5179 if (param.match(text, interval.end, end, flags)) {
5180 interval.end = param.interval.end;
5181 params.push_back(std::move(param));
5182 }
5183 else
5184 break;
5185 }
5186 else
5187 break;
5188 }
5189 else
5190 break;
5191 }
5192 interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
5193 return true;
5194
5195 error:
5196 http_media_range::invalidate();
5197 params.clear();
5198 interval.start = (interval.end = start) + 1;
5199 return false;
5200 }
5201
/// Marks the parser as not matched: drops the collected parameters and
/// invalidates the media-range part.
5202 virtual void invalidate()
5203 {
5204 params.clear();
5205 http_media_range::invalidate();
5206 }
5207
5208 public:
5209 std::list<http_parameter> params;
5210 };
5211
5216 {
5217 public:
5218 virtual bool match(
5219 _In_reads_or_z_(end) const char* text,
5220 _In_ size_t start = 0,
5221 _In_ size_t end = (size_t)-1,
5222 _In_ int flags = match_default)
5223 {
5224 assert(text || start >= end);
5225 interval.end = start;
5226 for (;;) {
5227 if (interval.end < end && text[interval.end]) {
5228 if ((unsigned int)text[interval.end] < 0x20 ||
5229 (unsigned int)text[interval.end] == 0x7f ||
5230 text[interval.end] == ':' ||
5231 text[interval.end] == '/' ||
5232 isspace(text[interval.end]))
5233 break;
5234 else
5235 interval.end++;
5236 }
5237 else
5238 break;
5239 }
5240 if (start < interval.end) {
5241 interval.start = start;
5242 return true;
5243 }
5244 interval.start = (interval.end = start) + 1;
5245 return false;
5246 }
5247 };
5248
5252 class http_url_port : public parser
5253 {
5254 public:
5255 http_url_port(_In_ const std::locale& locale = std::locale()) :
5256 parser(locale),
5257 value(0)
5258 {}
5259
5260 virtual bool match(
5261 _In_reads_or_z_(end) const char* text,
5262 _In_ size_t start = 0,
5263 _In_ size_t end = (size_t)-1,
5264 _In_ int flags = match_default)
5265 {
5266 assert(text || start >= end);
5267 value = 0;
5268 interval.end = start;
5269 for (;;) {
5270 if (interval.end < end && text[interval.end]) {
5271 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5272 size_t _value = (size_t)value * 10 + text[interval.end] - '0';
5273 if (_value > (uint16_t)-1) {
5274 value = 0;
5275 interval.start = (interval.end = start) + 1;
5276 return false;
5277 }
5278 value = (uint16_t)_value;
5279 interval.end++;
5280 }
5281 else
5282 break;
5283 }
5284 else
5285 break;
5286 }
5287 if (start < interval.end) {
5288 interval.start = start;
5289 return true;
5290 }
5291 interval.start = (interval.end = start) + 1;
5292 return false;
5293 }
5294
5295 virtual void invalidate()
5296 {
5297 value = 0;
5298 parser::invalidate();
5299 }
5300
5301 public:
5302 uint16_t value;
5303 };
5304
5309 {
5310 public:
5311 virtual bool match(
5312 _In_reads_or_z_(end) const char* text,
5313 _In_ size_t start = 0,
5314 _In_ size_t end = (size_t)-1,
5315 _In_ int flags = match_default)
5316 {
5317 assert(text || start >= end);
5318 interval.end = start;
5319 for (;;) {
5320 if (interval.end < end && text[interval.end]) {
5321 if ((unsigned int)text[interval.end] < 0x20 ||
5322 (unsigned int)text[interval.end] == 0x7f ||
5323 text[interval.end] == '?' ||
5324 text[interval.end] == '/' ||
5325 isspace(text[interval.end]))
5326 break;
5327 else
5328 interval.end++;
5329 }
5330 else
5331 break;
5332 }
5333 interval.start = start;
5334 return true;
5335 }
5336 };
5337
5341 class http_url_path : public parser
5342 {
5343 public:
5344 virtual bool match(
5345 _In_reads_or_z_(end) const char* text,
5346 _In_ size_t start = 0,
5347 _In_ size_t end = (size_t)-1,
5348 _In_ int flags = match_default)
5349 {
5350 assert(text || start >= end);
5352 interval.end = start;
5353 segments.clear();
5354 assert(text || interval.end >= end);
5355 if (interval.end < end && text[interval.end] != '/')
5356 goto error;
5357 interval.end++;
5358 s.match(text, interval.end, end, flags);
5359 segments.push_back(s);
5361 for (;;) {
5362 if (interval.end < end && text[interval.end]) {
5363 if (text[interval.end] == '/') {
5364 interval.end++;
5365 s.match(text, interval.end, end, flags);
5366 segments.push_back(s);
5368 }
5369 else
5370 break;
5371 }
5372 else
5373 break;
5374 }
5375 interval.start = start;
5376 return true;
5377
5378 error:
5379 segments.clear();
5380 interval.start = (interval.end = start) + 1;
5381 return false;
5382 }
5383
5384 virtual void invalidate()
5385 {
5386 segments.clear();
5387 parser::invalidate();
5388 }
5389
5390 public:
5391 std::vector<http_url_path_segment> segments;
5392 };
5393
5398 {
5399 public:
5400 virtual bool match(
5401 _In_reads_or_z_(end) const char* text,
5402 _In_ size_t start = 0,
5403 _In_ size_t end = (size_t)-1,
5404 _In_ int flags = match_default)
5405 {
5406 assert(text || start >= end);
5407 interval.end = start;
5408 name.start = interval.end;
5409 for (;;) {
5410 if (interval.end < end && text[interval.end]) {
5411 if ((unsigned int)text[interval.end] < 0x20 ||
5412 (unsigned int)text[interval.end] == 0x7f ||
5413 text[interval.end] == '&' ||
5414 text[interval.end] == '=' ||
5415 isspace(text[interval.end]))
5416 break;
5417 else
5418 interval.end++;
5419 }
5420 else
5421 break;
5422 }
5423 if (start < interval.end)
5424 name.end = interval.end;
5425 else
5426 goto error;
5427 if (text[interval.end] == '=') {
5428 interval.end++;
5429 value.start = interval.end;
5430 for (;;) {
5431 if (interval.end < end && text[interval.end]) {
5432 if ((unsigned int)text[interval.end] < 0x20 ||
5433 (unsigned int)text[interval.end] == 0x7f ||
5434 text[interval.end] == '&' ||
5435 isspace(text[interval.end]))
5436 break;
5437 else
5438 interval.end++;
5439 }
5440 else
5441 break;
5442 }
5443 value.end = interval.end;
5444 }
5445 else {
5446 value.start = 1;
5447 value.end = 0;
5448 }
5449 interval.start = start;
5450 return true;
5451
5452 error:
5453 name.start = 1;
5454 name.end = 0;
5455 value.start = 1;
5456 value.end = 0;
5457 interval.start = (interval.end = start) + 1;
5458 return false;
5459 }
5460
/// Marks the parser as not matched.
///
/// Both `name` and `value` are set to start 1 / end 0 (start greater than end),
/// the same values match() stores in them on failure, before the base parser
/// is invalidated.
5461 virtual void invalidate()
5462 {
5463 name.start = 1;
5464 name.end = 0;
5465 value.start = 1;
5466 value.end = 0;
5467 parser::invalidate();
5468 }
5469
5470 public:
5473 };
5474
5478 class http_url : public parser
5479 {
5480 public:
5481 http_url(_In_ const std::locale& locale = std::locale()) :
5482 parser(locale),
5483 port(locale)
5484 {}
5485
5486 virtual bool match(
5487 _In_reads_or_z_(end) const char* text,
5488 _In_ size_t start = 0,
5489 _In_ size_t end = (size_t)-1,
5490 _In_ int flags = match_default)
5491 {
5492 assert(text || start >= end);
5493 interval.end = start;
5494
5495 if (interval.end + 7 <= end && stdex::strnicmp(text + interval.end, 7, "http://", (size_t)-1, m_locale) == 0) {
5496 interval.end += 7;
5497 if (server.match(text, interval.end, end, flags))
5498 interval.end = server.interval.end;
5499 else
5500 goto error;
5501 if (interval.end < end && text[interval.end] == ':') {
5502 interval.end++;
5503 if (port.match(text, interval.end, end, flags))
5504 interval.end = port.interval.end;
5505 }
5506 else {
5507 port.invalidate();
5508 port.value = 80;
5509 }
5510 }
5511 else {
5512 server.invalidate();
5513 port.invalidate();
5514 port.value = 80;
5515 }
5516
5517 if (path.match(text, interval.end, end, flags))
5518 interval.end = path.interval.end;
5519 else
5520 goto error;
5521
5522 params.clear();
5523
5524 if (interval.end < end && text[interval.end] == '?') {
5525 interval.end++;
5526 for (;;) {
5527 if (interval.end < end && text[interval.end]) {
5528 if ((unsigned int)text[interval.end] < 0x20 ||
5529 (unsigned int)text[interval.end] == 0x7f ||
5530 isspace(text[interval.end]))
5531 break;
5532 else if (text[interval.end] == '&')
5533 interval.end++;
5534 else {
5535 http_url_parameter param;
5536 if (param.match(text, interval.end, end, flags)) {
5537 interval.end = param.interval.end;
5538 params.push_back(std::move(param));
5539 }
5540 else
5541 break;
5542 }
5543 }
5544 else
5545 break;
5546 }
5547 }
5548
5549 interval.start = start;
5550 return true;
5551
5552 error:
5553 server.invalidate();
5554 port.invalidate();
5555 path.invalidate();
5556 params.clear();
5557 interval.start = (interval.end = start) + 1;
5558 return false;
5559 }
5560
5561 virtual void invalidate()
5562 {
5563 server.invalidate();
5564 port.invalidate();
5565 path.invalidate();
5566 params.clear();
5567 parser::invalidate();
5568 }
5569
5570 public:
5571 http_url_server server;
5572 http_url_port port;
5573 http_url_path path;
5574 std::list<http_url_parameter> params;
5575 };
5576
5580 class http_language : public parser
5581 {
5582 public:
5583 virtual bool match(
5584 _In_reads_or_z_(end) const char* text,
5585 _In_ size_t start = 0,
5586 _In_ size_t end = (size_t)-1,
5587 _In_ int flags = match_default)
5588 {
5589 assert(text || start >= end);
5590 interval.end = start;
5591 components.clear();
5592 for (;;) {
5593 if (interval.end < end && text[interval.end]) {
5595 k.end = interval.end;
5596 for (;;) {
5597 if (k.end < end && text[k.end]) {
5598 if (isalpha(text[k.end]))
5599 k.end++;
5600 else
5601 break;
5602 }
5603 else
5604 break;
5605 }
5606 if (interval.end < k.end) {
5607 k.start = interval.end;
5608 interval.end = k.end;
5609 components.push_back(k);
5610 }
5611 else
5612 break;
5613 if (interval.end < end && text[interval.end] == '-')
5614 interval.end++;
5615 else
5616 break;
5617 }
5618 else
5619 break;
5620 }
5621 if (!components.empty()) {
5622 interval.start = start;
5623 interval.end = components.back().end;
5624 return true;
5625 }
5626 interval.start = (interval.end = start) + 1;
5627 return false;
5628 }
5629
5630 virtual void invalidate()
5631 {
5632 components.clear();
5633 parser::invalidate();
5634 }
5635
5636 public:
5637 std::vector<stdex::interval<size_t>> components;
5638 };
5639
5643 class http_weight : public parser
5644 {
5645 public:
5646 http_weight(_In_ const std::locale& locale = std::locale()) :
5647 parser(locale),
5648 value(1.0f)
5649 {}
5650
5651 virtual bool match(
5652 _In_reads_or_z_(end) const char* text,
5653 _In_ size_t start = 0,
5654 _In_ size_t end = (size_t)-1,
5655 _In_ int flags = match_default)
5656 {
5657 assert(text || start >= end);
5658 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
5659 interval.end = start;
5660 for (;;) {
5661 if (interval.end < end && text[interval.end]) {
5662 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5663 celi_del = celi_del * 10 + text[interval.end] - '0';
5664 interval.end++;
5665 }
5666 else if (text[interval.end] == '.') {
5667 interval.end++;
5668 for (;;) {
5669 if (interval.end < end && text[interval.end]) {
5670 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5671 decimalni_del = decimalni_del * 10 + text[interval.end] - '0';
5672 decimalni_del_n *= 10;
5673 interval.end++;
5674 }
5675 else
5676 break;
5677 }
5678 else
5679 break;
5680 }
5681 break;
5682 }
5683 else
5684 break;
5685 }
5686 else
5687 break;
5688 }
5689 if (start < interval.end) {
5690 value = (float)((double)celi_del + (double)decimalni_del / decimalni_del_n);
5691 interval.start = start;
5692 return true;
5693 }
5694 value = 1.0f;
5695 interval.start = (interval.end = start) + 1;
5696 return false;
5697 }
5698
5699 virtual void invalidate()
5700 {
5701 value = 1.0f;
5702 parser::invalidate();
5703 }
5704
5705 public:
5706 float value;
5707 };
5708
5712 class http_asterisk : public parser
5713 {
5714 public:
5715 virtual bool match(
5716 _In_reads_or_z_(end) const char* text,
5717 _In_ size_t start = 0,
5718 _In_ size_t end = (size_t)-1,
5719 _In_ int flags = match_default)
5720 {
5721 assert(text || end <= start);
5722 if (start < end && text[start] == '*') {
5723 interval.end = (interval.start = start) + 1;
5724 return true;
5725 }
5726 interval.start = (interval.end = start) + 1;
5727 return false;
5728 }
5729 };
5730
5734 template <class T, class T_asterisk = http_asterisk>
5736 {
5737 public:
/// Constructs the parser.
///
/// \param[in] locale  Locale passed to the base parser and to the `factor` sub-parser
5738 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
5739 parser(locale),
5740 factor(locale)
5741 {}
5742
5743 virtual bool match(
5744 _In_reads_or_z_(end) const char* text,
5745 _In_ size_t start = 0,
5746 _In_ size_t end = (size_t)-1,
5747 _In_ int flags = match_default)
5748 {
5749 assert(text || start >= end);
5750 size_t konec_vrednosti;
5751 interval.end = start;
5752 if (asterisk.match(text, interval.end, end, flags)) {
5753 interval.end = konec_vrednosti = asterisk.interval.end;
5754 value.invalidate();
5755 }
5756 else if (value.match(text, interval.end, end, flags)) {
5757 interval.end = konec_vrednosti = value.interval.end;
5758 asterisk.invalidate();
5759 }
5760 else {
5761 asterisk.invalidate();
5762 value.invalidate();
5763 interval.start = (interval.end = start) + 1;
5764 return false;
5765 }
5766
5767 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5768 if (interval.end < end && text[interval.end] == ';') {
5769 interval.end++;
5770 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5771 if (interval.end < end && (text[interval.end] == 'q' || text[interval.end] == 'Q')) {
5772 interval.end++;
5773 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5774 if (interval.end < end && text[interval.end] == '=') {
5775 interval.end++;
5776 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5777 if (factor.match(text, interval.end, end, flags))
5778 interval.end = factor.interval.end;
5779 }
5780 }
5781 }
5782 if (!factor.interval) {
5783 factor.invalidate();
5784 interval.end = konec_vrednosti;
5785 }
5786 interval.start = start;
5787 return true;
5788 }
5789
/// Marks the parser as not matched: invalidates the `asterisk`, `value` and
/// `factor` sub-parsers and then the base parser.
5790 virtual void invalidate()
5791 {
5792 asterisk.invalidate();
5793 value.invalidate();
5794 factor.invalidate();
5795 parser::invalidate();
5796 }
5797
5798 public:
5799 T_asterisk asterisk;
5800 T value;
5801 http_weight factor;
5802 };
5803
5808 {
5809 public:
5810 virtual bool match(
5811 _In_reads_or_z_(end) const char* text,
5812 _In_ size_t start = 0,
5813 _In_ size_t end = (size_t)-1,
5814 _In_ int flags = match_default)
5815 {
5816 assert(text || start >= end);
5817 interval.end = start;
5818 if (interval.end < end && text[interval.end] == '$')
5819 interval.end++;
5820 else
5821 goto error;
5822 if (name.match(text, interval.end, end, flags))
5823 interval.end = name.interval.end;
5824 else
5825 goto error;
5826 while (m_space.match(text, interval.end, end, flags))
5827 interval.end = m_space.interval.end;
5828 if (interval.end < end && text[interval.end] == '=')
5829 interval.end++;
5830 else
5831 goto error;
5832 while (m_space.match(text, interval.end, end, flags))
5833 interval.end = m_space.interval.end;
5834 if (value.match(text, interval.end, end, flags))
5835 interval.end = value.interval.end;
5836 else
5837 goto error;
5838 interval.start = start;
5839 return true;
5840
5841 error:
5842 name.invalidate();
5843 value.invalidate();
5844 interval.start = (interval.end = start) + 1;
5845 return false;
5846 }
5847
/// Marks the parser as not matched: invalidates the `name` and `value`
/// sub-parsers and then the base parser.
5848 virtual void invalidate()
5849 {
5850 name.invalidate();
5851 value.invalidate();
5852 parser::invalidate();
5853 }
5854
5855 public:
5856 http_token name;
5857 http_value value;
5858
5859 protected:
5860 http_space m_space;
5861 };
5862
5866 class http_cookie : public parser
5867 {
5868 public:
5869 virtual bool match(
5870 _In_reads_or_z_(end) const char* text,
5871 _In_ size_t start = 0,
5872 _In_ size_t end = (size_t)-1,
5873 _In_ int flags = match_default)
5874 {
5875 assert(text || start >= end);
5876 interval.end = start;
5877 if (name.match(text, interval.end, end, flags))
5879 else
5880 goto error;
5881 while (m_space.match(text, interval.end, end, flags))
5882 interval.end = m_space.interval.end;
5883 if (interval.end < end && text[interval.end] == '=')
5884 interval.end++;
5885 else
5886 goto error;
5887 while (m_space.match(text, interval.end, end, flags))
5888 interval.end = m_space.interval.end;
5889 if (value.match(text, interval.end, end, flags))
5891 else
5892 goto error;
5893 params.clear();
5894 for (;;) {
5895 if (interval.end < end && text[interval.end]) {
5896 if (m_space.match(text, interval.end, end, flags))
5897 interval.end = m_space.interval.end;
5898 else if (text[interval.end] == ';') {
5899 interval.end++;
5900 while (m_space.match(text, interval.end, end, flags))
5901 interval.end = m_space.interval.end;
5903 if (param.match(text, interval.end, end, flags)) {
5904 interval.end = param.interval.end;
5905 params.push_back(std::move(param));
5906 }
5907 else
5908 break;
5909 }
5910 else
5911 break;
5912 }
5913 else
5914 break;
5915 }
5916 interval.start = start;
5917 interval.end = params.empty() ? value.interval.end : params.back().interval.end;
5918 return true;
5919
5920 error:
5921 name.invalidate();
5922 value.invalidate();
5923 params.clear();
5924 interval.start = (interval.end = start) + 1;
5925 return false;
5926 }
5927
5928 virtual void invalidate()
5929 {
5930 name.invalidate();
5931 value.invalidate();
5932 params.clear();
5933 parser::invalidate();
5934 }
5935
5936 public:
5939 std::list<http_cookie_parameter> params;
5940
5941 protected:
5942 http_space m_space;
5943 };
5944
5948 class http_agent : public parser
5949 {
5950 public:
5951 virtual bool match(
5952 _In_reads_or_z_(end) const char* text,
5953 _In_ size_t start = 0,
5954 _In_ size_t end = (size_t)-1,
5955 _In_ int flags = match_default)
5956 {
5957 assert(text || start >= end);
5958 interval.end = start;
5959 type.start = interval.end;
5960 for (;;) {
5961 if (interval.end < end && text[interval.end]) {
5962 if (text[interval.end] == '/') {
5963 type.end = interval.end;
5964 interval.end++;
5965 version.start = interval.end;
5966 for (;;) {
5967 if (interval.end < end && text[interval.end]) {
5968 if (isspace(text[interval.end])) {
5969 version.end = interval.end;
5970 break;
5971 }
5972 else
5973 interval.end++;
5974 }
5975 else {
5976 version.end = interval.end;
5977 break;
5978 }
5979 }
5980 break;
5981 }
5982 else if (isspace(text[interval.end])) {
5983 type.end = interval.end;
5984 break;
5985 }
5986 else
5987 interval.end++;
5988 }
5989 else {
5990 type.end = interval.end;
5991 break;
5992 }
5993 }
5994 if (start < interval.end) {
5995 interval.start = start;
5996 return true;
5997 }
5998 type.start = 1;
5999 type.end = 0;
6000 version.start = 1;
6001 version.end = 0;
6002 interval.start = 1;
6003 interval.end = 0;
6004 return false;
6005 }
6006
6007 virtual void invalidate()
6008 {
6009 type.start = 1;
6010 type.end = 0;
6011 version.start = 1;
6012 version.end = 0;
6013 parser::invalidate();
6014 }
6015
6016 public:
6019 };
6020
6024 class http_protocol : public parser
6025 {
6026 public:
6027 http_protocol(_In_ const std::locale& locale = std::locale()) :
6028 parser(locale),
6029 version(0x009)
6030 {}
6031
6032 virtual bool match(
6033 _In_reads_or_z_(end) const char* text,
6034 _In_ size_t start = 0,
6035 _In_ size_t end = (size_t)-1,
6036 _In_ int flags = match_default)
6037 {
6038 assert(text || start >= end);
6039 interval.end = start;
6040 type.start = interval.end;
6041 for (;;) {
6042 if (interval.end < end && text[interval.end]) {
6043 if (text[interval.end] == '/') {
6044 type.end = interval.end;
6045 interval.end++;
6046 break;
6047 }
6048 else if (isspace(text[interval.end]))
6049 goto error;
6050 else
6051 interval.end++;
6052 }
6053 else {
6054 type.end = interval.end;
6055 goto error;
6056 }
6057 }
6058 version_maj.start = interval.end;
6059 for (;;) {
6060 if (interval.end < end && text[interval.end]) {
6061 if (text[interval.end] == '.') {
6062 version_maj.end = interval.end;
6063 interval.end++;
6064 version_min.start = interval.end;
6065 for (;;) {
6066 if (interval.end < end && text[interval.end]) {
6067 if (isspace(text[interval.end])) {
6068 version_min.end = interval.end;
6069 version =
6070 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6071 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6072 break;
6073 }
6074 else
6075 interval.end++;
6076 }
6077 else
6078 goto error;
6079 }
6080 break;
6081 }
6082 else if (isspace(text[interval.end])) {
6083 version_maj.end = interval.end;
6084 version_min.start = 1;
6085 version_min.end = 0;
6086 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6087 break;
6088 }
6089 else
6090 interval.end++;
6091 }
6092 else
6093 goto error;
6094 }
6095 interval.start = start;
6096 return true;
6097
6098 error:
6099 type.start = 1;
6100 type.end = 0;
6101 version_maj.start = 1;
6102 version_maj.end = 0;
6103 version_min.start = 1;
6104 version_min.end = 0;
6105 version = 0x009;
6106 interval.start = 1;
6107 interval.end = 0;
6108 return false;
6109 }
6110
6111 virtual void invalidate()
6112 {
6113 type.start = 1;
6114 type.end = 0;
6115 version_maj.start = 1;
6116 version_maj.end = 0;
6117 version_min.start = 1;
6118 version_min.end = 0;
6119 version = 0x009;
6120 parser::invalidate();
6121 }
6122
6123 public:
6125 stdex::interval<size_t> version_maj;
6126 stdex::interval<size_t> version_min;
6127 uint16_t version;
6128 };
6129
6133 class http_request : public parser
6134 {
6135 public:
6136 http_request(_In_ const std::locale& locale = std::locale()) :
6137 parser(locale),
6138 url(locale),
6139 protocol(locale)
6140 {}
6141
6142 virtual bool match(
6143 _In_reads_or_z_(end) const char* text,
6144 _In_ size_t start = 0,
6145 _In_ size_t end = (size_t)-1,
6146 _In_ int flags = match_default)
6147 {
6148 assert(text || start >= end);
6149 interval.end = start;
6150
6151 for (;;) {
6152 if (m_line_break.match(text, interval.end, end, flags))
6153 goto error;
6154 else if (interval.end < end && text[interval.end]) {
6155 if (isspace(text[interval.end]))
6156 interval.end++;
6157 else
6158 break;
6159 }
6160 else
6161 goto error;
6162 }
6163 verb.start = interval.end;
6164 for (;;) {
6165 if (m_line_break.match(text, interval.end, end, flags))
6166 goto error;
6167 else if (interval.end < end && text[interval.end]) {
6168 if (isspace(text[interval.end])) {
6169 verb.end = interval.end;
6170 interval.end++;
6171 break;
6172 }
6173 else
6174 interval.end++;
6175 }
6176 else
6177 goto error;
6178 }
6179
6180 for (;;) {
6181 if (m_line_break.match(text, interval.end, end, flags))
6182 goto error;
6183 else if (interval.end < end && text[interval.end]) {
6184 if (isspace(text[interval.end]))
6185 interval.end++;
6186 else
6187 break;
6188 }
6189 else
6190 goto error;
6191 }
6192 if (url.match(text, interval.end, end, flags))
6194 else
6195 goto error;
6196
6197 protocol.invalidate();
6198 for (;;) {
6199 if (m_line_break.match(text, interval.end, end, flags)) {
6200 interval.end = m_line_break.interval.end;
6201 goto end;
6202 }
6203 else if (interval.end < end && text[interval.end]) {
6204 if (isspace(text[interval.end]))
6205 interval.end++;
6206 else
6207 break;
6208 }
6209 else
6210 goto end;
6211 }
6212 for (;;) {
6213 if (m_line_break.match(text, interval.end, end, flags)) {
6214 interval.end = m_line_break.interval.end;
6215 goto end;
6216 }
6217 else if (protocol.match(text, interval.end, end, flags)) {
6218 interval.end = protocol.interval.end;
6219 break;
6220 }
6221 else
6222 goto end;
6223 }
6224
6225 for (;;) {
6226 if (m_line_break.match(text, interval.end, end, flags)) {
6227 interval.end = m_line_break.interval.end;
6228 break;
6229 }
6230 else if (interval.end < end && text[interval.end])
6231 interval.end++;
6232 else
6233 goto end;
6234 }
6235
6236 end:
6237 interval.start = start;
6238 return true;
6239
6240 error:
6241 verb.start = 1;
6242 verb.end = 0;
6243 url.invalidate();
6244 protocol.invalidate();
6245 interval.start = 1;
6246 interval.end = 0;
6247 return false;
6248 }
6249
6250 virtual void invalidate()
6251 {
6252 verb.start = 1;
6253 verb.end = 0;
6254 url.invalidate();
6255 protocol.invalidate();
6256 parser::invalidate();
6257 }
6258
6259 public:
6261 http_url url;
6262 http_protocol protocol;
6263
6264 protected:
6265 http_line_break m_line_break;
6266 };
6267
6271 class http_header : public parser
6272 {
6273 public:
6274 virtual bool match(
6275 _In_reads_or_z_(end) const char* text,
6276 _In_ size_t start = 0,
6277 _In_ size_t end = (size_t)-1,
6278 _In_ int flags = match_default)
6279 {
6280 assert(text || start >= end);
6281 interval.end = start;
6282
6283 if (m_line_break.match(text, interval.end, end, flags) ||
6284 interval.end < end && text[interval.end] && isspace(text[interval.end]))
6285 goto error;
6286 name.start = interval.end;
6287 for (;;) {
6288 if (m_line_break.match(text, interval.end, end, flags))
6289 goto error;
6290 else if (interval.end < end && text[interval.end]) {
6291 if (isspace(text[interval.end])) {
6292 name.end = interval.end;
6293 interval.end++;
6294 for (;;) {
6295 if (m_line_break.match(text, interval.end, end, flags))
6296 goto error;
6297 else if (interval.end < end && text[interval.end]) {
6298 if (isspace(text[interval.end]))
6299 interval.end++;
6300 else
6301 break;
6302 }
6303 else
6304 goto error;
6305 }
6306 if (interval.end < end && text[interval.end] == ':') {
6307 interval.end++;
6308 break;
6309 }
6310 else
6311 goto error;
6312 break;
6313 }
6314 else if (text[interval.end] == ':') {
6315 name.end = interval.end;
6316 interval.end++;
6317 break;
6318 }
6319 else
6320 interval.end++;
6321 }
6322 else
6323 goto error;
6324 }
6325 value.start = (size_t)-1;
6326 value.end = 0;
6327 for (;;) {
6328 if (m_line_break.match(text, interval.end, end, flags)) {
6329 interval.end = m_line_break.interval.end;
6330 if (!m_line_break.match(text, interval.end, end, flags) &&
6331 interval.end < end && text[interval.end] && isspace(text[interval.end]))
6332 interval.end++;
6333 else
6334 break;
6335 }
6336 else if (interval.end < end && text[interval.end]) {
6337 if (isspace(text[interval.end]))
6338 interval.end++;
6339 else {
6340 if (value.start == (size_t)-1) value.start = interval.end;
6341 value.end = ++interval.end;
6342 }
6343 }
6344 else
6345 break;
6346 }
6347 interval.start = start;
6348 return true;
6349
6350 error:
6351 name.start = 1;
6352 name.end = 0;
6353 value.start = 1;
6354 value.end = 0;
6355 interval.start = 1;
6356 interval.end = 0;
6357 return false;
6358 }
6359
6360 virtual void invalidate()
6361 {
6362 name.start = 1;
6363 name.end = 0;
6364 value.start = 1;
6365 value.end = 0;
6366 parser::invalidate();
6367 }
6368
6369 public:
6372
6373 protected:
6374 http_line_break m_line_break;
6375 };
6376
6380 template <class T>
6381 class http_value_collection : public T
6382 {
6383 public:
6384 void insert(
6385 _In_reads_or_z_(end) const char* text,
6386 _In_ size_t start = 0,
6387 _In_ size_t end = (size_t)-1,
6388 _In_ int flags = match_default)
6389 {
6390 while (start < end) {
6391 while (start < end && text[start] && isspace(text[start])) start++;
6392 if (start < end && text[start] == ',') {
6393 start++;
6394 while (start < end&& text[start] && isspace(text[start])) start++;
6395 }
6396 T::key_type el;
6397 if (el.match(text, start, end, flags)) {
6398 start = el.interval.end;
6399 T::insert(std::move(el));
6400 }
6401 else
6402 break;
6403 }
6404 }
6405 };
6406
6407 template <class T>
6409 constexpr bool operator()(const T& a, const T& b) const noexcept
6410 {
6411 return a.factor.value > b.factor.value;
6412 }
6413 };
6414
6418 template <class T, class _Alloc = std::allocator<T>>
6420
6424 template <class T>
6426 {
6427 public:
6429 _In_ const std::shared_ptr<basic_parser<T>>& quote,
6430 _In_ const std::shared_ptr<basic_parser<T>>& chr,
6431 _In_ const std::shared_ptr<basic_parser<T>>& escape,
6432 _In_ const std::shared_ptr<basic_parser<T>>& sol,
6433 _In_ const std::shared_ptr<basic_parser<T>>& bs,
6434 _In_ const std::shared_ptr<basic_parser<T>>& ff,
6435 _In_ const std::shared_ptr<basic_parser<T>>& lf,
6436 _In_ const std::shared_ptr<basic_parser<T>>& cr,
6437 _In_ const std::shared_ptr<basic_parser<T>>& htab,
6438 _In_ const std::shared_ptr<basic_parser<T>>& uni,
6439 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
6440 _In_ const std::locale& locale = std::locale()) :
6441 basic_parser<T>(locale),
6442 m_quote(quote),
6443 m_chr(chr),
6444 m_escape(escape),
6445 m_sol(sol),
6446 m_bs(bs),
6447 m_ff(ff),
6448 m_lf(lf),
6449 m_cr(cr),
6450 m_htab(htab),
6451 m_uni(uni),
6452 m_hex(hex)
6453 {}
6454
6455 virtual bool match(
6456 _In_reads_or_z_(end) const T* text,
6457 _In_ size_t start = 0,
6458 _In_ size_t end = (size_t)-1,
6459 _In_ int flags = match_default)
6460 {
6461 assert(text || start >= end);
6462 interval.end = start;
6463 if (m_quote->match(text, interval.end, end, flags)) {
6464 interval.end = m_quote->interval.end;
6465 value.clear();
6466 for (;;) {
6467 if (m_quote->match(text, interval.end, end, flags)) {
6468 interval.start = start;
6469 interval.end = m_quote->interval.end;
6470 return true;
6471 }
6472 if (m_escape->match(text, interval.end, end, flags)) {
6473 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
6474 value += '"'; interval.end = m_quote->interval.end;
6475 continue;
6476 }
6477 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
6478 value += '/'; interval.end = m_sol->interval.end;
6479 continue;
6480 }
6481 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
6482 value += '\b'; interval.end = m_bs->interval.end;
6483 continue;
6484 }
6485 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
6486 value += '\f'; interval.end = m_ff->interval.end;
6487 continue;
6488 }
6489 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
6490 value += '\n'; interval.end = m_lf->interval.end;
6491 continue;
6492 }
6493 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
6494 value += '\r'; interval.end = m_cr->interval.end;
6495 continue;
6496 }
6497 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
6498 value += '\t'; interval.end = m_htab->interval.end;
6499 continue;
6500 }
6501 if (
6502 m_uni->match(text, m_escape->interval.end, end, flags) &&
6503 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
6504 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
6505 {
6506 assert(m_hex->value <= 0xffff);
6507 if (sizeof(T) == 1) {
6508 if (m_hex->value > 0x7ff) {
6509 value += (T)(0xe0 | (m_hex->value >> 12) & 0x0f);
6510 value += (T)(0x80 | (m_hex->value >> 6) & 0x3f);
6511 value += (T)(0x80 | m_hex->value & 0x3f);
6512 }
6513 else if (m_hex->value > 0x7f) {
6514 value += (T)(0xc0 | (m_hex->value >> 6) & 0x1f);
6515 value += (T)(0x80 | m_hex->value & 0x3f);
6516 }
6517 else
6518 value += (T)(m_hex->value & 0x7f);
6519 }
6520 else
6521 value += (T)m_hex->value;
6522 interval.end = m_hex->interval.end;
6523 continue;
6524 }
6525 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
6526 value += '\\'; interval.end = m_escape->interval.end;
6527 continue;
6528 }
6529 }
6530 if (m_chr->match(text, interval.end, end, flags)) {
6531 value.Prilepi(text + m_chr->interval.start, m_chr->interval.size());
6532 interval.end = m_chr->interval.end;
6533 continue;
6534 }
6535 break;
6536 }
6537 }
6538 value.clear();
6539 interval.start = (interval.end = start) + 1;
6540 return false;
6541 }
6542
6543 virtual void invalidate()
6544 {
6545 value.clear();
6547 }
6548
6549 public:
6550 std::basic_string<T> value;
6551
6552 protected:
6553 std::shared_ptr<basic_parser<T>> m_quote;
6554 std::shared_ptr<basic_parser<T>> m_chr;
6555 std::shared_ptr<basic_parser<T>> m_escape;
6556 std::shared_ptr<basic_parser<T>> m_sol;
6557 std::shared_ptr<basic_parser<T>> m_bs;
6558 std::shared_ptr<basic_parser<T>> m_ff;
6559 std::shared_ptr<basic_parser<T>> m_lf;
6560 std::shared_ptr<basic_parser<T>> m_cr;
6561 std::shared_ptr<basic_parser<T>> m_htab;
6562 std::shared_ptr<basic_parser<T>> m_uni;
6563 std::shared_ptr<basic_integer16<T>> m_hex;
6564 };
6565
6568#ifdef _UNICODE
6569 using tjson_string = wjson_string;
6570#else
6571 using tjson_string = json_string;
6572#endif
6573 }
6574}
6575
6576#undef ENUM_FLAG_OPERATOR
6577#undef ENUM_FLAGS
6578
6579#ifdef _MSC_VER
6580#pragma warning(pop)
6581#endif
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4390
Test for any code unit.
Definition parser.hpp:217
Test for beginning of line.
Definition parser.hpp:611
Test for any.
Definition parser.hpp:1053
Test for chemical formula.
Definition parser.hpp:4664
Test for any code unit from a given string of code units.
Definition parser.hpp:716
Test for specific code unit.
Definition parser.hpp:287
Test for date.
Definition parser.hpp:4020
Test for valid DNS domain character.
Definition parser.hpp:2801
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2839
Test for DNS domain/hostname.
Definition parser.hpp:2901
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2965
Test for e-mail address.
Definition parser.hpp:3789
Test for emoticon.
Definition parser.hpp:3897
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3986
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3987
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3989
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3988
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3985
Test for end of line.
Definition parser.hpp:649
Test for fraction.
Definition parser.hpp:1682
Test for decimal integer.
Definition parser.hpp:1291
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1376
bool has_separators
Did integer have any separators?
Definition parser.hpp:1436
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1435
Test for hexadecimal integer.
Definition parser.hpp:1457
Base class for integer testing.
Definition parser.hpp:1269
size_t value
Calculated value of the numeral.
Definition parser.hpp:1283
Test for IPv4 address.
Definition parser.hpp:2341
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2456
struct in_addr value
IPv4 address value.
Definition parser.hpp:2457
Test for IPv6 address.
Definition parser.hpp:2560
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2764
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2762
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2763
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2488
Test for repeating.
Definition parser.hpp:906
bool m_greedy
try to match as long sequence as possible
Definition parser.hpp:945
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:942
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:943
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:944
Test for JSON string.
Definition parser.hpp:6426
Test for mixed numeral.
Definition parser.hpp:1917
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:2023
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2021
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2020
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2019
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2022
Test for monetary numeral.
Definition parser.hpp:2212
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2318
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2323
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2321
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2324
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2322
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2319
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2320
"No-op" match
Definition parser.hpp:185
Base template for all parsers.
Definition parser.hpp:66
interval< size_t > interval
Region of the last match.
Definition parser.hpp:165
Test for permutation.
Definition parser.hpp:1193
Test for phone number.
Definition parser.hpp:4513
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4639
Test for any punctuation code unit.
Definition parser.hpp:459
Test for Roman numeral.
Definition parser.hpp:1566
Test for scientific numeral.
Definition parser.hpp:2043
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2187
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2191
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2185
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2186
double value
Calculated value of the numeral.
Definition parser.hpp:2195
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2193
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2190
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2192
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2194
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2189
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2188
Test for match score.
Definition parser.hpp:1745
Test for sequence.
Definition parser.hpp:1002
Definition parser.hpp:684
Test for signed numeral.
Definition parser.hpp:1831
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1899
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1898
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1897
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1900
Test for any space code unit.
Definition parser.hpp:380
Test for any space or punctuation code unit.
Definition parser.hpp:533
Test for any string.
Definition parser.hpp:1121
Test for given string.
Definition parser.hpp:811
Test for time.
Definition parser.hpp:4287
Test for valid URL password character.
Definition parser.hpp:3083
Test for valid URL path character.
Definition parser.hpp:3183
Test for URL path.
Definition parser.hpp:3291
Test for valid URL username character.
Definition parser.hpp:2984
Test for URL.
Definition parser.hpp:3432
Test for HTTP agent.
Definition parser.hpp:5949
Test for HTTP any type.
Definition parser.hpp:5071
Test for HTTP asterisk.
Definition parser.hpp:5713
Test for HTTP header.
Definition parser.hpp:6272
Test for HTTP language (RFC1766)
Definition parser.hpp:5581
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:4745
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5103
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5158
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5016
http_token name
Parameter name.
Definition parser.hpp:5060
http_value value
Parameter value.
Definition parser.hpp:5061
Test for HTTP protocol.
Definition parser.hpp:6025
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6127
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:4906
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:4962
Test for HTTP request.
Definition parser.hpp:6134
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:4781
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:4818
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:4852
Test for HTTP URL parameter.
Definition parser.hpp:5398
Test for HTTP URL path segment.
Definition parser.hpp:5309
Test for HTTP URL path segment.
Definition parser.hpp:5342
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:5391
Test for HTTP URL port.
Definition parser.hpp:5253
Test for HTTP URL server.
Definition parser.hpp:5216
Test for HTTP URL.
Definition parser.hpp:5479
Collection of HTTP values.
Definition parser.hpp:6382
Test for HTTP value (RFC2616: value)
Definition parser.hpp:4972
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5008
http_token token
Value when matched as token.
Definition parser.hpp:5009
Test for HTTP weight factor.
Definition parser.hpp:5644
float value
Calculated value of the weight factor.
Definition parser.hpp:5706
Test for HTTP weighted value.
Definition parser.hpp:5736
Base template for collection-holding parsers.
Definition parser.hpp:962
Test for any SGML code point.
Definition parser.hpp:249
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:768
Test for specific SGML code point.
Definition parser.hpp:336
Test for valid DNS domain SGML character.
Definition parser.hpp:2857
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2526
Test for any SGML punctuation code point.
Definition parser.hpp:500
Test for any SGML space code point.
Definition parser.hpp:423
Test for any SGML space or punctuation code point.
Definition parser.hpp:576
Test for SGML given string.
Definition parser.hpp:858
Test for valid URL password SGML character.
Definition parser.hpp:3135
Test for valid URL path SGML character.
Definition parser.hpp:3239
Test for valid URL username SGML character.
Definition parser.hpp:3035
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
T start
interval start
Definition interval.hpp:19
Definition parser.hpp:6408