stdex
Additional custom algorithms, or algorithms not covered by Standard C++
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "interval.hpp"
9#include "memory.hpp"
10#include "sal.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include <assert.h>
14#include <stdarg.h>
15#include <stdint.h>
16#include <limits>
17#include <list>
18#include <memory>
19#include <set>
20#include <string>
21#ifdef _WIN32
22#include <winsock2.h>
23#else
24#include <inaddr.h>
25#include <in6addr.h>
26#endif
27
28#ifdef _MSC_VER
29#pragma warning(push)
30#pragma warning(disable: 4100)
31#endif
32
33namespace stdex
34{
35 namespace parser
36 {
40 constexpr int match_default = 0;
41 constexpr int match_case_insensitive = 0x1;
42 constexpr int match_multiline = 0x2;
43
47 template <class T>
49 {
50 public:
51 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
52 virtual ~basic_parser() {}
53
54 bool search(
55 _In_reads_or_z_(end) const T* text,
56 _In_ size_t start = 0,
57 _In_ size_t end = (size_t)-1,
58 _In_ int flags = match_default)
59 {
60 for (size_t i = start; i < end && text[i]; i++)
61 if (match(text, i, end, flags))
62 return true;
63 return false;
64 }
65
66 virtual bool match(
67 _In_reads_or_z_(end) const T* text,
68 _In_ size_t start = 0,
69 _In_ size_t end = (size_t)-1,
70 _In_ int flags = match_default) = 0;
71
72 template<class _Traits, class _Ax>
73 inline bool match(
74 const std::basic_string<T, _Traits, _Ax>& text,
75 _In_ size_t start = 0,
76 _In_ size_t end = (size_t)-1,
77 _In_ int flags = match_default)
78 {
79 return match(text.c_str(), start, std::min<size_t>(end, text.size()), flags);
80 }
81
82 virtual void invalidate()
83 {
84 interval.start = 1;
85 interval.end = 0;
86 }
87
88 protected:
90 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
91 {
92 if (text[start] == '&') {
93 // Potential entity start
94 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
95 for (chr_end = start + 1;; chr_end++) {
96 if (chr_end >= end || text[chr_end] == 0) {
97 // Unterminated entity
98 break;
99 }
100 if (text[chr_end] == ';') {
101 // Entity end
102 size_t n = chr_end - start - 1;
103 if (n >= 2 && text[start + 1] == '#') {
104 // Numerical entity
105 char32_t unicode;
106 if (text[start + 2] == 'x' || text[start + 2] == 'X')
107 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
108 else
109 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
110#ifdef _WIN32
111 if (unicode < 0x10000) {
112 buf[0] = (wchar_t)unicode;
113 buf[1] = 0;
114 }
115 else {
116 ucs4_to_surrogate_pair(buf, unicode);
117 buf[2] = 0;
118 }
119#else
120 buf[0] = (wchar_t)unicode;
121 buf[1] = 0;
122#endif
123 chr_end++;
124 return buf;
125 }
126 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
127 if (entity_w) {
128 chr_end++;
129 return entity_w;
130 }
131 // Unknown entity.
132 break;
133 }
134 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
135 // This char cannot possibly be a part of entity.
136 break;
137 }
138 }
139 }
140 buf[0] = text[start];
141 buf[1] = 0;
142 chr_end = start + 1;
143 return buf;
144 }
146
147 public:
149
150 protected:
151 std::locale m_locale;
152 };
153
156#ifdef _UNICODE
157 using tparser = wparser;
158#else
159 using tparser = parser;
160#endif
162
166 template <class T>
167 class basic_noop : public basic_parser<T>
168 {
169 public:
170 virtual bool match(
171 _In_reads_or_z_(end) const T* text,
172 _In_ size_t start = 0,
173 _In_ size_t end = (size_t)-1,
174 _In_ int flags = match_default)
175 {
176 assert(text || start >= end);
177 if (start < end && text[start]) {
178 interval.start = interval.end = start;
179 return true;
180 }
181 interval.start = (interval.end = start) + 1;
182 return false;
183 }
184 };
185
186 using noop = basic_noop<char>;
188#ifdef _UNICODE
189 using tnoop = wnoop;
190#else
191 using tnoop = noop;
192#endif
194
198 template <class T>
199 class basic_any_cu : public basic_parser<T>
200 {
201 public:
202 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
203
204 virtual bool match(
205 _In_reads_or_z_(end) const T* text,
206 _In_ size_t start = 0,
207 _In_ size_t end = (size_t)-1,
208 _In_ int flags = match_default)
209 {
210 assert(text || start >= end);
211 if (start < end && text[start]) {
212 interval.end = (interval.start = start) + 1;
213 return true;
214 }
215 interval.start = (interval.end = start) + 1;
216 return false;
217 }
218 };
219
222#ifdef _UNICODE
223 using tany_cu = wany_cu;
224#else
225 using tany_cu = any_cu;
226#endif
227
231 class sgml_any_cp : public basic_any_cu<char>
232 {
233 public:
234 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
235
236 virtual bool match(
237 _In_reads_or_z_(end) const char* text,
238 _In_ size_t start = 0,
239 _In_ size_t end = (size_t)-1,
240 _In_ int flags = match_default)
241 {
242 assert(text || start >= end);
243 if (start < end && text[start]) {
244 if (text[start] == '&') {
245 // SGML entity
246 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
247 for (interval.end = start + 1; interval.end < end && text[interval.end]; interval.end++)
248 if (text[interval.end] == ';') {
249 interval.end++;
250 interval.start = start;
251 return true;
252 }
253 else if (text[interval.end] == '&' || ctype.is(ctype.space, text[interval.end]))
254 break;
255 // Unterminated entity
256 }
257 interval.end = (interval.start = start) + 1;
258 return true;
259 }
260 interval.start = (interval.end = start) + 1;
261 return false;
262 }
263 };
264
268 template <class T>
269 class basic_cu : public basic_parser<T>
270 {
271 public:
272 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
273 basic_parser<T>(locale),
274 m_chr(chr),
275 m_invert(invert)
276 {}
277
278 virtual bool match(
279 _In_reads_or_z_(end) const T* text,
280 _In_ size_t start = 0,
281 _In_ size_t end = (size_t)-1,
282 _In_ int flags = match_default)
283 {
284 assert(text || start >= end);
285 if (start < end && text[start]) {
286 bool r;
287 if (flags & match_case_insensitive) {
288 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
289 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
290 }
291 else
292 r = text[start] == m_chr;
293 if (r && !m_invert || !r && m_invert) {
294 interval.end = (interval.start = start) + 1;
295 return true;
296 }
297 }
298 interval.start = (interval.end = start) + 1;
299 return false;
300 }
301
302 protected:
303 T m_chr;
304 bool m_invert;
305 };
306
307 using cu = basic_cu<char>;
308 using wcu = basic_cu<wchar_t>;
309#ifdef _UNICODE
310 using tcu = wcu;
311#else
312 using tcu = cu;
313#endif
314
318 class sgml_cp : public sgml_parser
319 {
320 public:
321 sgml_cp(const char* chr, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
322 sgml_parser(locale),
323 m_invert(invert)
324 {
325 assert(chr || !count);
326 wchar_t buf[3];
327 size_t chr_end;
328 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
329 }
330
331 virtual bool match(
332 _In_reads_or_z_(end) const char* text,
333 _In_ size_t start = 0,
334 _In_ size_t end = (size_t)-1,
335 _In_ int flags = match_default)
336 {
337 assert(text || start >= end);
338 if (start < end && text[start]) {
339 wchar_t buf[3];
340 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
341 bool r = ((flags & match_case_insensitive) ?
342 stdex::strnicmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size(), m_locale) :
343 stdex::strncmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size())) == 0;
344 if (r && !m_invert || !r && m_invert) {
345 interval.start = start;
346 return true;
347 }
348 }
349 interval.start = (interval.end = start) + 1;
350 return false;
351 }
352
353 protected:
354 std::wstring m_chr;
355 bool m_invert;
356 };
357
361 template <class T>
362 class basic_space_cu : public basic_parser<T>
363 {
364 public:
365 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
366 basic_parser<T>(locale),
367 m_invert(invert)
368 {}
369
370 virtual bool match(
371 _In_reads_or_z_(end) const T* text,
372 _In_ size_t start = 0,
373 _In_ size_t end = (size_t)-1,
374 _In_ int flags = match_default)
375 {
376 assert(text || start >= end);
377 if (start < end && text[start]) {
378 bool r =
379 ((flags & match_multiline) || !islbreak(text[start])) &&
380 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::space, text[start]);
381 if (r && !m_invert || !r && m_invert) {
382 interval.end = (interval.start = start) + 1;
383 return true;
384 }
385 }
386 interval.start = (interval.end = start) + 1;
387 return false;
388 }
389
390 protected:
391 bool m_invert;
392 };
393
396#ifdef _UNICODE
397 using tspace_cu = wspace_cu;
398#else
399 using tspace_cu = space_cu;
400#endif
401
405 class sgml_space_cp : public basic_space_cu<char>
406 {
407 public:
408 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
409 basic_space_cu<char>(invert, locale)
410 {}
411
412 virtual bool match(
413 _In_reads_or_z_(end) const char* text,
414 _In_ size_t start = 0,
415 _In_ size_t end = (size_t)-1,
416 _In_ int flags = match_default)
417 {
418 assert(text || start >= end);
419 if (start < end && text[start]) {
420 wchar_t buf[3];
421 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
422 const wchar_t* chr_end = chr + stdex::strlen(chr);
423 bool r =
424 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
425 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
426 if (r && !m_invert || !r && m_invert) {
427 interval.start = start;
428 return true;
429 }
430 }
431
432 interval.start = (interval.end = start) + 1;
433 return false;
434 }
435 };
436
440 template <class T>
441 class basic_punct_cu : public basic_parser<T>
442 {
443 public:
444 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
445 basic_parser<T>(locale),
446 m_invert(invert)
447 {}
448
449 virtual bool match(
450 _In_reads_or_z_(end) const T* text,
451 _In_ size_t start = 0,
452 _In_ size_t end = (size_t)-1,
453 _In_ int flags = match_default)
454 {
455 assert(text || start >= end);
456 if (start < end && text[start]) {
457 bool r = std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::punct, text[start]);
458 if (r && !m_invert || !r && m_invert) {
459 interval.end = (interval.start = start) + 1;
460 return true;
461 }
462 }
463 interval.start = (interval.end = start) + 1;
464 return false;
465 }
466
467 protected:
468 bool m_invert;
469 };
470
473#ifdef _UNICODE
474 using tpunct_cu = wpunct_cu;
475#else
476 using tpunct_cu = punct_cu;
477#endif
478
482 class sgml_punct_cp : public basic_punct_cu<char>
483 {
484 public:
485 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
486 basic_punct_cu<char>(invert, locale)
487 {}
488
489 virtual bool match(
490 _In_reads_or_z_(end) const char* text,
491 _In_ size_t start = 0,
492 _In_ size_t end = (size_t)-1,
493 _In_ int flags = match_default)
494 {
495 assert(text || start >= end);
496 if (start < end && text[start]) {
497 wchar_t buf[3];
498 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
499 const wchar_t* chr_end = chr + stdex::strlen(chr);
500 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
501 if (r && !m_invert || !r && m_invert) {
502 interval.start = start;
503 return true;
504 }
505 }
506 interval.start = (interval.end = start) + 1;
507 return false;
508 }
509 };
510
514 template <class T>
516 {
517 public:
518 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
519 basic_parser<T>(locale),
520 m_invert(invert)
521 {}
522
523 virtual bool match(
524 _In_reads_or_z_(end) const T* text,
525 _In_ size_t start = 0,
526 _In_ size_t end = (size_t)-1,
527 _In_ int flags = match_default)
528 {
529 assert(text || start >= end);
530 if (start < end && text[start]) {
531 bool r =
532 ((flags & match_multiline) || !islbreak(text[start])) &&
533 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
534 if (r && !m_invert || !r && m_invert) {
535 interval.end = (interval.start = start) + 1;
536 return true;
537 }
538 }
539 interval.start = (interval.end = start) + 1;
540 return false;
541 }
542
543 protected:
544 bool m_invert;
545 };
546
549#ifdef _UNICODE
551#else
553#endif
554
559 {
560 public:
561 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
562 basic_space_or_punct_cu<char>(invert, locale)
563 {}
564
565 virtual bool match(
566 _In_reads_or_z_(end) const char* text,
567 _In_ size_t start = 0,
568 _In_ size_t end = (size_t)-1,
569 _In_ int flags = match_default)
570 {
571 assert(text || start >= end);
572 if (start < end && text[start]) {
573 wchar_t buf[3];
574 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
575 const wchar_t* chr_end = chr + stdex::strlen(chr);
576 bool r =
577 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
578 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
579 if (r && !m_invert || !r && m_invert) {
580 interval.start = start;
581 return true;
582 }
583 }
584 interval.start = (interval.end = start) + 1;
585 return false;
586 }
587 };
588
592 template <class T>
593 class basic_bol : public basic_parser<T>
594 {
595 public:
596 basic_bol(bool invert = false) : m_invert(invert) {}
597
598 virtual bool match(
599 _In_reads_or_z_(end) const T* text,
600 _In_ size_t start = 0,
601 _In_ size_t end = (size_t)-1,
602 _In_ int flags = match_default)
603 {
604 assert(text || start >= end);
605 bool r = start == 0 || start <= end && islbreak(text[start - 1]);
606 if (r && !m_invert || !r && m_invert) {
607 interval.end = interval.start = start;
608 return true;
609 }
610 interval.start = (interval.end = start) + 1;
611 return false;
612 }
613
614 protected:
615 bool m_invert;
616 };
617
618 using bol = basic_bol<char>;
619 using wbol = basic_bol<wchar_t>;
620#ifdef _UNICODE
621 using tbol = wbol;
622#else
623 using tbol = bol;
624#endif
626
630 template <class T>
631 class basic_eol : public basic_parser<T>
632 {
633 public:
634 basic_eol(bool invert = false) : m_invert(invert) {}
635
636 virtual bool match(
637 _In_reads_or_z_(end) const T* text,
638 _In_ size_t start = 0,
639 _In_ size_t end = (size_t)-1,
640 _In_ int flags = match_default)
641 {
642 assert(text || start >= end);
643 bool r = islbreak(text[start]);
644 if (r && !m_invert || !r && m_invert) {
645 interval.end = interval.start = start;
646 return true;
647 }
648 interval.start = (interval.end = start) + 1;
649 return false;
650 }
651
652 protected:
653 bool m_invert;
654 };
655
656 using eol = basic_eol<char>;
657 using weol = basic_eol<wchar_t>;
658#ifdef _UNICODE
659 using teol = weol;
660#else
661 using teol = eol;
662#endif
664
665 template <class T>
666 class basic_set : public basic_parser<T>
667 {
668 public:
669 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
670 basic_parser<T>(locale),
671 hit_offset((size_t)-1),
672 m_invert(invert)
673 {}
674
675 virtual bool match(
676 _In_reads_or_z_(end) const T* text,
677 _In_ size_t start = 0,
678 _In_ size_t end = (size_t)-1,
679 _In_ int flags = match_default) = 0;
680
681 virtual void invalidate()
682 {
683 hit_offset = (size_t)-1;
685 }
686
687 public:
688 size_t hit_offset;
689
690 protected:
691 bool m_invert;
692 };
693
697 template <class T>
698 class basic_cu_set : public basic_set<T>
699 {
700 public:
702 _In_reads_or_z_(count) const T* set,
703 _In_ size_t count = (size_t)-1,
704 _In_ bool invert = false,
705 _In_ const std::locale& locale = std::locale()) :
706 basic_set<T>(invert, locale)
707 {
708 if (set)
709 m_set.assign(set, set + stdex::strnlen(set, count));
710 }
711
712 virtual bool match(
713 _In_reads_or_z_(end) const T* text,
714 _In_ size_t start = 0,
715 _In_ size_t end = (size_t)-1,
716 _In_ int flags = match_default)
717 {
718 assert(text || start >= end);
719 if (start < end && text[start]) {
720 const T* set = m_set.c_str();
721 size_t r = (flags & match_case_insensitive) ?
722 stdex::strnichr(set, m_set.size(), text[start], m_locale) :
723 stdex::strnchr(set, m_set.size(), text[start]);
724 if (r != stdex::npos && !m_invert || r == stdex::npos && m_invert) {
725 hit_offset = r;
726 interval.end = (interval.start = start) + 1;
727 return true;
728 }
729 }
730 hit_offset = (size_t)-1;
731 interval.start = (interval.end = start) + 1;
732 return false;
733 }
734
735 protected:
736 std::basic_string<T> m_set;
737 };
738
741#ifdef _UNICODE
742 using tcu_set = wcu_set;
743#else
744 using tcu_set = cu_set;
745#endif
746
750 class sgml_cp_set : public basic_set<char>
751 {
752 public:
753 sgml_cp_set(const char* set, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
754 basic_set<char>(invert, locale)
755 {
756 if (set)
757 m_set = sgml2wstr(set, count);
758 }
759
760 virtual bool match(
761 _In_reads_or_z_(end) const char* text,
762 _In_ size_t start = 0,
763 _In_ size_t end = (size_t)-1,
764 _In_ int flags = match_default)
765 {
766 assert(text || start >= end);
767 if (start < end && text[start]) {
768 wchar_t buf[3];
769 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
770 const wchar_t* set = m_set.c_str();
771 size_t r = (flags & match_case_insensitive) ?
772 stdex::strnistr(set, m_set.size(), chr, m_locale) :
773 stdex::strnstr(set, m_set.size(), chr);
774 if (r != stdex::npos && !m_invert || r == stdex::npos && m_invert) {
775 hit_offset = r;
776 interval.start = start;
777 return true;
778 }
779 }
780 hit_offset = (size_t)-1;
781 interval.start = (interval.end = start) + 1;
782 return false;
783 }
784
785 protected:
786 std::wstring m_set;
787 };
788
792 template <class T>
793 class basic_string : public basic_parser<T>
794 {
795 public:
797 _In_reads_or_z_(count) const T* str,
798 _In_ size_t count = (size_t)-1,
799 _In_ const std::locale& locale = std::locale()) :
800 basic_parser<T>(locale),
801 m_str(str, str + stdex::strnlen(str, count))
802 {}
803
804 virtual bool match(
805 _In_reads_or_z_(end) const T* text,
806 _In_ size_t start = 0,
807 _In_ size_t end = (size_t)-1,
808 _In_ int flags = match_default)
809 {
810 assert(text || start >= end);
811 size_t
812 m = m_str.size(),
813 n = std::min<size_t>(end - start, m);
814 bool r = ((flags & match_case_insensitive) ?
815 stdex::strnicmp(text + start, n, m_str.c_str(), m, m_locale) :
816 stdex::strncmp(text + start, n, m_str.c_str(), m)) == 0;
817 if (r) {
818 interval.end = (interval.start = start) + n;
819 return true;
820 }
821 interval.start = (interval.end = start) + 1;
822 return false;
823 }
824
825 protected:
826 std::basic_string<T> m_str;
827 };
828
831#ifdef _UNICODE
832 using tstring = wstring;
833#else
834 using tstring = string;
835#endif
836
841 {
842 public:
843 sgml_string(const char* str, size_t count = (size_t)-1, _In_ const std::locale& locale = std::locale()) :
844 sgml_parser(locale),
845 m_str(sgml2wstr(str, count))
846 {}
847
848 virtual bool match(
849 _In_reads_or_z_(end) const char* text,
850 _In_ size_t start = 0,
851 _In_ size_t end = (size_t)-1,
852 _In_ int flags = match_default)
853 {
854 assert(text || start >= end);
855 const wchar_t* str = m_str.c_str();
856 const bool case_insensitive = flags & match_case_insensitive ? true : false;
857 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
858 for (interval.end = start;;) {
859 if (!*str) {
860 interval.start = start;
861 return true;
862 }
863 if (interval.end >= end || !text[interval.end]) {
864 interval.start = (interval.end = start) + 1;
865 return false;
866 }
867 wchar_t buf[3];
868 const wchar_t* chr = next_sgml_cp(text, interval.end, end, interval.end, buf);
869 for (; *chr; ++str, ++chr) {
870 if (!*str ||
871 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
872 {
873 interval.start = (interval.end = start) + 1;
874 return false;
875 }
876 }
877 }
878 }
879
880 protected:
881 std::wstring m_str;
882 };
883
887 template <class T>
889 {
890 public:
891 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = (size_t)-1, bool greedy = true) :
892 m_el(el),
893 m_min_iterations(min_iterations),
894 m_max_iterations(max_iterations),
895 m_greedy(greedy)
896 {}
897
898 virtual bool match(
899 _In_reads_or_z_(end) const T* text,
900 _In_ size_t start = 0,
901 _In_ size_t end = (size_t)-1,
902 _In_ int flags = match_default)
903 {
904 assert(text || start >= end);
905 interval.start = interval.end = start;
906 for (size_t i = 0; ; i++) {
907 if (!m_greedy && i >= m_min_iterations || i >= m_max_iterations)
908 return true;
909 if (!m_el->match(text, interval.end, end, flags)) {
910 if (i >= m_min_iterations)
911 return true;
912 break;
913 }
914 if (m_el->interval.end == interval.end) {
915 // Element did match, but the matching interval was empty. Quit instead of spinning.
916 return true;
917 }
918 interval.end = m_el->interval.end;
919 }
920 interval.start = (interval.end = start) + 1;
921 return false;
922 }
923
924 protected:
925 std::shared_ptr<basic_parser<T>> m_el;
928 bool m_greedy;
929 };
930
933#ifdef _UNICODE
934 using titerations = witerations;
935#else
936 using titerations = iterations;
937#endif
939
943 template <class T>
945 {
946 protected:
947 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
948
949 public:
951 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
952 _In_ size_t count,
953 _In_ const std::locale& locale = std::locale()) :
954 basic_parser<T>(locale)
955 {
956 assert(el || !count);
957 m_collection.reserve(count);
958 for (size_t i = 0; i < count; i++)
959 m_collection.push_back(el[i]);
960 }
961
963 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
964 _In_ const std::locale& locale = std::locale()) :
965 basic_parser<T>(locale),
966 m_collection(std::move(collection))
967 {}
968
969 virtual void invalidate()
970 {
971 for (auto& el: m_collection)
972 el->invalidate();
974 }
975
976 protected:
977 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
978 };
979
983 template <class T>
985 {
986 public:
988 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
989 _In_ size_t count = 0,
990 _In_ const std::locale& locale = std::locale()) :
991 parser_collection<T>(el, count, locale)
992 {}
993
995 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
996 _In_ const std::locale& locale = std::locale()) :
997 parser_collection<T>(std::move(collection), locale)
998 {}
999
1000 virtual bool match(
1001 _In_reads_or_z_(end) const T* text,
1002 _In_ size_t start = 0,
1003 _In_ size_t end = (size_t)-1,
1004 _In_ int flags = match_default)
1005 {
1006 assert(text || start >= end);
1007 interval.end = start;
1008 for (auto i = m_collection.begin(); i != m_collection.end(); ++i) {
1009 if (!(*i)->match(text, interval.end, end, flags)) {
1010 for (++i; i != m_collection.end(); ++i)
1011 (*i)->invalidate();
1012 interval.start = (interval.end = start) + 1;
1013 return false;
1014 }
1015 interval.end = (*i)->interval.end;
1016 }
1017 interval.start = start;
1018 return true;
1019 }
1020 };
1021
1024#ifdef _UNICODE
1025 using tsequence = wsequence;
1026#else
1027 using tsequence = sequence;
1028#endif
1030
1034 template <class T>
1036 {
1037 protected:
1038 basic_branch(_In_ const std::locale& locale) :
1039 parser_collection<T>(locale),
1040 hit_offset((size_t)-1)
1041 {}
1042
1043 public:
1045 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1046 _In_ size_t count = 0,
1047 _In_ const std::locale& locale = std::locale()) :
1048 parser_collection<T>(el, count, locale),
1049 hit_offset((size_t)-1)
1050 {}
1051
1053 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1054 _In_ const std::locale& locale = std::locale()) :
1055 parser_collection<T>(std::move(collection), locale),
1056 hit_offset((size_t)-1)
1057 {}
1058
1059 virtual bool match(
1060 _In_reads_or_z_(end) const T* text,
1061 _In_ size_t start = 0,
1062 _In_ size_t end = (size_t)-1,
1063 _In_ int flags = match_default)
1064 {
1065 assert(text || start >= end);
1066 hit_offset = 0;
1067 for (auto i = m_collection.begin(); i != m_collection.end(); ++i, ++hit_offset) {
1068 if ((*i)->match(text, start, end, flags)) {
1069 interval = (*i)->interval;
1070 for (++i; i != m_collection.end(); ++i)
1071 (*i)->invalidate();
1072 return true;
1073 }
1074 }
1075 hit_offset = (size_t)-1;
1076 interval.start = (interval.end = start) + 1;
1077 return false;
1078 }
1079
1080 virtual void invalidate()
1081 {
1082 hit_offset = (size_t)-1;
1084 }
1085
1086 public:
1087 size_t hit_offset;
1088 };
1089
1090 using branch = basic_branch<char>;
1092#ifdef _UNICODE
1093 using tbranch = wbranch;
1094#else
1095 using tbranch = branch;
1096#endif
1098
1102 template <class T, class T_parser = basic_string<T>>
1104 {
1105 public:
1106 inline basic_string_branch(
1107 _In_reads_(count) const T* str_z = nullptr,
1108 _In_ size_t count = 0,
1109 _In_ const std::locale& locale = std::locale()) :
1110 basic_branch<T>(locale)
1111 {
1112 build(str_z, count);
1113 }
1114
1115 inline basic_string_branch(_In_z_ const T* str, ...) :
1116 basic_branch<T>(std::locale())
1117 {
1118 va_list params;
1119 va_start(params, str);
1120 build(str, params);
1121 va_end(params);
1122 }
1123
1124 inline basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1125 basic_branch<T>(locale)
1126 {
1127 va_list params;
1128 va_start(params, str);
1129 build(str, params);
1130 va_end(params);
1131 }
1132
1133 protected:
1134 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1135 {
1136 assert(str_z || !count);
1137 if (count) {
1138 size_t offset, n;
1139 for (
1140 offset = n = 0;
1141 offset < count && str_z[offset];
1142 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1143 m_collection.reserve(n);
1144 for (
1145 offset = 0;
1146 offset < count && str_z[offset];
1147 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1148 m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, m_locale)));
1149 }
1150 }
1151
1152 void build(_In_z_ const T* str, _In_ va_list params)
1153 {
1154 const T* p;
1155 for (
1156 m_collection.push_back(std::move(std::make_shared<T_parser>(str, (size_t)-1, m_locale)));
1157 (p = va_arg(params, const T*)) != nullptr;
1158 m_collection.push_back(std::move(std::make_shared<T_parser>(p, (size_t)-1, m_locale))));
1159 }
1160 };
1161
1164#ifdef _UNICODE
1166#else
1168#endif
1170
1174 template <class T>
1176 {
1177 public:
1179 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1180 _In_ size_t count = 0,
1181 _In_ const std::locale& locale = std::locale()) :
1182 parser_collection<T>(el, count, locale)
1183 {}
1184
1186 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1187 _In_ const std::locale& locale = std::locale()) :
1188 parser_collection<T>(std::move(collection), locale)
1189 {}
1190
1191 virtual bool match(
1192 _In_reads_or_z_(end) const T* text,
1193 _In_ size_t start = 0,
1194 _In_ size_t end = (size_t)-1,
1195 _In_ int flags = match_default)
1196 {
1197 assert(text || start >= end);
1198 for (auto& el: m_collection)
1199 el->invalidate();
1200 if (match_recursively(text, start, end, flags)) {
1201 interval.start = start;
1202 return true;
1203 }
1204 interval.start = (interval.end = start) + 1;
1205 return false;
1206 }
1207
1208 protected:
1209 bool match_recursively(
1210 _In_reads_or_z_(end) const T* text,
1211 _In_ size_t start = 0,
1212 _In_ size_t end = (size_t)-1,
1213 _In_ int flags = match_default)
1214 {
1215 bool all_matched = true;
1216 for (auto& el: m_collection) {
1217 if (!el->interval) {
1218 // Element was not matched in permutatuion yet.
1219 all_matched = false;
1220 if (el->match(text, start, end, flags)) {
1221 // Element matched for the first time.
1222 if (match_recursively(text, el->interval.end, end, flags)) {
1223 // Rest of the elements matched too.
1224 return true;
1225 }
1226 el->invalidate();
1227 }
1228 }
1229 }
1230 if (all_matched) {
1231 interval.end = start;
1232 return true;
1233 }
1234 return false;
1235 }
1236 };
1237
1240#ifdef _UNICODE
1241 using tpermutation = wpermutation;
1242#else
1243 using tpermutation = permutation;
1244#endif
1246
1250 template <class T>
1251 class basic_integer : public basic_parser<T>
1252 {
1253 public:
1254 basic_integer(_In_ const std::locale& locale = std::locale()) :
1255 basic_parser<T>(locale),
1256 value(0)
1257 {}
1258
1259 virtual void invalidate()
1260 {
1261 value = 0;
1263 }
1264
1265 public:
1266 size_t value;
1267 };
1268
1272 template <class T>
1274 {
1275 public:
1277 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1278 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1279 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1280 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1281 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1282 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1283 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1284 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1285 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1286 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1287 _In_ const std::locale& locale = std::locale()) :
1288 basic_integer<T>(locale),
1289 m_digit_0(digit_0),
1290 m_digit_1(digit_1),
1291 m_digit_2(digit_2),
1292 m_digit_3(digit_3),
1293 m_digit_4(digit_4),
1294 m_digit_5(digit_5),
1295 m_digit_6(digit_6),
1296 m_digit_7(digit_7),
1297 m_digit_8(digit_8),
1298 m_digit_9(digit_9)
1299 {}
1300
1301 virtual bool match(
1302 _In_reads_or_z_(end) const T* text,
1303 _In_ size_t start = 0,
1304 _In_ size_t end = (size_t)-1,
1305 _In_ int flags = match_default)
1306 {
1307 assert(text || start >= end);
1308 for (interval.end = start, value = 0; interval.end < end && text[interval.end];) {
1309 size_t dig;
1310 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; interval.end = m_digit_0->interval.end; }
1311 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; interval.end = m_digit_1->interval.end; }
1312 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; interval.end = m_digit_2->interval.end; }
1313 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; interval.end = m_digit_3->interval.end; }
1314 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; interval.end = m_digit_4->interval.end; }
1315 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; interval.end = m_digit_5->interval.end; }
1316 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; interval.end = m_digit_6->interval.end; }
1317 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; interval.end = m_digit_7->interval.end; }
1318 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; interval.end = m_digit_8->interval.end; }
1319 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; interval.end = m_digit_9->interval.end; }
1320 else break;
1321 value = value * 10 + dig;
1322 }
1323 if (start < interval.end) {
1324 interval.start = start;
1325 return true;
1326 }
1327 interval.start = (interval.end = start) + 1;
1328 return false;
1329 }
1330
1331 protected:
1332 std::shared_ptr<basic_parser<T>>
1333 m_digit_0,
1334 m_digit_1,
1335 m_digit_2,
1336 m_digit_3,
1337 m_digit_4,
1338 m_digit_5,
1339 m_digit_6,
1340 m_digit_7,
1341 m_digit_8,
1342 m_digit_9;
1343 };
1344
1347#ifdef _UNICODE
1348 using tinteger10 = winteger10;
1349#else
1350 using tinteger10 = integer10;
1351#endif
1353
1357 template <class T>
1359 {
1360 public:
1362 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1363 _In_ const std::shared_ptr<basic_set<T>>& separator,
1364 _In_ const std::locale& locale = std::locale()) :
1365 basic_integer<T>(locale),
1366 digit_count(0),
1367 has_separators(false),
1368 m_digits(digits),
1369 m_separator(separator)
1370 {}
1371
1372 virtual bool match(
1373 _In_reads_or_z_(end) const T* text,
1374 _In_ size_t start = 0,
1375 _In_ size_t end = (size_t)-1,
1376 _In_ int flags = match_default)
1377 {
1378 assert(text || start >= end);
1379 if (m_digits->match(text, start, end, flags)) {
1380 // Leading part match.
1381 value = m_digits->value;
1382 digit_count = m_digits->interval.size();
1383 has_separators = false;
1384 interval.start = start;
1385 interval.end = m_digits->interval.end;
1386 if (m_digits->interval.size() <= 3) {
1387 // Maybe separated with thousand separators?
1388 size_t hit_offset = (size_t)-1;
1389 while (m_separator->match(text, interval.end, end, flags) &&
1390 (hit_offset == (size_t)-1 || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1391 m_digits->match(text, m_separator->interval.end, end, flags) &&
1392 m_digits->interval.size() == 3)
1393 {
1394 // Thousand separator and three-digit integer followed.
1395 value = value * 1000 + m_digits->value;
1396 digit_count += 3;
1397 has_separators = true;
1398 interval.end = m_digits->interval.end;
1399 hit_offset = m_separator->hit_offset;
1400 }
1401 }
1402
1403 return true;
1404 }
1405 value = 0;
1406 interval.start = (interval.end = start) + 1;
1407 return false;
1408 }
1409
1410 virtual void invalidate()
1411 {
1412 digit_count = 0;
1413 has_separators = false;
1415 }
1416
1417 public:
1420
1421 protected:
1422 std::shared_ptr<basic_integer10<T>> m_digits;
1423 std::shared_ptr<basic_set<T>> m_separator;
1424 };
1425
1428#ifdef _UNICODE
1429 using tinteger10ts = winteger10ts;
1430#else
1431 using tinteger10ts = integer10ts;
1432#endif
1434
1438 template <class T>
1440 {
1441 public:
1443 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1444 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1445 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1446 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1447 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1448 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1449 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1450 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1451 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1452 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1453 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1454 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1455 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1456 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1457 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1458 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1459 _In_ const std::locale& locale = std::locale()) :
1460 basic_integer<T>(locale),
1461 m_digit_0(digit_0),
1462 m_digit_1(digit_1),
1463 m_digit_2(digit_2),
1464 m_digit_3(digit_3),
1465 m_digit_4(digit_4),
1466 m_digit_5(digit_5),
1467 m_digit_6(digit_6),
1468 m_digit_7(digit_7),
1469 m_digit_8(digit_8),
1470 m_digit_9(digit_9),
1471 m_digit_10(digit_10),
1472 m_digit_11(digit_11),
1473 m_digit_12(digit_12),
1474 m_digit_13(digit_13),
1475 m_digit_14(digit_14),
1476 m_digit_15(digit_15)
1477 {}
1478
1479 virtual bool match(
1480 _In_reads_or_z_(end) const T* text,
1481 _In_ size_t start = 0,
1482 _In_ size_t end = (size_t)-1,
1483 _In_ int flags = match_default)
1484 {
1485 assert(text || start >= end);
1486 for (interval.end = start, value = 0; interval.end < end && text[interval.end];) {
1487 size_t dig;
1488 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; interval.end = m_digit_0->interval.end; }
1489 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; interval.end = m_digit_1->interval.end; }
1490 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; interval.end = m_digit_2->interval.end; }
1491 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; interval.end = m_digit_3->interval.end; }
1492 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; interval.end = m_digit_4->interval.end; }
1493 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; interval.end = m_digit_5->interval.end; }
1494 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; interval.end = m_digit_6->interval.end; }
1495 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; interval.end = m_digit_7->interval.end; }
1496 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; interval.end = m_digit_8->interval.end; }
1497 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; interval.end = m_digit_9->interval.end; }
1498 else if (m_digit_10->match(text, interval.end, end, flags)) { dig = 10; interval.end = m_digit_10->interval.end; }
1499 else if (m_digit_11->match(text, interval.end, end, flags)) { dig = 11; interval.end = m_digit_11->interval.end; }
1500 else if (m_digit_12->match(text, interval.end, end, flags)) { dig = 12; interval.end = m_digit_12->interval.end; }
1501 else if (m_digit_13->match(text, interval.end, end, flags)) { dig = 13; interval.end = m_digit_13->interval.end; }
1502 else if (m_digit_14->match(text, interval.end, end, flags)) { dig = 14; interval.end = m_digit_14->interval.end; }
1503 else if (m_digit_15->match(text, interval.end, end, flags)) { dig = 15; interval.end = m_digit_15->interval.end; }
1504 else break;
1505 value = value * 16 + dig;
1506 }
1507 if (start < interval.end) {
1508 interval.start = start;
1509 return true;
1510 }
1511 interval.start = (interval.end = start) + 1;
1512 return false;
1513 }
1514
1515 protected:
1516 std::shared_ptr<basic_parser<T>>
1517 m_digit_0,
1518 m_digit_1,
1519 m_digit_2,
1520 m_digit_3,
1521 m_digit_4,
1522 m_digit_5,
1523 m_digit_6,
1524 m_digit_7,
1525 m_digit_8,
1526 m_digit_9,
1527 m_digit_10,
1528 m_digit_11,
1529 m_digit_12,
1530 m_digit_13,
1531 m_digit_14,
1532 m_digit_15;
1533 };
1534
1537#ifdef _UNICODE
1538 using tinteger16 = winteger16;
1539#else
1540 using tinteger16 = integer16;
1541#endif
1543
1547 template <class T>
1549 {
1550 public:
1552 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1553 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1554 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1555 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1556 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1557 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1558 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1559 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1560 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1561 _In_ const std::locale& locale = std::locale()) :
1562 basic_integer<T>(locale),
1563 m_digit_1(digit_1),
1564 m_digit_5(digit_5),
1565 m_digit_10(digit_10),
1566 m_digit_50(digit_50),
1567 m_digit_100(digit_100),
1568 m_digit_500(digit_500),
1569 m_digit_1000(digit_1000),
1570 m_digit_5000(digit_5000),
1571 m_digit_10000(digit_10000)
1572 {}
1573
1574 virtual bool match(
1575 _In_reads_or_z_(end) const T* text,
1576 _In_ size_t start = 0,
1577 _In_ size_t end = (size_t)-1,
1578 _In_ int flags = match_default)
1579 {
1580 assert(text || start >= end);
1581 size_t
1582 dig[5] = { (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1 },
1583 end2;
1584
1585 for (interval.end = start, value = 0; interval.end < end && text[interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], interval.end = end2) {
1586 if (m_digit_1 && m_digit_1->match(text, interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1587 else if (m_digit_5 && m_digit_5->match(text, interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1588 else if (m_digit_10 && m_digit_10->match(text, interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1589 else if (m_digit_50 && m_digit_50->match(text, interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1590 else if (m_digit_100 && m_digit_100->match(text, interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1591 else if (m_digit_500 && m_digit_500->match(text, interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1592 else if (m_digit_1000 && m_digit_1000->match(text, interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1593 else if (m_digit_5000 && m_digit_5000->match(text, interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1594 else if (m_digit_10000 && m_digit_10000->match(text, interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1595 else break;
1596
1597 // Store first digit.
1598 if (dig[4] == (size_t)-1) dig[4] = dig[0];
1599
1600 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1601 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1602 break;
1603 }
1604 if (dig[0] <= dig[1]) {
1605 // Digit is less or equal previous one: add.
1606 value += dig[0];
1607 }
1608 else if (
1609 dig[1] == 1 && (dig[0] == 5 || dig[0] == 10) ||
1610 dig[1] == 10 && (dig[0] == 50 || dig[0] == 100) ||
1611 dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000) ||
1612 dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000))
1613 {
1614 // Digit is up to two orders bigger than previous one: subtract. But...
1615 if (dig[2] < dig[0]) {
1616 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1617 break;
1618 }
1619 value -= dig[1]; // Cancel addition in the previous step.
1620 dig[0] -= dig[1]; // Combine last two digits.
1621 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1622 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1623 value += dig[0]; // Add combined value.
1624 }
1625 else {
1626 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1627 break;
1628 }
1629 }
1630 if (value) {
1631 interval.start = start;
1632 return true;
1633 }
1634 interval.start = (interval.end = start) + 1;
1635 return false;
1636 }
1637
1638 protected:
1639 std::shared_ptr<basic_parser<T>>
1640 m_digit_1,
1641 m_digit_5,
1642 m_digit_10,
1643 m_digit_50,
1644 m_digit_100,
1645 m_digit_500,
1646 m_digit_1000,
1647 m_digit_5000,
1648 m_digit_10000;
1649 };
1650
1653#ifdef _UNICODE
1655#else
1657#endif
1659
1663 template <class T>
1665 {
1666 public:
1668 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1669 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1670 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1671 _In_ const std::locale& locale = std::locale()) :
1672 basic_parser<T>(locale),
1673 numerator(_numerator),
1674 fraction_line(_fraction_line),
1675 denominator(_denominator)
1676 {}
1677
1678 virtual bool match(
1679 _In_reads_or_z_(end) const T* text,
1680 _In_ size_t start = 0,
1681 _In_ size_t end = (size_t)-1,
1682 _In_ int flags = match_default)
1683 {
1684 assert(text || start >= end);
1685 if (numerator->match(text, start, end, flags) &&
1686 fraction_line->match(text, numerator->interval.end, end, flags) &&
1687 denominator->match(text, fraction_line->interval.end, end, flags))
1688 {
1689 interval.start = start;
1690 interval.end = denominator->interval.end;
1691 return true;
1692 }
1693 numerator->invalidate();
1694 fraction_line->invalidate();
1695 denominator->invalidate();
1696 interval.start = (interval.end = start) + 1;
1697 return false;
1698 }
1699
1700 virtual void invalidate()
1701 {
1702 numerator->invalidate();
1703 fraction_line->invalidate();
1704 denominator->invalidate();
1706 }
1707
1708 public:
1709 std::shared_ptr<basic_parser<T>> numerator;
1710 std::shared_ptr<basic_parser<T>> fraction_line;
1711 std::shared_ptr<basic_parser<T>> denominator;
1712 };
1713
1716#ifdef _UNICODE
1717 using tfraction = wfraction;
1718#else
1719 using tfraction = fraction;
1720#endif
1722
1726 template <class T>
1727 class basic_score : public basic_parser<T>
1728 {
1729 public:
1731 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1732 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1733 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1734 _In_ const std::shared_ptr<basic_parser<T>>& space,
1735 _In_ const std::locale& locale = std::locale()) :
1736 basic_parser<T>(locale),
1737 home(_home),
1738 separator(_separator),
1739 guest(_guest),
1740 m_space(space)
1741 {}
1742
1743 virtual bool match(
1744 _In_reads_or_z_(end) const T* text,
1745 _In_ size_t start = 0,
1746 _In_ size_t end = (size_t)-1,
1747 _In_ int flags = match_default)
1748 {
1749 assert(text || start >= end);
1750 interval.end = start;
1751
1752 if (home->match(text, interval.end, end, flags))
1753 interval.end = home->interval.end;
1754 else
1755 goto end;
1756
1757 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1758 for (; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1759
1760 if (separator->match(text, interval.end, end, flags))
1761 interval.end = separator->interval.end;
1762 else
1763 goto end;
1764
1765 for (; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1766
1767 if (guest->match(text, interval.end, end, flags))
1768 interval.end = guest->interval.end;
1769 else
1770 goto end;
1771
1772 interval.start = start;
1773 return true;
1774
1775 end:
1776 home->invalidate();
1777 separator->invalidate();
1778 guest->invalidate();
1779 interval.start = (interval.end = start) + 1;
1780 return false;
1781 }
1782
1783 virtual void invalidate()
1784 {
1785 home->invalidate();
1786 separator->invalidate();
1787 guest->invalidate();
1789 }
1790
1791 public:
1792 std::shared_ptr<basic_parser<T>> home;
1793 std::shared_ptr<basic_parser<T>> separator;
1794 std::shared_ptr<basic_parser<T>> guest;
1795
1796 protected:
1797 std::shared_ptr<basic_parser<T>> m_space;
1798 };
1799
1800 using score = basic_score<char>;
1802#ifdef _UNICODE
1803 using tscore = wscore;
1804#else
1805 using tscore = score;
1806#endif
1808
1812 template <class T>
1814 {
1815 public:
1817 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1818 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1819 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1820 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1821 _In_ const std::locale& locale = std::locale()) :
1822 basic_parser<T>(locale),
1823 positive_sign(_positive_sign),
1824 negative_sign(_negative_sign),
1825 special_sign(_special_sign),
1826 number(_number)
1827 {}
1828
1829 virtual bool match(
1830 _In_reads_or_z_(end) const T* text,
1831 _In_ size_t start = 0,
1832 _In_ size_t end = (size_t)-1,
1833 _In_ int flags = match_default)
1834 {
1835 assert(text || start >= end);
1836 interval.end = start;
1837 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
1838 interval.end = positive_sign->interval.end;
1839 if (negative_sign) negative_sign->invalidate();
1840 if (special_sign) special_sign->invalidate();
1841 }
1842 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
1843 interval.end = negative_sign->interval.end;
1844 if (positive_sign) positive_sign->invalidate();
1845 if (special_sign) special_sign->invalidate();
1846 }
1847 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
1848 interval.end = special_sign->interval.end;
1849 if (positive_sign) positive_sign->invalidate();
1850 if (negative_sign) negative_sign->invalidate();
1851 }
1852 else {
1853 if (positive_sign) positive_sign->invalidate();
1854 if (negative_sign) negative_sign->invalidate();
1855 if (special_sign) special_sign->invalidate();
1856 }
1857 if (number->match(text, interval.end, end, flags)) {
1858 interval.start = start;
1859 interval.end = number->interval.end;
1860 return true;
1861 }
1862 if (positive_sign) positive_sign->invalidate();
1863 if (negative_sign) negative_sign->invalidate();
1864 if (special_sign) special_sign->invalidate();
1865 number->invalidate();
1866 interval.start = (interval.end = start) + 1;
1867 return false;
1868 }
1869
1870 virtual void invalidate()
1871 {
1872 if (positive_sign) positive_sign->invalidate();
1873 if (negative_sign) negative_sign->invalidate();
1874 if (special_sign) special_sign->invalidate();
1875 number->invalidate();
1877 }
1878
1879 public:
1880 std::shared_ptr<basic_parser<T>> positive_sign;
1881 std::shared_ptr<basic_parser<T>> negative_sign;
1882 std::shared_ptr<basic_parser<T>> special_sign;
1883 std::shared_ptr<basic_parser<T>> number;
1884 };
1885
1888#ifdef _UNICODE
1890#else
1892#endif
1894
1898 template <class T>
1900 {
1901 public:
1903 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1904 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1905 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1906 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1907 _In_ const std::shared_ptr<basic_parser<T>>& space,
1908 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1909 _In_ const std::locale& locale = std::locale()) :
1910 basic_parser<T>(locale),
1911 positive_sign(_positive_sign),
1912 negative_sign(_negative_sign),
1913 special_sign(_special_sign),
1914 integer(_integer),
1915 fraction(_fraction),
1916 m_space(space)
1917 {}
1918
1919 virtual bool match(
1920 _In_reads_or_z_(end) const T* text,
1921 _In_ size_t start = 0,
1922 _In_ size_t end = (size_t)-1,
1923 _In_ int flags = match_default)
1924 {
1925 assert(text || start >= end);
1926 interval.end = start;
1927
1928 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
1929 interval.end = positive_sign->interval.end;
1930 if (negative_sign) negative_sign->invalidate();
1931 if (special_sign) special_sign->invalidate();
1932 }
1933 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
1934 interval.end = negative_sign->interval.end;
1935 if (positive_sign) positive_sign->invalidate();
1936 if (special_sign) special_sign->invalidate();
1937 }
1938 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
1939 interval.end = special_sign->interval.end;
1940 if (positive_sign) positive_sign->invalidate();
1941 if (negative_sign) negative_sign->invalidate();
1942 }
1943 else {
1944 if (positive_sign) positive_sign->invalidate();
1945 if (negative_sign) negative_sign->invalidate();
1946 if (special_sign) special_sign->invalidate();
1947 }
1948
1949 // Check for <integer> <fraction>
1950 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1951 if (integer->match(text, interval.end, end, flags) &&
1952 m_space->match(text, integer->interval.end, end, space_match_flags))
1953 {
1954 for (interval.end = m_space->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1955 if (fraction->match(text, interval.end, end, flags)) {
1956 interval.start = start;
1958 return true;
1959 }
1960 fraction->invalidate();
1961 interval.start = start;
1962 interval.end = integer->interval.end;
1963 return true;
1964 }
1965
1966 // Check for <fraction>
1967 if (fraction->match(text, interval.end, end, flags)) {
1968 integer->invalidate();
1969 interval.start = start;
1971 return true;
1972 }
1973
1974 // Check for <integer>
1975 if (integer->match(text, interval.end, end, flags)) {
1976 fraction->invalidate();
1977 interval.start = start;
1978 interval.end = integer->interval.end;
1979 return true;
1980 }
1981
1982 if (positive_sign) positive_sign->invalidate();
1983 if (negative_sign) negative_sign->invalidate();
1984 if (special_sign) special_sign->invalidate();
1985 integer->invalidate();
1986 fraction->invalidate();
1987 interval.start = (interval.end = start) + 1;
1988 return false;
1989 }
1990
1991 virtual void invalidate()
1992 {
1993 if (positive_sign) positive_sign->invalidate();
1994 if (negative_sign) negative_sign->invalidate();
1995 if (special_sign) special_sign->invalidate();
1996 integer->invalidate();
1997 fraction->invalidate();
1999 }
2000
2001 public:
2002 std::shared_ptr<basic_parser<T>> positive_sign;
2003 std::shared_ptr<basic_parser<T>> negative_sign;
2004 std::shared_ptr<basic_parser<T>> special_sign;
2005 std::shared_ptr<basic_parser<T>> integer;
2006 std::shared_ptr<basic_parser<T>> fraction;
2007
2008 protected:
2009 std::shared_ptr<basic_parser<T>> m_space;
2010 };
2011
2014#ifdef _UNICODE
2016#else
2018#endif
2020
2024 template <class T>
2026 {
2027 public:
2029 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2030 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2031 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2032 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2033 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2034 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2035 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2036 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2037 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2038 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2039 _In_ const std::locale& locale = std::locale()) :
2040 basic_parser<T>(locale),
2041 positive_sign(_positive_sign),
2042 negative_sign(_negative_sign),
2043 special_sign(_special_sign),
2044 integer(_integer),
2045 decimal_separator(_decimal_separator),
2046 decimal(_decimal),
2047 exponent_symbol(_exponent_symbol),
2048 positive_exp_sign(_positive_exp_sign),
2049 negative_exp_sign(_negative_exp_sign),
2050 exponent(_exponent),
2051 value(std::numeric_limits<double>::quiet_NaN())
2052 {}
2053
2054 virtual bool match(
2055 _In_reads_or_z_(end) const T* text,
2056 _In_ size_t start = 0,
2057 _In_ size_t end = (size_t)-1,
2058 _In_ int flags = match_default)
2059 {
2060 assert(text || start >= end);
2061 interval.end = start;
2062
2063 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
2064 interval.end = positive_sign->interval.end;
2065 if (negative_sign) negative_sign->invalidate();
2066 if (special_sign) special_sign->invalidate();
2067 }
2068 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
2069 interval.end = negative_sign->interval.end;
2070 if (positive_sign) positive_sign->invalidate();
2071 if (special_sign) special_sign->invalidate();
2072 }
2073 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
2074 interval.end = special_sign->interval.end;
2075 if (positive_sign) positive_sign->invalidate();
2076 if (negative_sign) negative_sign->invalidate();
2077 }
2078 else {
2079 if (positive_sign) positive_sign->invalidate();
2080 if (negative_sign) negative_sign->invalidate();
2081 if (special_sign) special_sign->invalidate();
2082 }
2083
2084 if (integer->match(text, interval.end, end, flags))
2085 interval.end = integer->interval.end;
2086
2087 if (decimal_separator->match(text, interval.end, end, flags) &&
2088 decimal->match(text, decimal_separator->interval.end, end, flags))
2089 interval.end = decimal->interval.end;
2090 else {
2091 decimal_separator->invalidate();
2092 decimal->invalidate();
2093 }
2094
2095 if (!integer->interval.empty() &&
2096 decimal->interval.empty())
2097 {
2098 // No integer part, no decimal part.
2099 if (positive_sign) positive_sign->invalidate();
2100 if (negative_sign) negative_sign->invalidate();
2101 if (special_sign) special_sign->invalidate();
2102 integer->invalidate();
2103 decimal_separator->invalidate();
2104 decimal->invalidate();
2105 if (exponent_symbol) exponent_symbol->invalidate();
2106 if (positive_exp_sign) positive_exp_sign->invalidate();
2107 if (negative_exp_sign) negative_exp_sign->invalidate();
2108 if (exponent) exponent->invalidate();
2109 interval.start = (interval.end = start) + 1;
2110 return false;
2111 }
2112
2113 if (exponent_symbol && exponent_symbol->match(text, interval.end, end, flags) &&
2114 (positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2115 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags) ||
2116 exponent && exponent->match(text, exponent_symbol->interval.end, end, flags)))
2117 {
2118 interval.end = exponent->interval.end;
2119 if (negative_exp_sign) negative_exp_sign->invalidate();
2120 }
2121 else if (exponent_symbol && exponent_symbol->match(text, interval.end, end, flags) &&
2122 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2123 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2124 {
2125 interval.end = exponent->interval.end;
2126 if (positive_exp_sign) positive_exp_sign->invalidate();
2127 }
2128 else {
2129 if (exponent_symbol) exponent_symbol->invalidate();
2130 if (positive_exp_sign) positive_exp_sign->invalidate();
2131 if (negative_exp_sign) negative_exp_sign->invalidate();
2132 if (exponent) exponent->invalidate();
2133 }
2134
2135 value = (double)integer->value;
2136 if (decimal->interval)
2137 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2138 if (negative_sign && negative_sign->interval)
2139 value = -value;
2140 if (exponent && exponent->interval) {
2141 double e = (double)exponent->value;
2142 if (negative_exp_sign && negative_exp_sign->interval)
2143 e = -e;
2144 value *= pow(10.0, e);
2145 }
2146
2147 interval.start = start;
2148 return true;
2149 }
2150
2151 virtual void invalidate()
2152 {
2153 if (positive_sign) positive_sign->invalidate();
2154 if (negative_sign) negative_sign->invalidate();
2155 if (special_sign) special_sign->invalidate();
2156 integer->invalidate();
2157 decimal_separator->invalidate();
2158 decimal->invalidate();
2159 if (exponent_symbol) exponent_symbol->invalidate();
2160 if (positive_exp_sign) positive_exp_sign->invalidate();
2161 if (negative_exp_sign) negative_exp_sign->invalidate();
2162 if (exponent) exponent->invalidate();
2163 value = std::numeric_limits<double>::quiet_NaN();
2165 }
2166
2167 public:
2168 std::shared_ptr<basic_parser<T>> positive_sign;
2169 std::shared_ptr<basic_parser<T>> negative_sign;
2170 std::shared_ptr<basic_parser<T>> special_sign;
2171 std::shared_ptr<basic_integer<T>> integer;
2172 std::shared_ptr<basic_parser<T>> decimal_separator;
2173 std::shared_ptr<basic_integer<T>> decimal;
2174 std::shared_ptr<basic_parser<T>> exponent_symbol;
2175 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2176 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2177 std::shared_ptr<basic_integer<T>> exponent;
2178 double value;
2179 };
2180
2183#ifdef _UNICODE
2185#else
2187#endif
2189
2193 template <class T>
2195 {
2196 public:
2198 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2199 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2200 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2201 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2202 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2203 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2204 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2205 _In_ const std::locale& locale = std::locale()) :
2206 basic_parser<T>(locale),
2207 positive_sign(_positive_sign),
2208 negative_sign(_negative_sign),
2209 special_sign(_special_sign),
2210 currency(_currency),
2211 integer(_integer),
2212 decimal_separator(_decimal_separator),
2213 decimal(_decimal)
2214 {}
2215
2216 virtual bool match(
2217 _In_reads_or_z_(end) const T* text,
2218 _In_ size_t start = 0,
2219 _In_ size_t end = (size_t)-1,
2220 _In_ int flags = match_default)
2221 {
2222 assert(text || start >= end);
2223 interval.end = start;
2224
2225 if (positive_sign->match(text, interval.end, end, flags)) {
2226 interval.end = positive_sign->interval.end;
2227 if (negative_sign) negative_sign->invalidate();
2228 if (special_sign) special_sign->invalidate();
2229 }
2230 else if (negative_sign->match(text, interval.end, end, flags)) {
2231 interval.end = negative_sign->interval.end;
2232 if (positive_sign) positive_sign->invalidate();
2233 if (special_sign) special_sign->invalidate();
2234 }
2235 else if (special_sign->match(text, interval.end, end, flags)) {
2236 interval.end = special_sign->interval.end;
2237 if (positive_sign) positive_sign->invalidate();
2238 if (negative_sign) negative_sign->invalidate();
2239 }
2240 else {
2241 if (positive_sign) positive_sign->invalidate();
2242 if (negative_sign) negative_sign->invalidate();
2243 if (special_sign) special_sign->invalidate();
2244 }
2245
2246 if (currency->match(text, interval.end, end, flags))
2247 interval.end = currency->interval.end;
2248 else {
2249 if (positive_sign) positive_sign->invalidate();
2250 if (negative_sign) negative_sign->invalidate();
2251 if (special_sign) special_sign->invalidate();
2252 integer->invalidate();
2253 decimal_separator->invalidate();
2254 decimal->invalidate();
2255 interval.start = (interval.end = start) + 1;
2256 return false;
2257 }
2258
2259 if (integer->match(text, interval.end, end, flags))
2260 interval.end = integer->interval.end;
2261 if (decimal_separator->match(text, interval.end, end, flags) &&
2262 decimal->match(text, decimal_separator->interval.end, end, flags))
2263 interval.end = decimal->interval.end;
2264 else {
2265 decimal_separator->invalidate();
2266 decimal->invalidate();
2267 }
2268
2269 if (integer->interval.empty() &&
2270 decimal->interval.empty())
2271 {
2272 // No integer part, no decimal part.
2273 if (positive_sign) positive_sign->invalidate();
2274 if (negative_sign) negative_sign->invalidate();
2275 if (special_sign) special_sign->invalidate();
2276 currency->invalidate();
2277 integer->invalidate();
2278 decimal_separator->invalidate();
2279 decimal->invalidate();
2280 interval.start = (interval.end = start) + 1;
2281 return false;
2282 }
2283
2284 interval.start = start;
2285 return true;
2286 }
2287
2288 virtual void invalidate()
2289 {
2290 if (positive_sign) positive_sign->invalidate();
2291 if (negative_sign) negative_sign->invalidate();
2292 if (special_sign) special_sign->invalidate();
2293 currency->invalidate();
2294 integer->invalidate();
2295 decimal_separator->invalidate();
2296 decimal->invalidate();
2298 }
2299
2300 public:
2301 std::shared_ptr<basic_parser<T>> positive_sign;
2302 std::shared_ptr<basic_parser<T>> negative_sign;
2303 std::shared_ptr<basic_parser<T>> special_sign;
2304 std::shared_ptr<basic_parser<T>> currency;
2305 std::shared_ptr<basic_parser<T>> integer;
2306 std::shared_ptr<basic_parser<T>> decimal_separator;
2307 std::shared_ptr<basic_parser<T>> decimal;
2308 };
2309
2312#ifdef _UNICODE
2314#else
2316#endif
2318
2322 template <class T>
2324 {
2325 public:
2327 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2328 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2329 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2330 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2331 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2332 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2333 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2334 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2335 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2336 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2337 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2338 _In_ const std::locale& locale = std::locale()) :
2339 basic_parser<T>(locale),
2340 m_digit_0(digit_0),
2341 m_digit_1(digit_1),
2342 m_digit_2(digit_2),
2343 m_digit_3(digit_3),
2344 m_digit_4(digit_4),
2345 m_digit_5(digit_5),
2346 m_digit_6(digit_6),
2347 m_digit_7(digit_7),
2348 m_digit_8(digit_8),
2349 m_digit_9(digit_9),
2350 m_separator(separator)
2351 {
2352 value.s_addr = 0;
2353 }
2354
2355 virtual bool match(
2356 _In_reads_or_z_(end) const T* text,
2357 _In_ size_t start = 0,
2358 _In_ size_t end = (size_t)-1,
2359 _In_ int flags = match_default)
2360 {
2361 assert(text || start >= end);
2362 interval.end = start;
2363 value.s_addr = 0;
2364
2365 size_t i;
2366 for (i = 0; i < 4; i++) {
2367 if (i) {
2368 if (m_separator->match(text, interval.end, end, flags))
2369 interval.end = m_separator->interval.end;
2370 else
2371 goto error;
2372 }
2373
2375 bool is_empty = true;
2376 size_t x;
2377 for (x = 0; interval.end < end && text[interval.end];) {
2378 size_t dig, digit_end;
2379 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2380 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2381 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2382 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2383 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2384 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2385 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2386 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2387 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2388 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2389 else break;
2390 size_t x_n = x * 10 + dig;
2391 if (x_n <= 255) {
2392 x = x_n;
2393 interval.end = digit_end;
2394 is_empty = false;
2395 }
2396 else
2397 break;
2398 }
2399 if (is_empty)
2400 goto error;
2402 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2403 }
2404 if (i < 4)
2405 goto error;
2406
2407 interval.start = start;
2408 return true;
2409
2410 error:
2411 components[0].start = 1;
2412 components[0].end = 0;
2413 components[1].start = 1;
2414 components[1].end = 0;
2415 components[2].start = 1;
2416 components[2].end = 0;
2417 components[3].start = 1;
2418 components[3].end = 0;
2419 value = 0;
2420 interval.start = (interval.end = start) + 1;
2421 return false;
2422 }
2423
2424 virtual void invalidate()
2425 {
2426 components[0].start = 1;
2427 components[0].end = 0;
2428 components[1].start = 1;
2429 components[1].end = 0;
2430 components[2].start = 1;
2431 components[2].end = 0;
2432 components[3].start = 1;
2433 components[3].end = 0;
2434 value = 0;
2436 }
2437
2438 public:
2440 struct in_addr value;
2441
2442 protected:
2443 std::shared_ptr<basic_parser<T>>
2444 m_digit_0,
2445 m_digit_1,
2446 m_digit_2,
2447 m_digit_3,
2448 m_digit_4,
2449 m_digit_5,
2450 m_digit_6,
2451 m_digit_7,
2452 m_digit_8,
2453 m_digit_9;
2454 std::shared_ptr<basic_parser<T>> m_separator;
2455 };
2456
2459#ifdef _UNICODE
2461#else
2463#endif
2465
2469 template <class T>
2471 {
2472 public:
2473 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2474
2475 virtual bool match(
2476 _In_reads_or_z_(end) const T* text,
2477 _In_ size_t start = 0,
2478 _In_ size_t end = (size_t)-1,
2479 _In_ int flags = match_default)
2480 {
2481 assert(text || start >= end);
2482 if (start < end && text[start]) {
2483 if (text[start] == '-' ||
2484 text[start] == '_' ||
2485 text[start] == ':' ||
2486 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
2487 {
2488 interval.end = (interval.start = start) + 1;
2489 return true;
2490 }
2491 }
2492 interval.start = (interval.end = start) + 1;
2493 return false;
2494 }
2495 };
2496
2499#ifdef _UNICODE
2501#else
2503#endif
2504
2509 {
2510 public:
2511 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2512
2513 virtual bool match(
2514 _In_reads_or_z_(end) const char* text,
2515 _In_ size_t start = 0,
2516 _In_ size_t end = (size_t)-1,
2517 _In_ int flags = match_default)
2518 {
2519 assert(text || start >= end);
2520 if (start < end && text[start]) {
2521 wchar_t buf[3];
2522 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
2523 const wchar_t* chr_end = chr + stdex::strlen(chr);
2524 if ((chr[0] == L'-' ||
2525 chr[0] == L'_' ||
2526 chr[0] == L':') && chr[1] == 0 ||
2527 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2528 {
2529 interval.start = start;
2530 return true;
2531 }
2532 }
2533 interval.start = (interval.end = start) + 1;
2534 return false;
2535 }
2536 };
2537
2541 template <class T>
2543 {
2544 public:
2546 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2547 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2548 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2549 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2550 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2551 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2552 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2553 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2554 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2555 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2556 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2557 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2558 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2559 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2560 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2561 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2562 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2563 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2564 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2565 _In_ const std::locale& locale = std::locale()) :
2566 basic_parser<T>(locale),
2567 m_digit_0(digit_0),
2568 m_digit_1(digit_1),
2569 m_digit_2(digit_2),
2570 m_digit_3(digit_3),
2571 m_digit_4(digit_4),
2572 m_digit_5(digit_5),
2573 m_digit_6(digit_6),
2574 m_digit_7(digit_7),
2575 m_digit_8(digit_8),
2576 m_digit_9(digit_9),
2577 m_digit_10(digit_10),
2578 m_digit_11(digit_11),
2579 m_digit_12(digit_12),
2580 m_digit_13(digit_13),
2581 m_digit_14(digit_14),
2582 m_digit_15(digit_15),
2583 m_separator(separator),
2584 m_scope_id_separator(scope_id_separator),
2585 scope_id(_scope_id)
2586 {
2587 memset(value, 0, sizeof(value));
2588 }
2589
2590 virtual bool match(
2591 _In_reads_or_z_(end) const T* text,
2592 _In_ size_t start = 0,
2593 _In_ size_t end = (size_t)-1,
2594 _In_ int flags = match_default)
2595 {
2596 assert(text || start >= end);
2597 interval.end = start;
2598 memset(value, 0, sizeof(value));
2599
2600 size_t i, compaction_i = (size_t)-1, compaction_start = start;
2601 for (i = 0; i < 8; i++) {
2602 bool is_empty = true;
2603
2604 if (m_separator->match(text, interval.end, end, flags)) {
2605 if (m_separator->match(text, m_separator->interval.end, end, flags)) {
2606 // :: found
2607 if (compaction_i == (size_t)-1) {
2608 // Zero compaction start
2609 compaction_i = i;
2610 compaction_start = m_separator->interval.start;
2611 interval.end = m_separator->interval.end;
2612 }
2613 else {
2614 // More than one zero compaction
2615 break;
2616 }
2617 }
2618 else if (i) {
2619 // Inner : found
2620 interval.end = m_separator->interval.end;
2621 }
2622 else {
2623 // Leading : found
2624 goto error;
2625 }
2626 }
2627 else if (i) {
2628 // : missing
2629 break;
2630 }
2631
2633 size_t x;
2634 for (x = 0; interval.end < end && text[interval.end];) {
2635 size_t dig, digit_end;
2636 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2637 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2638 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2639 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2640 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2641 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2642 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2643 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2644 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2645 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2646 else if (m_digit_10->match(text, interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2647 else if (m_digit_11->match(text, interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2648 else if (m_digit_12->match(text, interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2649 else if (m_digit_13->match(text, interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2650 else if (m_digit_14->match(text, interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2651 else if (m_digit_15->match(text, interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2652 else break;
2653 size_t x_n = x * 16 + dig;
2654 if (x_n <= 0xffff) {
2655 x = x_n;
2656 interval.end = digit_end;
2657 is_empty = false;
2658 }
2659 else
2660 break;
2661 }
2662 if (is_empty) {
2663 if (compaction_i != (size_t)-1) {
2664 // Zero compaction active: no sweat.
2665 break;
2666 }
2667 goto error;
2668 }
2670 value.s6_words[i] = (uint16_t)x;
2671 }
2672
2673 if (compaction_i != (size_t)-1) {
2674 // Align components right due to zero compaction.
2675 size_t j, k;
2676 for (j = 8, k = i; k > compaction_i;) {
2677 value.s6_words[--j] = value.s6_words[--k];
2678 components[j] = components[k];
2679 }
2680 for (; j > compaction_i;) {
2681 value.s6_words[--j] = 0;
2682 components[j].start =
2683 components[j].end = compaction_start;
2684 }
2685 }
2686 else if (i < 8)
2687 goto error;
2688
2689 if (m_scope_id_separator && m_scope_id_separator->match(text, interval.end, end, flags) &&
2690 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2691 interval.end = scope_id->interval.end;
2692 else if (scope_id)
2693 scope_id->invalidate();
2694
2695 interval.start = start;
2696 return true;
2697
2698 error:
2699 components[0].start = 1;
2700 components[0].end = 0;
2701 components[1].start = 1;
2702 components[1].end = 0;
2703 components[2].start = 1;
2704 components[2].end = 0;
2705 components[3].start = 1;
2706 components[3].end = 0;
2707 components[4].start = 1;
2708 components[4].end = 0;
2709 components[5].start = 1;
2710 components[5].end = 0;
2711 components[6].start = 1;
2712 components[6].end = 0;
2713 components[7].start = 1;
2714 components[7].end = 0;
2715 memset(value, 0, sizeof(value));
2716 if (scope_id) scope_id->invalidate();
2717 interval.start = (interval.end = start) + 1;
2718 return false;
2719 }
2720
2721 virtual void invalidate()
2722 {
2723 components[0].start = 1;
2724 components[0].end = 0;
2725 components[1].start = 1;
2726 components[1].end = 0;
2727 components[2].start = 1;
2728 components[2].end = 0;
2729 components[3].start = 1;
2730 components[3].end = 0;
2731 components[4].start = 1;
2732 components[4].end = 0;
2733 components[5].start = 1;
2734 components[5].end = 0;
2735 components[6].start = 1;
2736 components[6].end = 0;
2737 components[7].start = 1;
2738 components[7].end = 0;
2739 memset(value, 0, sizeof(value));
2740 if (scope_id) scope_id->invalidate();
2742 }
2743
2744 public:
2746 struct in6_addr value;
2747 std::shared_ptr<basic_parser<T>> scope_id;
2748
2749 protected:
2750 std::shared_ptr<basic_parser<T>>
2751 m_digit_0,
2752 m_digit_1,
2753 m_digit_2,
2754 m_digit_3,
2755 m_digit_4,
2756 m_digit_5,
2757 m_digit_6,
2758 m_digit_7,
2759 m_digit_8,
2760 m_digit_9,
2761 m_digit_10,
2762 m_digit_11,
2763 m_digit_12,
2764 m_digit_13,
2765 m_digit_14,
2766 m_digit_15;
2767 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2768 };
2769
2772#ifdef _UNICODE
2774#else
2776#endif
2778
2782 template <class T>
2784 {
2785 public:
2787 _In_ bool allow_idn,
2788 _In_ const std::locale& locale = std::locale()) :
2789 basic_parser<T>(locale),
2790 m_allow_idn(allow_idn),
2791 allow_on_edge(true)
2792 {}
2793
2794 virtual bool match(
2795 _In_reads_or_z_(end) const T* text,
2796 _In_ size_t start = 0,
2797 _In_ size_t end = (size_t)-1,
2798 _In_ int flags = match_default)
2799 {
2800 assert(text || start >= end);
2801 if (start < end && text[start]) {
2802 if (('A' <= text[start] && text[start] <= 'Z') ||
2803 ('a' <= text[start] && text[start] <= 'z') ||
2804 ('0' <= text[start] && text[start] <= '9'))
2805 allow_on_edge = true;
2806 else if (text[start] == '-')
2807 allow_on_edge = false;
2808 else if (m_allow_idn && std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
2809 allow_on_edge = true;
2810 else {
2811 interval.start = (interval.end = start) + 1;
2812 return false;
2813 }
2814 interval.end = (interval.start = start) + 1;
2815 return true;
2816 }
2817 interval.start = (interval.end = start) + 1;
2818 return false;
2819 }
2820
2821 public:
2823
2824 protected:
2825 bool m_allow_idn;
2826 };
2827
2830#ifdef _UNICODE
2832#else
2834#endif
2835
2840 {
2841 public:
2843 _In_ bool allow_idn,
2844 _In_ const std::locale& locale = std::locale()) :
2845 basic_dns_domain_char<char>(allow_idn, locale)
2846 {}
2847
2848 virtual bool match(
2849 _In_reads_or_z_(end) const char* text,
2850 _In_ size_t start = 0,
2851 _In_ size_t end = (size_t)-1,
2852 _In_ int flags = match_default)
2853 {
2854 assert(text || start >= end);
2855 if (start < end && text[start]) {
2856 wchar_t buf[3];
2857 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
2858 const wchar_t* chr_end = chr + stdex::strlen(chr);
2859 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2860 ('a' <= chr[0] && chr[0] <= 'z') ||
2861 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2862 allow_on_edge = true;
2863 else if (chr[0] == '-' && chr[1] == 0)
2864 allow_on_edge = false;
2865 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2866 allow_on_edge = true;
2867 else {
2868 interval.start = (interval.end = start) + 1;
2869 return false;
2870 }
2871 interval.start = start;
2872 return true;
2873 }
2874 interval.start = (interval.end = start) + 1;
2875 return false;
2876 }
2877 };
2878
2882 template <class T>
2884 {
2885 public:
2887 _In_ bool allow_absolute,
2888 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2889 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2890 _In_ const std::locale& locale = std::locale()) :
2891 basic_parser<T>(locale),
2892 m_allow_absolute(allow_absolute),
2893 m_domain_char(domain_char),
2894 m_separator(separator)
2895 {}
2896
2897 virtual bool match(
2898 _In_reads_or_z_(end) const T* text,
2899 _In_ size_t start = 0,
2900 _In_ size_t end = (size_t)-1,
2901 _In_ int flags = match_default)
2902 {
2903 assert(text || start >= end);
2904 size_t i = start, count;
2905 for (count = 0; i < end && text[i] && count < 127; count++) {
2906 if (m_domain_char->match(text, i, end, flags) &&
2907 m_domain_char->allow_on_edge)
2908 {
2909 // Domain start
2910 interval.end = i = m_domain_char->interval.end;
2911 while (i < end && text[i]) {
2912 if (m_domain_char->allow_on_edge &&
2913 m_separator->match(text, i, end, flags))
2914 {
2915 // Domain end
2916 if (m_allow_absolute)
2917 interval.end = i = m_separator->interval.end;
2918 else {
2919 interval.end = i;
2920 i = m_separator->interval.end;
2921 }
2922 break;
2923 }
2924 if (m_domain_char->match(text, i, end, flags)) {
2925 if (m_domain_char->allow_on_edge)
2926 interval.end = i = m_domain_char->interval.end;
2927 else
2928 i = m_domain_char->interval.end;
2929 }
2930 else {
2931 interval.start = start;
2932 return true;
2933 }
2934 }
2935 }
2936 else
2937 break;
2938 }
2939 if (count) {
2940 interval.start = start;
2941 return true;
2942 }
2943 interval.start = (interval.end = start) + 1;
2944 return false;
2945 }
2946
2947 protected:
2949 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2950 std::shared_ptr<basic_parser<T>> m_separator;
2951 };
2952
2955#ifdef _UNICODE
2956 using tdns_name = wdns_name;
2957#else
2958 using tdns_name = dns_name;
2959#endif
2961
2965 template <class T>
2967 {
2968 public:
2969 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2970
2971 virtual bool match(
2972 _In_reads_or_z_(end) const T* text,
2973 _In_ size_t start = 0,
2974 _In_ size_t end = (size_t)-1,
2975 _In_ int flags = match_default)
2976 {
2977 assert(text || start >= end);
2978 if (start < end && text[start]) {
2979 if (text[start] == '-' ||
2980 text[start] == '.' ||
2981 text[start] == '_' ||
2982 text[start] == '~' ||
2983 text[start] == '%' ||
2984 text[start] == '!' ||
2985 text[start] == '$' ||
2986 text[start] == '&' ||
2987 text[start] == '\'' ||
2988 //text[start] == '(' ||
2989 //text[start] == ')' ||
2990 text[start] == '*' ||
2991 text[start] == '+' ||
2992 text[start] == ',' ||
2993 text[start] == ';' ||
2994 text[start] == '=' ||
2995 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
2996 {
2997 interval.end = (interval.start = start) + 1;
2998 return true;
2999 }
3000 }
3001 interval.start = (interval.end = start) + 1;
3002 return false;
3003 }
3004 };
3005
3008#ifdef _UNICODE
3010#else
3012#endif
3013
3018 {
3019 public:
3020 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3021
3022 virtual bool match(
3023 _In_reads_or_z_(end) const char* text,
3024 _In_ size_t start = 0,
3025 _In_ size_t end = (size_t)-1,
3026 _In_ int flags = match_default)
3027 {
3028 assert(text || start >= end);
3029 if (start < end && text[start]) {
3030 wchar_t buf[3];
3031 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3032 const wchar_t* chr_end = chr + stdex::strlen(chr);
3033 if ((chr[0] == L'-' ||
3034 chr[0] == L'.' ||
3035 chr[0] == L'_' ||
3036 chr[0] == L'~' ||
3037 chr[0] == L'%' ||
3038 chr[0] == L'!' ||
3039 chr[0] == L'$' ||
3040 chr[0] == L'&' ||
3041 chr[0] == L'\'' ||
3042 //chr[0] == L'(' ||
3043 //chr[0] == L')' ||
3044 chr[0] == L'*' ||
3045 chr[0] == L'+' ||
3046 chr[0] == L',' ||
3047 chr[0] == L';' ||
3048 chr[0] == L'=') && chr[1] == 0 ||
3049 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3050 {
3051 interval.start = start;
3052 return true;
3053 }
3054 }
3055
3056 interval.start = (interval.end = start) + 1;
3057 return false;
3058 }
3059 };
3060
3064 template <class T>
3066 {
3067 public:
3068 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3069
3070 virtual bool match(
3071 _In_reads_or_z_(end) const T* text,
3072 _In_ size_t start = 0,
3073 _In_ size_t end = (size_t)-1,
3074 _In_ int flags = match_default)
3075 {
3076 assert(text || start >= end);
3077 if (start < end && text[start]) {
3078 if (text[start] == '-' ||
3079 text[start] == '.' ||
3080 text[start] == '_' ||
3081 text[start] == '~' ||
3082 text[start] == '%' ||
3083 text[start] == '!' ||
3084 text[start] == '$' ||
3085 text[start] == '&' ||
3086 text[start] == '\'' ||
3087 text[start] == '(' ||
3088 text[start] == ')' ||
3089 text[start] == '*' ||
3090 text[start] == '+' ||
3091 text[start] == ',' ||
3092 text[start] == ';' ||
3093 text[start] == '=' ||
3094 text[start] == ':' ||
3095 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3096 {
3097 interval.end = (interval.start = start) + 1;
3098 return true;
3099 }
3100 }
3101 interval.start = (interval.end = start) + 1;
3102 return false;
3103 }
3104 };
3105
3108#ifdef _UNICODE
3110#else
3112#endif
3113
3118 {
3119 public:
3120 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3121
3122 virtual bool match(
3123 _In_reads_or_z_(end) const char* text,
3124 _In_ size_t start = 0,
3125 _In_ size_t end = (size_t)-1,
3126 _In_ int flags = match_default)
3127 {
3128 assert(text || start >= end);
3129 if (start < end && text[start]) {
3130 wchar_t buf[3];
3131 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3132 const wchar_t* chr_end = chr + stdex::strlen(chr);
3133 if ((chr[0] == L'-' ||
3134 chr[0] == L'.' ||
3135 chr[0] == L'_' ||
3136 chr[0] == L'~' ||
3137 chr[0] == L'%' ||
3138 chr[0] == L'!' ||
3139 chr[0] == L'$' ||
3140 chr[0] == L'&' ||
3141 chr[0] == L'\'' ||
3142 chr[0] == L'(' ||
3143 chr[0] == L')' ||
3144 chr[0] == L'*' ||
3145 chr[0] == L'+' ||
3146 chr[0] == L',' ||
3147 chr[0] == L';' ||
3148 chr[0] == L'=' ||
3149 chr[0] == L':') && chr[1] == 0 ||
3150 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3151 {
3152 interval.start = start;
3153 return true;
3154 }
3155 }
3156 interval.start = (interval.end = start) + 1;
3157 return false;
3158 }
3159 };
3160
3164 template <class T>
3166 {
3167 public:
3168 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3169
3170 virtual bool match(
3171 _In_reads_or_z_(end) const T* text,
3172 _In_ size_t start = 0,
3173 _In_ size_t end = (size_t)-1,
3174 _In_ int flags = match_default)
3175 {
3176 assert(text || start >= end);
3177 if (start < end && text[start]) {
3178 if (text[start] == '/' ||
3179 text[start] == '-' ||
3180 text[start] == '.' ||
3181 text[start] == '_' ||
3182 text[start] == '~' ||
3183 text[start] == '%' ||
3184 text[start] == '!' ||
3185 text[start] == '$' ||
3186 text[start] == '&' ||
3187 text[start] == '\'' ||
3188 text[start] == '(' ||
3189 text[start] == ')' ||
3190 text[start] == '*' ||
3191 text[start] == '+' ||
3192 text[start] == ',' ||
3193 text[start] == ';' ||
3194 text[start] == '=' ||
3195 text[start] == ':' ||
3196 text[start] == '@' ||
3197 text[start] == '?' ||
3198 text[start] == '#' ||
3199 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3200 {
3201 interval.end = (interval.start = start) + 1;
3202 return true;
3203 }
3204 }
3205 interval.start = (interval.end = start) + 1;
3206 return false;
3207 }
3208 };
3209
3212#ifdef _UNICODE
3214#else
3216#endif
3217
3222 {
3223 public:
3224 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3225
3226 virtual bool match(
3227 _In_reads_or_z_(end) const char* text,
3228 _In_ size_t start = 0,
3229 _In_ size_t end = (size_t)-1,
3230 _In_ int flags = match_default)
3231 {
3232 assert(text || start >= end);
3233 if (start < end && text[start]) {
3234 wchar_t buf[3];
3235 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3236 const wchar_t* chr_end = chr + stdex::strlen(chr);
3237 if ((chr[0] == L'/' ||
3238 chr[0] == L'-' ||
3239 chr[0] == L'.' ||
3240 chr[0] == L'_' ||
3241 chr[0] == L'~' ||
3242 chr[0] == L'%' ||
3243 chr[0] == L'!' ||
3244 chr[0] == L'$' ||
3245 chr[0] == L'&' ||
3246 chr[0] == L'\'' ||
3247 chr[0] == L'(' ||
3248 chr[0] == L')' ||
3249 chr[0] == L'*' ||
3250 chr[0] == L'+' ||
3251 chr[0] == L',' ||
3252 chr[0] == L';' ||
3253 chr[0] == L'=' ||
3254 chr[0] == L':' ||
3255 chr[0] == L'@' ||
3256 chr[0] == L'?' ||
3257 chr[0] == L'#') && chr[1] == 0 ||
3258 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3259 {
3260 interval.start = start;
3261 return true;
3262 }
3263 }
3264 interval.start = (interval.end = start) + 1;
3265 return false;
3266 }
3267 };
3268
3272 template <class T>
3274 {
3275 public:
3277 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3278 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3279 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3280 _In_ const std::locale& locale = std::locale()) :
3281 basic_parser<T>(locale),
3282 m_path_char(path_char),
3283 m_query_start(query_start),
3284 m_bookmark_start(bookmark_start)
3285 {}
3286
3287 virtual bool match(
3288 _In_reads_or_z_(end) const T* text,
3289 _In_ size_t start = 0,
3290 _In_ size_t end = (size_t)-1,
3291 _In_ int flags = match_default)
3292 {
3293 assert(text || start >= end);
3294
3295 interval.end = start;
3296 path.start = start;
3297 query.start = 1;
3298 query.end = 0;
3299 bookmark.start = 1;
3300 bookmark.end = 0;
3301
3302 for (;;) {
3303 if (interval.end >= end || !text[interval.end])
3304 break;
3305 if (m_query_start->match(text, interval.end, end, flags)) {
3306 path.end = interval.end;
3307 query.start = interval.end = m_query_start->interval.end;
3308 for (;;) {
3309 if (interval.end >= end || !text[interval.end]) {
3310 query.end = interval.end;
3311 break;
3312 }
3313 if (m_bookmark_start->match(text, interval.end, end, flags)) {
3314 query.end = interval.end;
3315 bookmark.start = interval.end = m_bookmark_start->interval.end;
3316 for (;;) {
3317 if (interval.end >= end || !text[interval.end]) {
3318 bookmark.end = interval.end;
3319 break;
3320 }
3321 if (m_path_char->match(text, interval.end, end, flags))
3322 interval.end = m_path_char->interval.end;
3323 else {
3324 bookmark.end = interval.end;
3325 break;
3326 }
3327 }
3328 interval.start = start;
3329 return true;
3330 }
3331 if (m_path_char->match(text, interval.end, end, flags))
3332 interval.end = m_path_char->interval.end;
3333 else {
3334 query.end = interval.end;
3335 break;
3336 }
3337 }
3338 interval.start = start;
3339 return true;
3340 }
3341 if (m_bookmark_start->match(text, interval.end, end, flags)) {
3342 path.end = interval.end;
3343 bookmark.start = interval.end = m_bookmark_start->interval.end;
3344 for (;;) {
3345 if (interval.end >= end || !text[interval.end]) {
3346 bookmark.end = interval.end;
3347 break;
3348 }
3349 if (m_path_char->match(text, interval.end, end, flags))
3350 interval.end = m_path_char->interval.end;
3351 else {
3352 bookmark.end = interval.end;
3353 break;
3354 }
3355 }
3356 interval.start = start;
3357 return true;
3358 }
3359 if (m_path_char->match(text, interval.end, end, flags))
3360 interval.end = m_path_char->interval.end;
3361 else
3362 break;
3363 }
3364
3365 if (start < interval.end) {
3366 path.end = interval.end;
3367 interval.start = start;
3368 return true;
3369 }
3370
3371 path.start = 1;
3372 path.end = 0;
3373 bookmark.start = 1;
3374 bookmark.end = 0;
3375 interval.start = (interval.end = start) + 1;
3376 return false;
3377 }
3378
3379 virtual void invalidate()
3380 {
3381 path.start = 1;
3382 path.end = 0;
3383 query.start = 1;
3384 query.end = 0;
3385 bookmark.start = 1;
3386 bookmark.end = 0;
3388 }
3389
3390 public:
3393 stdex::interval<size_t> bookmark;
3394
3395 protected:
3396 std::shared_ptr<basic_parser<T>> m_path_char;
3397 std::shared_ptr<basic_parser<T>> m_query_start;
3398 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3399 };
3400
3403#ifdef _UNICODE
3404 using turl_path = wurl_path;
3405#else
3406 using turl_path = url_path;
3407#endif
3409
3413 template <class T>
3414 class basic_url : public basic_parser<T>
3415 {
3416 public:
/// Constructs a URL parser from the parsers of its individual parts.
///
/// Every argument is stored as given; no parsing happens here. Parameters with a leading
/// underscore initialise the member of the same name without the underscore; the
/// remaining ones initialise the corresponding `m_`-prefixed member.
///
/// \param[in] _http_scheme    Parser for the HTTP scheme name
/// \param[in] _ftp_scheme     Parser for the FTP scheme name
/// \param[in] _mailto_scheme  Parser for the mailto scheme name
/// \param[in] _file_scheme    Parser for the file scheme name
/// \param[in] colon           Parser for the ':' delimiter
/// \param[in] slash           Parser for the '/' delimiter
/// \param[in] _username       Parser for the username
/// \param[in] _password       Parser for the password
/// \param[in] at              Parser for the '@' delimiter
/// \param[in] ip_lbracket     Parser for the bracket opening an IPv6 host
/// \param[in] ip_rbracket     Parser for the bracket closing an IPv6 host
/// \param[in] _ipv4_host      Parser for an IPv4 host
/// \param[in] _ipv6_host      Parser for an IPv6 host
/// \param[in] _dns_host       Parser for a DNS host name
/// \param[in] _port           Parser for the port
/// \param[in] _path           Parser for the path
/// \param[in] locale          Locale handed to the basic_parser base
3417 basic_url(
3418 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3419 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3420 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3421 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3422 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3423 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3424 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3425 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3426 _In_ const std::shared_ptr<basic_parser<T>>& at,
3427 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3428 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3429 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3430 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3431 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3432 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3433 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3434 _In_ const std::locale& locale = std::locale()) :
3435 basic_parser<T>(locale),
3436 http_scheme(_http_scheme),
3437 ftp_scheme(_ftp_scheme),
3438 mailto_scheme(_mailto_scheme),
3439 file_scheme(_file_scheme),
3440 m_colon(colon),
3441 m_slash(slash),
3442 username(_username),
3443 password(_password),
3444 m_at(at),
3445 m_ip_lbracket(ip_lbracket),
3446 m_ip_rbracket(ip_rbracket),
3447 ipv4_host(_ipv4_host),
3448 ipv6_host(_ipv6_host),
3449 dns_host(_dns_host),
3450 port(_port),
3451 path(_path)
3452 {}
3453
3454 virtual bool match(
3455 _In_reads_or_z_(end) const T* text,
3456 _In_ size_t start = 0,
3457 _In_ size_t end = (size_t)-1,
3458 _In_ int flags = match_default)
3459 {
3460 assert(text || start >= end);
3461
3462 interval.end = start;
3463
3464 if (http_scheme->match(text, interval.end, end, flags) &&
3465 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3466 m_slash->match(text, m_colon->interval.end, end, flags) &&
3467 m_slash->match(text, m_slash->interval.end, end, flags))
3468 {
3469 // http://
3470 interval.end = m_slash->interval.end;
3471 ftp_scheme->invalidate();
3472 mailto_scheme->invalidate();
3473 file_scheme->invalidate();
3474 }
3475 else if (ftp_scheme->match(text, interval.end, end, flags) &&
3476 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3477 m_slash->match(text, m_colon->interval.end, end, flags) &&
3478 m_slash->match(text, m_slash->interval.end, end, flags))
3479 {
3480 // ftp://
3481 interval.end = m_slash->interval.end;
3482 http_scheme->invalidate();
3483 mailto_scheme->invalidate();
3484 file_scheme->invalidate();
3485 }
3486 else if (mailto_scheme->match(text, interval.end, end, flags) &&
3487 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3488 {
3489 // mailto:
3490 interval.end = m_colon->interval.end;
3491 http_scheme->invalidate();
3492 ftp_scheme->invalidate();
3493 file_scheme->invalidate();
3494 }
3495 else if (file_scheme->match(text, interval.end, end, flags) &&
3496 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3497 m_slash->match(text, m_colon->interval.end, end, flags) &&
3498 m_slash->match(text, m_slash->interval.end, end, flags))
3499 {
3500 // file://
3501 interval.end = m_slash->interval.end;
3502 http_scheme->invalidate();
3503 ftp_scheme->invalidate();
3504 mailto_scheme->invalidate();
3505 }
3506 else {
3507 // Default to http:
3508 http_scheme->invalidate();
3509 ftp_scheme->invalidate();
3510 mailto_scheme->invalidate();
3511 file_scheme->invalidate();
3512 }
3513
3514 if (ftp_scheme->interval) {
3515 if (username->match(text, interval.end, end, flags)) {
3516 if (m_colon->match(text, username->interval.end, end, flags) &&
3517 password->match(text, m_colon->interval.end, end, flags) &&
3518 m_at->match(text, password->interval.end, end, flags))
3519 {
3520 // Username and password
3521 interval.end = m_at->interval.end;
3522 }
3523 else if (m_at->match(text, interval.end, end, flags)) {
3524 // Username only
3525 interval.end = m_at->interval.end;
3526 password->invalidate();
3527 }
3528 else {
3529 username->invalidate();
3530 password->invalidate();
3531 }
3532 }
3533 else {
3534 username->invalidate();
3535 password->invalidate();
3536 }
3537
3538 if (ipv4_host->match(text, interval.end, end, flags)) {
3539 // Host is IPv4
3540 interval.end = ipv4_host->interval.end;
3541 ipv6_host->invalidate();
3542 dns_host->invalidate();
3543 }
3544 else if (
3545 m_ip_lbracket->match(text, interval.end, end, flags) &&
3546 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3547 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3548 {
3549 // Host is IPv6
3550 interval.end = m_ip_rbracket->interval.end;
3551 ipv4_host->invalidate();
3552 dns_host->invalidate();
3553 }
3554 else if (dns_host->match(text, interval.end, end, flags)) {
3555 // Host is hostname
3556 interval.end = dns_host->interval.end;
3557 ipv4_host->invalidate();
3558 ipv6_host->invalidate();
3559 }
3560 else {
3561 invalidate();
3562 return false;
3563 }
3564
3565 if (m_colon->match(text, interval.end, end, flags) &&
3566 port->match(text, m_colon->interval.end, end, flags))
3567 {
3568 // Port
3569 interval.end = port->interval.end;
3570 }
3571 else
3572 port->invalidate();
3573
3574 if (path->match(text, interval.end, end, flags)) {
3575 // Path
3576 interval.end = path->interval.end;
3577 }
3578
3579 interval.start = start;
3580 return true;
3581 }
3582
3583 if (mailto_scheme->interval) {
3584 if (username->match(text, interval.end, end, flags) &&
3585 m_at->match(text, username->interval.end, end, flags))
3586 {
3587 // Username
3588 interval.end = m_at->interval.end;
3589 }
3590 else {
3591 invalidate();
3592 return false;
3593 }
3594
3595 if (m_ip_lbracket->match(text, interval.end, end, flags) &&
3596 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3597 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3598 {
3599 // Host is IPv4
3600 interval.end = m_ip_rbracket->interval.end;
3601 ipv6_host->invalidate();
3602 dns_host->invalidate();
3603 }
3604 else if (
3605 m_ip_lbracket->match(text, interval.end, end, flags) &&
3606 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3607 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3608 {
3609 // Host is IPv6
3610 interval.end = m_ip_rbracket->interval.end;
3611 ipv4_host->invalidate();
3612 dns_host->invalidate();
3613 }
3614 else if (dns_host->match(text, interval.end, end, flags)) {
3615 // Host is hostname
3616 interval.end = dns_host->interval.end;
3617 ipv4_host->invalidate();
3618 ipv6_host->invalidate();
3619 }
3620 else {
3621 invalidate();
3622 return false;
3623 }
3624
3625 password->invalidate();
3626 port->invalidate();
3627 path->invalidate();
3628 interval.start = start;
3629 return true;
3630 }
3631
3632 if (file_scheme->interval) {
3633 if (path->match(text, interval.end, end, flags)) {
3634 // Path
3635 interval.end = path->interval.end;
3636 }
3637
3638 username->invalidate();
3639 password->invalidate();
3640 ipv4_host->invalidate();
3641 ipv6_host->invalidate();
3642 dns_host->invalidate();
3643 port->invalidate();
3644 interval.start = start;
3645 return true;
3646 }
3647
3648 // "http://" found or defaulted to
3649
3650 // If "http://" explicit, test for username&password.
3651 if (http_scheme->interval &&
3652 username->match(text, interval.end, end, flags))
3653 {
3654 if (m_colon->match(text, username->interval.end, end, flags) &&
3655 password->match(text, m_colon->interval.end, end, flags) &&
3656 m_at->match(text, password->interval.end, end, flags))
3657 {
3658 // Username and password
3659 interval.end = m_at->interval.end;
3660 }
3661 else if (m_at->match(text, username->interval.end, end, flags)) {
3662 // Username only
3663 interval.end = m_at->interval.end;
3664 password->invalidate();
3665 }
3666 else {
3667 username->invalidate();
3668 password->invalidate();
3669 }
3670 }
3671 else {
3672 username->invalidate();
3673 password->invalidate();
3674 }
3675
3676 if (ipv4_host->match(text, interval.end, end, flags)) {
3677 // Host is IPv4
3678 interval.end = ipv4_host->interval.end;
3679 ipv6_host->invalidate();
3680 dns_host->invalidate();
3681 }
3682 else if (
3683 m_ip_lbracket->match(text, interval.end, end, flags) &&
3684 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3685 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3686 {
3687 // Host is IPv6
3688 interval.end = m_ip_rbracket->interval.end;
3689 ipv4_host->invalidate();
3690 dns_host->invalidate();
3691 }
3692 else if (dns_host->match(text, interval.end, end, flags)) {
3693 // Host is hostname
3694 interval.end = dns_host->interval.end;
3695 ipv4_host->invalidate();
3696 ipv6_host->invalidate();
3697 }
3698 else {
3699 invalidate();
3700 return false;
3701 }
3702
3703 if (m_colon->match(text, interval.end, end, flags) &&
3704 port->match(text, m_colon->interval.end, end, flags))
3705 {
3706 // Port
3707 interval.end = port->interval.end;
3708 }
3709 else
3710 port->invalidate();
3711
3712 if (path->match(text, interval.end, end, flags)) {
3713 // Path
3714 interval.end = path->interval.end;
3715 }
3716
3717 interval.start = start;
3718 return true;
3719 }
3720
3721 virtual void invalidate()
3722 {
3723 http_scheme->invalidate();
3724 ftp_scheme->invalidate();
3725 mailto_scheme->invalidate();
3726 file_scheme->invalidate();
3727 username->invalidate();
3728 password->invalidate();
3729 ipv4_host->invalidate();
3730 ipv6_host->invalidate();
3731 dns_host->invalidate();
3732 port->invalidate();
3733 path->invalidate();
3735 }
3736
// Component parsers of the URL. match() runs them against the text and
// explicitly invalidates the ones that do not apply to the recognised URL form.
3737 public:
// Scheme-name parsers; match() tries them in the order http, ftp, mailto, file.
3738 std::shared_ptr<basic_parser<T>> http_scheme;
3739 std::shared_ptr<basic_parser<T>> ftp_scheme;
3740 std::shared_ptr<basic_parser<T>> mailto_scheme;
3741 std::shared_ptr<basic_parser<T>> file_scheme;
// Credentials preceding the '@' separator.
3742 std::shared_ptr<basic_parser<T>> username;
3743 std::shared_ptr<basic_parser<T>> password;
// Host alternatives; at most one of them stays matched after match().
3744 std::shared_ptr<basic_parser<T>> ipv4_host;
3745 std::shared_ptr<basic_parser<T>> ipv6_host;
3746 std::shared_ptr<basic_parser<T>> dns_host;
// Port (tried after a colon) and path (tried last).
3747 std::shared_ptr<basic_parser<T>> port;
3748 std::shared_ptr<basic_parser<T>> path;
3749
// Punctuation parsers used between the components above.
3750 protected:
3751 std::shared_ptr<basic_parser<T>> m_colon;
3752 std::shared_ptr<basic_parser<T>> m_slash;
3753 std::shared_ptr<basic_parser<T>> m_at;
3754 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3755 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3756 };
3757
// Character-type instantiations of basic_url.
3758 using url = basic_url<char>;
3759 using wurl = basic_url<wchar_t>;
// turl follows the build's character set: wide when _UNICODE is defined, narrow otherwise.
3760#ifdef _UNICODE
3761 using turl = wurl;
3762#else
3763 using turl = url;
3764#endif
// Instantiated for char as well.
3765 using sgml_url = basic_url<char>;
3766
3770 template <class T>
3772 {
3773 public:
3775 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3776 _In_ const std::shared_ptr<basic_parser<T>>& at,
3777 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3778 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3779 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3780 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3781 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3782 _In_ const std::locale& locale = std::locale()) :
3783 basic_parser<T>(locale),
3784 username(_username),
3785 m_at(at),
3786 m_ip_lbracket(ip_lbracket),
3787 m_ip_rbracket(ip_rbracket),
3788 ipv4_host(_ipv4_host),
3789 ipv6_host(_ipv6_host),
3790 dns_host(_dns_host)
3791 {}
3792
3793 virtual bool match(
3794 _In_reads_or_z_(end) const T* text,
3795 _In_ size_t start = 0,
3796 _In_ size_t end = (size_t)-1,
3797 _In_ int flags = match_default)
3798 {
3799 assert(text || start >= end);
3800
3801 if (username->match(text, start, end, flags) &&
3802 m_at->match(text, username->interval.end, end, flags))
3803 {
3804 // Username@
3805 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3806 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3807 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3808 {
3809 // Host is IPv4
3810 interval.end = m_ip_rbracket->interval.end;
3811 ipv6_host->invalidate();
3812 dns_host->invalidate();
3813 }
3814 else if (
3815 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3816 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3817 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3818 {
3819 // Host is IPv6
3820 interval.end = m_ip_rbracket->interval.end;
3821 ipv4_host->invalidate();
3822 dns_host->invalidate();
3823 }
3824 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3825 // Host is hostname
3826 interval.end = dns_host->interval.end;
3827 ipv4_host->invalidate();
3828 ipv6_host->invalidate();
3829 }
3830 else
3831 goto error;
3832 interval.start = start;
3833 return true;
3834 }
3835
3836 error:
3837 username->invalidate();
3838 ipv4_host->invalidate();
3839 ipv6_host->invalidate();
3840 dns_host->invalidate();
3841 interval.start = (interval.end = start) + 1;
3842 return false;
3843 }
3844
3845 virtual void invalidate()
3846 {
3847 username->invalidate();
3848 ipv4_host->invalidate();
3849 ipv6_host->invalidate();
3850 dns_host->invalidate();
3852 }
3853
3854 public:
3855 std::shared_ptr<basic_parser<T>> username;
3856 std::shared_ptr<basic_parser<T>> ipv4_host;
3857 std::shared_ptr<basic_parser<T>> ipv6_host;
3858 std::shared_ptr<basic_parser<T>> dns_host;
3859
3860 protected:
3861 std::shared_ptr<basic_parser<T>> m_at;
3862 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3863 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3864 };
3865
3868#ifdef _UNICODE
3870#else
3872#endif
3874
3878 template <class T>
3880 {
3881 public:
3883 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3884 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3885 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3886 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3887 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3888 _In_ const std::locale& locale = std::locale()) :
3889 basic_parser<T>(locale),
3890 emoticon(_emoticon),
3891 apex(_apex),
3892 eyes(_eyes),
3893 nose(_nose),
3894 mouth(_mouth)
3895 {}
3896
3897 virtual bool match(
3898 _In_reads_or_z_(end) const T* text,
3899 _In_ size_t start = 0,
3900 _In_ size_t end = (size_t)-1,
3901 _In_ int flags = match_default)
3902 {
3903 assert(text || start >= end);
3904
3905 if (emoticon && emoticon->match(text, start, end, flags)) {
3906 if (apex) apex->invalidate();
3907 eyes->invalidate();
3908 if (nose) nose->invalidate();
3909 mouth->invalidate();
3910 interval.start = start;
3912 return true;
3913 }
3914
3915 interval.end = start;
3916
3917 if (apex && apex->match(text, interval.end, end, flags))
3918 interval.end = apex->interval.end;
3919
3920 if (eyes->match(text, interval.end, end, flags)) {
3921 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3922 mouth->match(text, nose->interval.end, end, flags))
3923 {
3924 size_t
3925 start_mouth = mouth->interval.start,
3926 hit_offset = mouth->hit_offset;
3927 // Mouth may repeat :-)))))))
3928 for (interval.end = mouth->interval.end; mouth->match(text, interval.end, end, flags) && mouth->hit_offset == hit_offset; interval.end = mouth->interval.end);
3929 mouth->interval.start = start_mouth;
3930 mouth->interval.end = interval.end;
3931 interval.start = start;
3932 return true;
3933 }
3934 if (mouth->match(text, eyes->interval.end, end, flags)) {
3935 size_t
3936 start_mouth = mouth->interval.start,
3937 hit_offset = mouth->hit_offset;
3938 // Mouth may repeat :-)))))))
3939 for (interval.end = mouth->interval.end; mouth->match(text, interval.end, end, flags) && mouth->hit_offset == hit_offset; interval.end = mouth->interval.end);
3940 if (nose) nose->invalidate();
3941 mouth->interval.start = start_mouth;
3942 mouth->interval.end = interval.end;
3943 interval.start = start;
3944 return true;
3945 }
3946 }
3947
3948 if (emoticon) emoticon->invalidate();
3949 if (apex) apex->invalidate();
3950 eyes->invalidate();
3951 if (nose) nose->invalidate();
3952 mouth->invalidate();
3953 interval.start = (interval.end = start) + 1;
3954 return false;
3955 }
3956
3957 virtual void invalidate()
3958 {
3959 if (emoticon) emoticon->invalidate();
3960 if (apex) apex->invalidate();
3961 eyes->invalidate();
3962 if (nose) nose->invalidate();
3963 mouth->invalidate();
3965 }
3966
3967 public:
3968 std::shared_ptr<basic_parser<T>> emoticon;
3969 std::shared_ptr<basic_parser<T>> apex;
3970 std::shared_ptr<basic_parser<T>> eyes;
3971 std::shared_ptr<basic_parser<T>> nose;
3972 std::shared_ptr<basic_set<T>> mouth;
3973 };
3974
3977#ifdef _UNICODE
3978 using temoticon = wemoticon;
3979#else
3980 using temoticon = emoticon;
3981#endif
3983
3987 template <class T>
3988 class basic_date : public basic_parser<T>
3989 {
3990 public:
3991 enum class format {
3992 dmy = 0x1,
3993 mdy = 0x2,
3994 ymd = 0x4,
3995 ym = 0x8,
3996 my = 0x10,
3997 dm = 0x20,
3998 md = 0x40,
3999 };
4000
4001 basic_date(
4002 _In_ int format_mask,
4003 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4004 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4005 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4006 _In_ const std::shared_ptr<basic_set<T>>& separator,
4007 _In_ const std::shared_ptr<basic_parser<T>>& space,
4008 _In_ const std::locale& locale = std::locale()) :
4009 basic_parser<T>(locale),
4010 format(0),
4011 m_format_mask(format_mask),
4012 day(_day),
4013 month(_month),
4014 year(_year),
4015 m_separator(separator),
4016 m_space(space)
4017 {}
4018
4019 virtual bool match(
4020 _In_reads_or_z_(end) const T* text,
4021 _In_ size_t start = 0,
4022 _In_ size_t end = (size_t)-1,
4023 _In_ int flags = match_default)
4024 {
4025 assert(text || start >= end);
4026
4027 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4028 if ((m_format_mask & format::dmy) != 0) {
4029 if (day->match(text, start, end, flags)) {
4030 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4031 if (m_separator->match(text, interval.end, end, flags)) {
4032 size_t hit_offset = m_separator->hit_offset;
4033 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4034 if (month->match(text, interval.end, end, flags)) {
4035 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4036 if (m_separator->match(text, interval.end, end, flags) &&
4037 m_separator->hit_offset == hit_offset) // Both separators must match.
4038 {
4039 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4040 if (year->match(text, interval.end, end, flags) &&
4041 is_valid(day->value, month->value))
4042 {
4043 interval.start = start;
4044 interval.end = year->interval.end;
4045 format = format::dmy;
4046 return true;
4047 }
4048 }
4049 }
4050 }
4051 }
4052 }
4053
4054 if ((m_format_mask & format::mdy) != 0) {
4055 if (month->match(text, start, end, flags)) {
4056 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4057 if (m_separator->match(text, interval.end, end, flags)) {
4058 size_t hit_offset = m_separator->hit_offset;
4059 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4060 if (day->match(text, interval.end, end, flags)) {
4061 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4062 if (m_separator->match(text, interval.end, end, flags) &&
4063 m_separator->hit_offset == hit_offset) // Both separators must match.
4064 {
4065 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4066 if (year->match(text, interval.end, end, flags) &&
4067 is_valid(day->value, month->value))
4068 {
4069 interval.start = start;
4070 interval.end = year->interval.end;
4071 format = format::mdy;
4072 return true;
4073 }
4074 }
4075 }
4076 }
4077 }
4078 }
4079
4080 if ((m_format_mask & format::ymd) != 0) {
4081 if (year->match(text, start, end, flags)) {
4082 for (interval.end = year->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4083 if (m_separator->match(text, interval.end, end, flags)) {
4084 size_t hit_offset = m_separator->hit_offset;
4085 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4086 if (month->match(text, interval.end, end, flags)) {
4087 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4088 if (m_separator->match(text, interval.end, end, flags) &&
4089 m_separator->hit_offset == hit_offset) // Both separators must match.
4090 {
4091 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4092 if (day->match(text, interval.end, end, flags) &&
4093 is_valid(day->value, month->value))
4094 {
4095 interval.start = start;
4096 interval.end = day->interval.end;
4097 format = format::ymd;
4098 return true;
4099 }
4100 }
4101 }
4102 }
4103 }
4104 }
4105
4106 if ((m_format_mask & format::ym) != 0) {
4107 if (year->match(text, start, end, flags)) {
4108 for (interval.end = year->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4109 if (m_separator->match(text, interval.end, end, flags)) {
4110 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4111 if (month->match(text, interval.end, end, flags) &&
4112 is_valid((size_t)-1, month->value))
4113 {
4114 if (day) day->invalidate();
4115 interval.start = start;
4116 interval.end = month->interval.end;
4117 format = format::ym;
4118 return true;
4119 }
4120 }
4121 }
4122 }
4123
4124 if ((m_format_mask & format::my) != 0) {
4125 if (month->match(text, start, end, flags)) {
4126 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4127 if (m_separator->match(text, interval.end, end, flags)) {
4128 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4129 if (year->match(text, interval.end, end, flags) &&
4130 is_valid((size_t)-1, month->value))
4131 {
4132 if (day) day->invalidate();
4133 interval.start = start;
4134 interval.end = year->interval.end;
4135 format = format::my;
4136 return true;
4137 }
4138 }
4139 }
4140 }
4141
4142 if ((m_format_mask & format::dm) != 0) {
4143 if (day->match(text, start, end, flags)) {
4144 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4145 if (m_separator->match(text, interval.end, end, flags)) {
4146 size_t hit_offset = m_separator->hit_offset;
4147 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4148 if (month->match(text, interval.end, end, flags) &&
4149 is_valid(day->value, month->value))
4150 {
4151 if (year) year->invalidate();
4152 interval.start = start;
4153 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4154 if (m_separator->match(text, interval.end, end, flags) &&
4155 m_separator->hit_offset == hit_offset) // Both separators must match.
4156 interval.end = m_separator->interval.end;
4157 else
4158 interval.end = month->interval.end;
4159 format = format::dm;
4160 return true;
4161 }
4162 }
4163 }
4164 }
4165
4166 if ((m_format_mask & format::md) != 0) {
4167 if (month->match(text, start, end, flags)) {
4168 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4169 if (m_separator->match(text, interval.end, end, flags)) {
4170 size_t hit_offset = m_separator->hit_offset;
4171 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4172 if (day->match(text, interval.end, end, flags) &&
4173 is_valid(day->value, month->value))
4174 {
4175 if (year) year->invalidate();
4176 interval.start = start;
4177 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4178 if (m_separator->match(text, interval.end, end, flags) &&
4179 m_separator->hit_offset == hit_offset) // Both separators must match.
4180 interval.end = m_separator->interval.end;
4181 else
4182 interval.end = day->interval.end;
4183 format = format::md;
4184 return true;
4185 }
4186 }
4187 }
4188 }
4189
4190 if (day) day->invalidate();
4191 if (month) month->invalidate();
4192 if (year) year->invalidate();
4193 format = 0;
4194 interval.start = (interval.end = start) + 1;
4195 return false;
4196 }
4197
4198 virtual void invalidate()
4199 {
4200 if (day) day->invalidate();
4201 if (month) month->invalidate();
4202 if (year) year->invalidate();
4203 format = 0;
4205 }
4206
4207 protected:
4208 static inline bool is_valid(size_t day, size_t month)
4209 {
4210 if (month == (size_t)-1) {
4211 // Default to January. This allows validating day only, as January has all 31 days.
4212 month = 1;
4213 }
4214 if (day == (size_t)-1) {
4215 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4216 day = 1;
4217 }
4218
4219 switch (month) {
4220 case 1:
4221 case 3:
4222 case 5:
4223 case 7:
4224 case 8:
4225 case 10:
4226 case 12:
4227 return 1 <= day && day <= 31;
4228 case 2:
4229 return 1 <= day && day <= 29;
4230 case 4:
4231 case 6:
4232 case 9:
4233 case 11:
4234 return 1 <= day && day <= 30;
4235 default:
4236 return false;
4237 }
4238 }
4239
4240 public:
4241 format format;
4242 std::shared_ptr<basic_integer<T>> day;
4243 std::shared_ptr<basic_integer<T>> month;
4244 std::shared_ptr<basic_integer<T>> year;
4245
4246 protected:
4247 int m_format_mask;
4248 std::shared_ptr<basic_set<T>> m_separator;
4249 std::shared_ptr<basic_parser<T>> m_space;
4250 };
4251
// Character-type instantiations of basic_date.
4252 using date = basic_date<char>;
4253 using wdate = basic_date<wchar_t>;
// tdate follows the build's character set: wide when _UNICODE is defined, narrow otherwise.
4254#ifdef _UNICODE
4255 using tdate = wdate;
4256#else
4257 using tdate = date;
4258#endif
4260
4264 template <class T>
4265 class basic_time : public basic_parser<T>
4266 {
4267 public:
4268 basic_time(
4269 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4270 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4271 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4272 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4273 _In_ const std::shared_ptr<basic_set<T>>& separator,
4274 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4275 _In_ const std::locale& locale = std::locale()) :
4276 basic_parser<T>(locale),
4277 hour(_hour),
4278 minute(_minute),
4279 second(_second),
4280 millisecond(_millisecond),
4281 m_separator(separator),
4282 m_millisecond_separator(millisecond_separator)
4283 {}
4284
4285 virtual bool match(
4286 _In_reads_or_z_(end) const T* text,
4287 _In_ size_t start = 0,
4288 _In_ size_t end = (size_t)-1,
4289 _In_ int flags = match_default)
4290 {
4291 assert(text || start >= end);
4292
4293 if (hour->match(text, start, end, flags) &&
4294 m_separator->match(text, hour->interval.end, end, flags) &&
4295 minute->match(text, m_separator->interval.end, end, flags) &&
4296 minute->value < 60)
4297 {
4298 // hh::mm
4299 size_t hit_offset = m_separator->hit_offset;
4300 if (m_separator->match(text, minute->interval.end, end, flags) &&
4301 m_separator->hit_offset == hit_offset && // Both separators must match.
4302 second && second->match(text, m_separator->interval.end, end, flags) &&
4303 second->value < 60)
4304 {
4305 // hh::mm:ss
4306 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4307 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4308 millisecond->value < 1000)
4309 {
4310 // hh::mm:ss.mmmm
4311 interval.end = millisecond->interval.end;
4312 }
4313 else {
4314 if (millisecond) millisecond->invalidate();
4315 interval.end = second->interval.end;
4316 }
4317 }
4318 else {
4319 if (second) second->invalidate();
4320 if (millisecond) millisecond->invalidate();
4321 interval.end = minute->interval.end;
4322 }
4323 interval.start = start;
4324 return true;
4325 }
4326
4327 hour->invalidate();
4328 minute->invalidate();
4329 if (second) second->invalidate();
4330 if (millisecond) millisecond->invalidate();
4331 interval.start = (interval.end = start) + 1;
4332 return false;
4333 }
4334
4335 virtual void invalidate()
4336 {
4337 hour->invalidate();
4338 minute->invalidate();
4339 if (second) second->invalidate();
4340 if (millisecond) millisecond->invalidate();
4342 }
4343
4344 public:
4345 std::shared_ptr<basic_integer10<T>> hour;
4346 std::shared_ptr<basic_integer10<T>> minute;
4347 std::shared_ptr<basic_integer10<T>> second;
4348 std::shared_ptr<basic_integer10<T>> millisecond;
4349
4350 protected:
4351 std::shared_ptr<basic_set<T>> m_separator;
4352 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4353 };
4354
// Character-type instantiations of basic_time.
4355 using time = basic_time<char>;
4356 using wtime = basic_time<wchar_t>;
// ttime follows the build's character set: wide when _UNICODE is defined, narrow otherwise.
4357#ifdef _UNICODE
4358 using ttime = wtime;
4359#else
4360 using ttime = time;
4361#endif
4363
4367 template <class T>
4368 class basic_angle : public basic_parser<T>
4369 {
4370 public:
4372 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4373 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4374 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4375 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4376 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4377 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4378 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4379 _In_ const std::locale& locale = std::locale()) :
4380 basic_parser<T>(locale),
4381 degree(_degree),
4382 degree_separator(_degree_separator),
4383 minute(_minute),
4384 minute_separator(_minute_separator),
4385 second(_second),
4386 second_separator(_second_separator),
4387 decimal(_decimal)
4388 {}
4389
4390 virtual bool match(
4391 _In_reads_or_z_(end) const T* text,
4392 _In_ size_t start = 0,
4393 _In_ size_t end = (size_t)-1,
4394 _In_ int flags = match_default)
4395 {
4396 assert(text || start >= end);
4397
4398 interval.end = start;
4399
4400 if (degree->match(text, interval.end, end, flags) &&
4401 degree_separator->match(text, degree->interval.end, end, flags))
4402 {
4403 // Degrees
4404 interval.end = degree_separator->interval.end;
4405 }
4406 else {
4407 degree->invalidate();
4408 degree_separator->invalidate();
4409 }
4410
4411 if (minute->match(text, interval.end, end, flags) &&
4412 minute->value < 60 &&
4413 minute_separator->match(text, minute->interval.end, end, flags))
4414 {
4415 // Minutes
4416 interval.end = minute_separator->interval.end;
4417 }
4418 else {
4419 minute->invalidate();
4420 minute_separator->invalidate();
4421 }
4422
4423 if (second && second->match(text, interval.end, end, flags) &&
4424 second->value < 60)
4425 {
4426 // Seconds
4427 interval.end = second->interval.end;
4428 if (second_separator && second_separator->match(text, interval.end, end, flags))
4429 interval.end = second_separator->interval.end;
4430 else
4431 if (second_separator) second_separator->invalidate();
4432 }
4433 else {
4434 if (second) second->invalidate();
4435 if (second_separator) second_separator->invalidate();
4436 }
4437
4438 if (degree->interval.start < degree->interval.end ||
4439 minute->interval.start < minute->interval.end ||
4440 second && second->interval.start < second->interval.end)
4441 {
4442 if (decimal && decimal->match(text, interval.end, end, flags)) {
4443 // Decimals
4444 interval.end = decimal->interval.end;
4445 }
4446 else if (decimal)
4447 decimal->invalidate();
4448 interval.start = start;
4449 return true;
4450 }
4451 if (decimal) decimal->invalidate();
4452 interval.start = (interval.end = start) + 1;
4453 return false;
4454 }
4455
4456 virtual void invalidate()
4457 {
4458 degree->invalidate();
4459 degree_separator->invalidate();
4460 minute->invalidate();
4461 minute_separator->invalidate();
4462 if (second) second->invalidate();
4463 if (second_separator) second_separator->invalidate();
4464 if (decimal) decimal->invalidate();
4466 }
4467
4468 public:
4469 std::shared_ptr<basic_integer10<T>> degree;
4470 std::shared_ptr<basic_parser<T>> degree_separator;
4471 std::shared_ptr<basic_integer10<T>> minute;
4472 std::shared_ptr<basic_parser<T>> minute_separator;
4473 std::shared_ptr<basic_integer10<T>> second;
4474 std::shared_ptr<basic_parser<T>> second_separator;
4475 std::shared_ptr<basic_parser<T>> decimal;
4476 };
4477
4478 using angle = basic_angle<char>;
4480#ifdef _UNICODE
4481 using RRegElKot = wangle;
4482#else
4483 using RRegElKot = angle;
4484#endif
4486
4490 template <class T>
4492 {
4493 public:
4495 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4496 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4497 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4498 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4499 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4500 _In_ const std::shared_ptr<basic_parser<T>>& space,
4501 _In_ const std::locale& locale = std::locale()) :
4502 basic_parser<T>(locale),
4503 m_digit(digit),
4504 m_plus_sign(plus_sign),
4505 m_lparenthesis(lparenthesis),
4506 m_rparenthesis(rparenthesis),
4507 m_separator(separator),
4508 m_space(space)
4509 {}
4510
4511 virtual bool match(
4512 _In_reads_or_z_(end) const T* text,
4513 _In_ size_t start = 0,
4514 _In_ size_t end = (size_t)-1,
4515 _In_ int flags = match_default)
4516 {
4517 assert(text || start >= end);
4518
4519 size_t safe_digit_end = start, safe_value_size = 0;
4520 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4521 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4522
4523 interval.end = start;
4524 value.clear();
4525 m_lparenthesis->invalidate();
4526 m_rparenthesis->invalidate();
4527
4528 if (m_plus_sign && m_plus_sign->match(text, interval.end, end, flags)) {
4529 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4530 safe_value_size = value.size();
4531 interval.end = m_plus_sign->interval.end;
4532 }
4533
4534 for (;;) {
4535 assert(text || interval.end >= end);
4536 if (interval.end >= end || !text[interval.end])
4537 break;
4538 if (m_digit->match(text, interval.end, end, flags)) {
4539 // Digit
4540 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4541 interval.end = m_digit->interval.end;
4542 if (!in_parentheses) {
4543 safe_digit_end = interval.end;
4544 safe_value_size = value.size();
4545 has_digits = true;
4546 }
4547 after_digit = true;
4548 after_parentheses = false;
4549 }
4550 else if (
4551 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4552 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4553 m_lparenthesis->match(text, interval.end, end, flags))
4554 {
4555 // Left parenthesis
4556 value.Prilepi(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4557 interval.end = m_lparenthesis->interval.end;
4558 in_parentheses = true;
4559 after_digit = false;
4560 after_parentheses = false;
4561 }
4562 else if (
4563 in_parentheses && // After left parenthesis
4564 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4565 m_rparenthesis->match(text, interval.end, end, flags) &&
4566 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4567 {
4568 // Right parenthesis
4569 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4570 interval.end = m_rparenthesis->interval.end;
4571 safe_digit_end = interval.end;
4572 safe_value_size = value.size();
4573 in_parentheses = false;
4574 after_digit = false;
4575 after_parentheses = true;
4576 }
4577 else if (
4578 after_digit &&
4579 !in_parentheses && // No separators inside parentheses
4580 !after_parentheses && // No separators following right parenthesis
4581 m_separator && m_separator->match(text, interval.end, end, flags))
4582 {
4583 // Separator
4584 interval.end = m_separator->interval.end;
4585 after_digit = false;
4586 after_parentheses = false;
4587 }
4588 else if (
4589 (after_digit || after_parentheses) &&
4590 m_space && m_space->match(text, interval.end, end, space_match_flags))
4591 {
4592 // Space
4593 interval.end = m_space->interval.end;
4594 after_digit = false;
4595 after_parentheses = false;
4596 }
4597 else
4598 break;
4599 }
4600 if (has_digits) {
4601 value.erase(safe_value_size);
4602 interval.start = start;
4603 interval.end = safe_digit_end;
4604 return true;
4605 }
4606 value.clear();
4607 interval.start = (interval.end = start) + 1;
4608 return false;
4609 }
4610
4611 virtual void invalidate()
4612 {
4613 value.clear();
4615 }
4616
4617 public:
4618 std::basic_string<T> value;
4619
4620 protected:
4621 std::shared_ptr<basic_parser<T>> m_digit;
4622 std::shared_ptr<basic_parser<T>> m_plus_sign;
4623 std::shared_ptr<basic_set<T>> m_lparenthesis;
4624 std::shared_ptr<basic_set<T>> m_rparenthesis;
4625 std::shared_ptr<basic_parser<T>> m_separator;
4626 std::shared_ptr<basic_parser<T>> m_space;
4627 };
4628
4631#ifdef _UNICODE
4633#else
4635#endif
4637
4641 template <class T>
4643 {
4644 public:
4646 _In_ const std::shared_ptr<basic_parser<T>>& element,
4647 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4648 _In_ const std::shared_ptr<basic_parser<T>>& sign,
4649 _In_ const std::locale& locale = std::locale()) :
4650 basic_parser<T>(locale),
4651 m_element(element),
4652 m_digit(digit),
4653 m_sign(sign),
4654 has_digits(false),
4655 has_charge(false)
4656 {}
4657
4658 virtual bool match(
4659 _In_reads_or_z_(end) const T* text,
4660 _In_ size_t start = 0,
4661 _In_ size_t end = (size_t)-1,
4662 _In_ int flags = match_default)
4663 {
4664 assert(text || start >= end);
4665
4666 has_digits = false;
4667 has_charge = false;
4668 interval.end = start;
4669
4670 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
4671 for (;;) {
4672 if (m_element->match(text, interval.end, end, element_match_flags)) {
4673 interval.end = m_element->interval.end;
4674 while (m_digit->match(text, interval.end, end, flags)) {
4675 interval.end = m_digit->interval.end;
4676 has_digits = true;
4677 }
4678 }
4679 else if (start < interval.end) {
4680 if (m_sign->match(text, interval.end, end, flags)) {
4681 interval.end = m_sign->interval.end;
4682 has_charge = true;
4683 }
4684 interval.start = start;
4685 return true;
4686 }
4687 else {
4688 interval.start = (interval.end = start) + 1;
4689 return false;
4690 }
4691 }
4692 }
4693
4694 virtual void invalidate()
4695 {
4696 has_digits = false;
4697 has_charge = false;
4699 }
4700
4701 public:
4702 bool has_digits;
4703 bool has_charge;
4704
4705 protected:
4706 std::shared_ptr<basic_parser<T>> m_element;
4707 std::shared_ptr<basic_parser<T>> m_digit;
4708 std::shared_ptr<basic_parser<T>> m_sign;
4709 };
4710
4713#ifdef _UNICODE
4715#else
4717#endif
4719
4724 {
4725 public:
4726 virtual bool match(
4727 _In_reads_or_z_(end) const char* text,
4728 _In_ size_t start = 0,
4729 _In_ size_t end = (size_t)-1,
4730 _In_ int flags = match_default)
4731 {
4732 assert(text || start >= end);
4733 interval.end = start;
4734
4735 assert(text || interval.end >= end);
4736 if (interval.end < end && text[interval.end]) {
4737 if (text[interval.end] == '\r') {
4738 interval.end++;
4739 if (interval.end < end && text[interval.end] == '\n') {
4740 interval.start = start;
4741 interval.end++;
4742 return true;
4743 }
4744 }
4745 else if (text[interval.end] == '\n') {
4746 interval.start = start;
4747 interval.end++;
4748 return true;
4749 }
4750 }
4751 interval.start = (interval.end = start) + 1;
4752 return false;
4753 }
4754 };
4755
4759 class http_space : public parser
4760 {
4761 public:
4762 virtual bool match(
4763 _In_reads_or_z_(end) const char* text,
4764 _In_ size_t start = 0,
4765 _In_ size_t end = (size_t)-1,
4766 _In_ int flags = match_default)
4767 {
4768 assert(text || start >= end);
4769 interval.end = start;
4770 if (m_line_break.match(text, interval.end, end, flags)) {
4771 interval.end = m_line_break.interval.end;
4772 if (interval.end < end && text[interval.end] && isspace(text[interval.end])) {
4773 interval.start = start;
4774 interval.end++;
4775 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
4776 return true;
4777 }
4778 }
4779 else if (interval.end < end && text[interval.end] && isspace(text[interval.end])) {
4780 interval.start = start;
4781 interval.end++;
4782 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
4783 return true;
4784 }
4785 interval.start = (interval.end = start) + 1;
4786 return false;
4787 }
4788
4789 protected:
4790 http_line_break m_line_break;
4791 };
4792
4796 class http_text_char : public parser
4797 {
4798 public:
4799 virtual bool match(
4800 _In_reads_or_z_(end) const char* text,
4801 _In_ size_t start = 0,
4802 _In_ size_t end = (size_t)-1,
4803 _In_ int flags = match_default)
4804 {
4805 assert(text || start >= end);
4806 interval.end = start;
4807
4808 assert(text || interval.end >= end);
4809 if (m_space.match(text, interval.end, end, flags)) {
4810 interval.start = start;
4811 interval.end = m_space.interval.end;
4812 return true;
4813 }
4814 else if (interval.end < end && text[interval.end] && text[interval.end] >= 0x20) {
4815 interval.start = start;
4816 interval.end++;
4817 return true;
4818 }
4819 interval.start = (interval.end = start) + 1;
4820 return false;
4821 }
4822
4823 protected:
4824 http_space m_space;
4825 };
4826
4830 class http_token : public parser
4831 {
4832 public:
4833 virtual bool match(
4834 _In_reads_or_z_(end) const char* text,
4835 _In_ size_t start = 0,
4836 _In_ size_t end = (size_t)-1,
4837 _In_ int flags = match_default)
4838 {
4839 assert(text || start >= end);
4840 interval.end = start;
4841 for (;;) {
4842 if (interval.end < end && text[interval.end]) {
4843 if ((unsigned int)text[interval.end] < 0x20 ||
4844 (unsigned int)text[interval.end] == 0x7f ||
4845 text[interval.end] == '(' ||
4846 text[interval.end] == ')' ||
4847 text[interval.end] == '<' ||
4848 text[interval.end] == '>' ||
4849 text[interval.end] == '@' ||
4850 text[interval.end] == ',' ||
4851 text[interval.end] == ';' ||
4852 text[interval.end] == ':' ||
4853 text[interval.end] == '\\' ||
4854 text[interval.end] == '\"' ||
4855 text[interval.end] == '/' ||
4856 text[interval.end] == '[' ||
4857 text[interval.end] == ']' ||
4858 text[interval.end] == '?' ||
4859 text[interval.end] == '=' ||
4860 text[interval.end] == '{' ||
4861 text[interval.end] == '}' ||
4862 isspace(text[interval.end]))
4863 break;
4864 else
4865 interval.end++;
4866 }
4867 else
4868 break;
4869 }
4870 if (start < interval.end) {
4871 interval.start = start;
4872 return true;
4873 }
4874 else {
4875 interval.start = (interval.end = start) + 1;
4876 return false;
4877 }
4878 }
4879 };
4880
4885 {
4886 public:
4887 virtual bool match(
4888 _In_reads_or_z_(end) const char* text,
4889 _In_ size_t start = 0,
4890 _In_ size_t end = (size_t)-1,
4891 _In_ int flags = match_default)
4892 {
4893 assert(text || start >= end);
4894 interval.end = start;
4895 if (interval.end < end && text[interval.end] != '"')
4896 goto error;
4897 interval.end++;
4899 for (;;) {
4900 assert(text || interval.end >= end);
4901 if (interval.end < end && text[interval.end]) {
4902 if (text[interval.end] == '"') {
4904 interval.end++;
4905 break;
4906 }
4907 else if (text[interval.end] == '\\') {
4908 interval.end++;
4909 if (interval.end < end && text[interval.end]) {
4910 interval.end++;
4911 }
4912 else
4913 goto error;
4914 }
4915 else if (m_chr.match(text, interval.end, end, flags))
4916 interval.end++;
4917 else
4918 goto error;
4919 }
4920 else
4921 goto error;
4922 }
4923 interval.start = start;
4924 return true;
4925
4926 error:
4927 content.start = 1;
4928 content.end = 0;
4929 interval.start = (interval.end = start) + 1;
4930 return false;
4931 }
4932
4933 virtual void invalidate()
4934 {
4935 content.start = 1;
4936 content.end = 0;
4937 parser::invalidate();
4938 }
4939
4940 public:
4942
4943 protected:
4944 http_text_char m_chr;
4945 };
4946
4950 class http_value : public parser
4951 {
4952 public:
4953 virtual bool match(
4954 _In_reads_or_z_(end) const char* text,
4955 _In_ size_t start = 0,
4956 _In_ size_t end = (size_t)-1,
4957 _In_ int flags = match_default)
4958 {
4959 assert(text || start >= end);
4960 interval.end = start;
4961 if (string.match(text, interval.end, end, flags)) {
4962 token.invalidate();
4963 interval.end = string.interval.end;
4964 interval.start = start;
4965 return true;
4966 }
4967 else if (token.match(text, interval.end, end, flags)) {
4968 string.invalidate();
4970 interval.start = start;
4971 return true;
4972 }
4973 else {
4974 interval.start = (interval.end = start) + 1;
4975 return false;
4976 }
4977 }
4978
4979 virtual void invalidate()
4980 {
4981 string.invalidate();
4982 token.invalidate();
4983 parser::invalidate();
4984 }
4985
4986 public:
4989 };
4990
4994 class http_parameter : public parser
4995 {
4996 public:
4997 virtual bool match(
4998 _In_reads_or_z_(end) const char* text,
4999 _In_ size_t start = 0,
5000 _In_ size_t end = (size_t)-1,
5001 _In_ int flags = match_default)
5002 {
5003 assert(text || start >= end);
5004 interval.end = start;
5005 if (name.match(text, interval.end, end, flags))
5007 else
5008 goto error;
5009 while (m_space.match(text, interval.end, end, flags))
5010 interval.end = m_space.interval.end;
5011 assert(text || interval.end >= end);
5012 if (interval.end < end && text[interval.end] == '=')
5013 interval.end++;
5014 else
5015 while (m_space.match(text, interval.end, end, flags))
5016 interval.end = m_space.interval.end;
5017 if (value.match(text, interval.end, end, flags))
5019 else
5020 goto error;
5021 interval.start = start;
5022 return true;
5023
5024 error:
5025 name.invalidate();
5026 value.invalidate();
5027 interval.start = (interval.end = start) + 1;
5028 return false;
5029 }
5030
5031 virtual void invalidate()
5032 {
5033 name.invalidate();
5034 value.invalidate();
5035 parser::invalidate();
5036 }
5037
5038 public:
5041
5042 protected:
5043 http_space m_space;
5044 };
5045
5049 class http_any_type : public parser
5050 {
5051 public:
5052 virtual bool match(
5053 _In_reads_or_z_(end) const char* text,
5054 _In_ size_t start = 0,
5055 _In_ size_t end = (size_t)-1,
5056 _In_ int flags = match_default)
5057 {
5058 assert(text || start >= end);
5059 if (start + 2 < end &&
5060 text[start] == '*' &&
5061 text[start + 1] == '/' &&
5062 text[start + 2] == '*')
5063 {
5064 interval.end = (interval.start = start) + 3;
5065 return true;
5066 }
5067 else if (start < end && text[start] == '*') {
5068 interval.end = (interval.start = start) + 1;
5069 return true;
5070 }
5071 else {
5072 interval.start = (interval.end = start) + 1;
5073 return false;
5074 }
5075 }
5076 };
5077
5082 {
5083 public:
5084 virtual bool match(
5085 _In_reads_or_z_(end) const char* text,
5086 _In_ size_t start = 0,
5087 _In_ size_t end = (size_t)-1,
5088 _In_ int flags = match_default)
5089 {
5090 assert(text || start >= end);
5091 interval.end = start;
5092 if (type.match(text, interval.end, end, flags))
5093 interval.end = type.interval.end;
5094 else
5095 goto error;
5096 while (m_space.match(text, interval.end, end, flags))
5097 interval.end = m_space.interval.end;
5098 if (interval.end < end && text[interval.end] == '/')
5099 interval.end++;
5100 else
5101 goto error;
5102 while (m_space.match(text, interval.end, end, flags))
5103 interval.end = m_space.interval.end;
5104 if (subtype.match(text, interval.end, end, flags))
5105 interval.end = subtype.interval.end;
5106 else
5107 goto error;
5108 interval.start = start;
5109 return true;
5110
5111 error:
5112 type.invalidate();
5113 subtype.invalidate();
5114 interval.start = (interval.end = start) + 1;
5115 return false;
5116 }
5117
5118 virtual void invalidate()
5119 {
5120 type.invalidate();
5121 subtype.invalidate();
5122 parser::invalidate();
5123 }
5124
5125 public:
5126 http_token type;
5127 http_token subtype;
5128
5129 protected:
5130 http_space m_space;
5131 };
5132
5137 {
5138 public:
5139 virtual bool match(
5140 _In_reads_or_z_(end) const char* text,
5141 _In_ size_t start = 0,
5142 _In_ size_t end = (size_t)-1,
5143 _In_ int flags = match_default)
5144 {
5145 assert(text || start >= end);
5146 if (!http_media_range::match(text, start, end, flags))
5147 goto error;
5148 params.clear();
5149 for (;;) {
5150 if (interval.end < end && text[interval.end]) {
5151 if (m_space.match(text, interval.end, end, flags))
5152 interval.end = m_space.interval.end;
5153 else if (text[interval.end] == ';') {
5154 interval.end++;
5155 while (m_space.match(text, interval.end, end, flags))
5156 interval.end = m_space.interval.end;
5157 http_parameter param;
5158 if (param.match(text, interval.end, end, flags)) {
5159 interval.end = param.interval.end;
5160 params.push_back(std::move(param));
5161 }
5162 else
5163 break;
5164 }
5165 else
5166 break;
5167 }
5168 else
5169 break;
5170 }
5171 interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
5172 return true;
5173
5174 error:
5175 http_media_range::invalidate();
5176 params.clear();
5177 interval.start = (interval.end = start) + 1;
5178 return false;
5179 }
5180
5181 virtual void invalidate()
5182 {
5183 params.clear();
5184 http_media_range::invalidate();
5185 }
5186
5187 public:
5188 std::list<http_parameter> params;
5189 };
5190
5195 {
5196 public:
5197 virtual bool match(
5198 _In_reads_or_z_(end) const char* text,
5199 _In_ size_t start = 0,
5200 _In_ size_t end = (size_t)-1,
5201 _In_ int flags = match_default)
5202 {
5203 assert(text || start >= end);
5204 interval.end = start;
5205 for (;;) {
5206 if (interval.end < end && text[interval.end]) {
5207 if ((unsigned int)text[interval.end] < 0x20 ||
5208 (unsigned int)text[interval.end] == 0x7f ||
5209 text[interval.end] == ':' ||
5210 text[interval.end] == '/' ||
5211 isspace(text[interval.end]))
5212 break;
5213 else
5214 interval.end++;
5215 }
5216 else
5217 break;
5218 }
5219 if (start < interval.end) {
5220 interval.start = start;
5221 return true;
5222 }
5223 interval.start = (interval.end = start) + 1;
5224 return false;
5225 }
5226 };
5227
5231 class http_url_port : public parser
5232 {
5233 public:
5234 http_url_port(_In_ const std::locale& locale = std::locale()) :
5235 parser(locale),
5236 value(0)
5237 {}
5238
5239 virtual bool match(
5240 _In_reads_or_z_(end) const char* text,
5241 _In_ size_t start = 0,
5242 _In_ size_t end = (size_t)-1,
5243 _In_ int flags = match_default)
5244 {
5245 assert(text || start >= end);
5246 value = 0;
5247 interval.end = start;
5248 for (;;) {
5249 if (interval.end < end && text[interval.end]) {
5250 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5251 size_t _value = (size_t)value * 10 + text[interval.end] - '0';
5252 if (_value > (uint16_t)-1) {
5253 value = 0;
5254 interval.start = (interval.end = start) + 1;
5255 return false;
5256 }
5257 value = (uint16_t)_value;
5258 interval.end++;
5259 }
5260 else
5261 break;
5262 }
5263 else
5264 break;
5265 }
5266 if (start < interval.end) {
5267 interval.start = start;
5268 return true;
5269 }
5270 interval.start = (interval.end = start) + 1;
5271 return false;
5272 }
5273
5274 virtual void invalidate()
5275 {
5276 value = 0;
5277 parser::invalidate();
5278 }
5279
5280 public:
5281 uint16_t value;
5282 };
5283
5288 {
5289 public:
5290 virtual bool match(
5291 _In_reads_or_z_(end) const char* text,
5292 _In_ size_t start = 0,
5293 _In_ size_t end = (size_t)-1,
5294 _In_ int flags = match_default)
5295 {
5296 assert(text || start >= end);
5297 interval.end = start;
5298 for (;;) {
5299 if (interval.end < end && text[interval.end]) {
5300 if ((unsigned int)text[interval.end] < 0x20 ||
5301 (unsigned int)text[interval.end] == 0x7f ||
5302 text[interval.end] == '?' ||
5303 text[interval.end] == '/' ||
5304 isspace(text[interval.end]))
5305 break;
5306 else
5307 interval.end++;
5308 }
5309 else
5310 break;
5311 }
5312 interval.start = start;
5313 return true;
5314 }
5315 };
5316
5320 class http_url_path : public parser
5321 {
5322 public:
5323 virtual bool match(
5324 _In_reads_or_z_(end) const char* text,
5325 _In_ size_t start = 0,
5326 _In_ size_t end = (size_t)-1,
5327 _In_ int flags = match_default)
5328 {
5329 assert(text || start >= end);
5331 interval.end = start;
5332 segments.clear();
5333 assert(text || interval.end >= end);
5334 if (interval.end < end && text[interval.end] != '/')
5335 goto error;
5336 interval.end++;
5337 s.match(text, interval.end, end, flags);
5338 segments.push_back(s);
5340 for (;;) {
5341 if (interval.end < end && text[interval.end]) {
5342 if (text[interval.end] == '/') {
5343 interval.end++;
5344 s.match(text, interval.end, end, flags);
5345 segments.push_back(s);
5347 }
5348 else
5349 break;
5350 }
5351 else
5352 break;
5353 }
5354 interval.start = start;
5355 return true;
5356
5357 error:
5358 segments.clear();
5359 interval.start = (interval.end = start) + 1;
5360 return false;
5361 }
5362
5363 virtual void invalidate()
5364 {
5365 segments.clear();
5366 parser::invalidate();
5367 }
5368
5369 public:
5370 std::vector<http_url_path_segment> segments;
5371 };
5372
5377 {
5378 public:
5379 virtual bool match(
5380 _In_reads_or_z_(end) const char* text,
5381 _In_ size_t start = 0,
5382 _In_ size_t end = (size_t)-1,
5383 _In_ int flags = match_default)
5384 {
5385 assert(text || start >= end);
5386 interval.end = start;
5387 name.start = interval.end;
5388 for (;;) {
5389 if (interval.end < end && text[interval.end]) {
5390 if ((unsigned int)text[interval.end] < 0x20 ||
5391 (unsigned int)text[interval.end] == 0x7f ||
5392 text[interval.end] == '&' ||
5393 text[interval.end] == '=' ||
5394 isspace(text[interval.end]))
5395 break;
5396 else
5397 interval.end++;
5398 }
5399 else
5400 break;
5401 }
5402 if (start < interval.end)
5403 name.end = interval.end;
5404 else
5405 goto error;
5406 if (text[interval.end] == '=') {
5407 interval.end++;
5408 value.start = interval.end;
5409 for (;;) {
5410 if (interval.end < end && text[interval.end]) {
5411 if ((unsigned int)text[interval.end] < 0x20 ||
5412 (unsigned int)text[interval.end] == 0x7f ||
5413 text[interval.end] == '&' ||
5414 isspace(text[interval.end]))
5415 break;
5416 else
5417 interval.end++;
5418 }
5419 else
5420 break;
5421 }
5422 value.end = interval.end;
5423 }
5424 else {
5425 value.start = 1;
5426 value.end = 0;
5427 }
5428 interval.start = start;
5429 return true;
5430
5431 error:
5432 name.start = 1;
5433 name.end = 0;
5434 value.start = 1;
5435 value.end = 0;
5436 interval.start = (interval.end = start) + 1;
5437 return false;
5438 }
5439
5440 virtual void invalidate()
5441 {
5442 name.start = 1;
5443 name.end = 0;
5444 value.start = 1;
5445 value.end = 0;
5446 parser::invalidate();
5447 }
5448
5449 public:
5452 };
5453
5457 class http_url : public parser
5458 {
5459 public:
5460 http_url(_In_ const std::locale& locale = std::locale()) :
5461 parser(locale),
5462 port(locale)
5463 {}
5464
5465 virtual bool match(
5466 _In_reads_or_z_(end) const char* text,
5467 _In_ size_t start = 0,
5468 _In_ size_t end = (size_t)-1,
5469 _In_ int flags = match_default)
5470 {
5471 assert(text || start >= end);
5472 interval.end = start;
5473
5474 if (interval.end + 7 <= end && stdex::strnicmp(text + interval.end, 7, "http://", (size_t)-1, m_locale) == 0) {
5475 interval.end += 7;
5476 if (server.match(text, interval.end, end, flags))
5477 interval.end = server.interval.end;
5478 else
5479 goto error;
5480 if (interval.end < end && text[interval.end] == ':') {
5481 interval.end++;
5482 if (port.match(text, interval.end, end, flags))
5483 interval.end = port.interval.end;
5484 }
5485 else {
5486 port.invalidate();
5487 port.value = 80;
5488 }
5489 }
5490 else {
5491 server.invalidate();
5492 port.invalidate();
5493 port.value = 80;
5494 }
5495
5496 if (path.match(text, interval.end, end, flags))
5497 interval.end = path.interval.end;
5498 else
5499 goto error;
5500
5501 params.clear();
5502
5503 if (interval.end < end && text[interval.end] == '?') {
5504 interval.end++;
5505 for (;;) {
5506 if (interval.end < end && text[interval.end]) {
5507 if ((unsigned int)text[interval.end] < 0x20 ||
5508 (unsigned int)text[interval.end] == 0x7f ||
5509 isspace(text[interval.end]))
5510 break;
5511 else if (text[interval.end] == '&')
5512 interval.end++;
5513 else {
5514 http_url_parameter param;
5515 if (param.match(text, interval.end, end, flags)) {
5516 interval.end = param.interval.end;
5517 params.push_back(std::move(param));
5518 }
5519 else
5520 break;
5521 }
5522 }
5523 else
5524 break;
5525 }
5526 }
5527
5528 interval.start = start;
5529 return true;
5530
5531 error:
5532 server.invalidate();
5533 port.invalidate();
5534 path.invalidate();
5535 params.clear();
5536 interval.start = (interval.end = start) + 1;
5537 return false;
5538 }
5539
5540 virtual void invalidate()
5541 {
5542 server.invalidate();
5543 port.invalidate();
5544 path.invalidate();
5545 params.clear();
5546 parser::invalidate();
5547 }
5548
5549 public:
5550 http_url_server server;
5551 http_url_port port;
5552 http_url_path path;
5553 std::list<http_url_parameter> params;
5554 };
5555
5559 class http_language : public parser
5560 {
5561 public:
5562 virtual bool match(
5563 _In_reads_or_z_(end) const char* text,
5564 _In_ size_t start = 0,
5565 _In_ size_t end = (size_t)-1,
5566 _In_ int flags = match_default)
5567 {
5568 assert(text || start >= end);
5569 interval.end = start;
5570 components.clear();
5571 for (;;) {
5572 if (interval.end < end && text[interval.end]) {
5574 k.end = interval.end;
5575 for (;;) {
5576 if (k.end < end && text[k.end]) {
5577 if (isalpha(text[k.end]))
5578 k.end++;
5579 else
5580 break;
5581 }
5582 else
5583 break;
5584 }
5585 if (interval.end < k.end) {
5586 k.start = interval.end;
5587 interval.end = k.end;
5588 components.push_back(k);
5589 }
5590 else
5591 break;
5592 if (interval.end < end && text[interval.end] == '-')
5593 interval.end++;
5594 else
5595 break;
5596 }
5597 else
5598 break;
5599 }
5600 if (!components.empty()) {
5601 interval.start = start;
5602 interval.end = components.back().end;
5603 return true;
5604 }
5605 interval.start = (interval.end = start) + 1;
5606 return false;
5607 }
5608
5609 virtual void invalidate()
5610 {
5611 components.clear();
5612 parser::invalidate();
5613 }
5614
5615 public:
5616 std::vector<stdex::interval<size_t>> components;
5617 };
5618
5622 class http_weight : public parser
5623 {
5624 public:
5625 http_weight(_In_ const std::locale& locale = std::locale()) :
5626 parser(locale),
5627 value(1.0f)
5628 {}
5629
5630 virtual bool match(
5631 _In_reads_or_z_(end) const char* text,
5632 _In_ size_t start = 0,
5633 _In_ size_t end = (size_t)-1,
5634 _In_ int flags = match_default)
5635 {
5636 assert(text || start >= end);
5637 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
5638 interval.end = start;
5639 for (;;) {
5640 if (interval.end < end && text[interval.end]) {
5641 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5642 celi_del = celi_del * 10 + text[interval.end] - '0';
5643 interval.end++;
5644 }
5645 else if (text[interval.end] == '.') {
5646 interval.end++;
5647 for (;;) {
5648 if (interval.end < end && text[interval.end]) {
5649 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5650 decimalni_del = decimalni_del * 10 + text[interval.end] - '0';
5651 decimalni_del_n *= 10;
5652 interval.end++;
5653 }
5654 else
5655 break;
5656 }
5657 else
5658 break;
5659 }
5660 break;
5661 }
5662 else
5663 break;
5664 }
5665 else
5666 break;
5667 }
5668 if (start < interval.end) {
5669 value = (float)((double)celi_del + (double)decimalni_del / decimalni_del_n);
5670 interval.start = start;
5671 return true;
5672 }
5673 value = 1.0f;
5674 interval.start = (interval.end = start) + 1;
5675 return false;
5676 }
5677
5678 virtual void invalidate()
5679 {
5680 value = 1.0f;
5681 parser::invalidate();
5682 }
5683
5684 public:
5685 float value;
5686 };
5687
5691 class http_asterisk : public parser
5692 {
5693 public:
5694 virtual bool match(
5695 _In_reads_or_z_(end) const char* text,
5696 _In_ size_t start = 0,
5697 _In_ size_t end = (size_t)-1,
5698 _In_ int flags = match_default)
5699 {
5700 assert(text || end <= start);
5701 if (start < end && text[start] == '*') {
5702 interval.end = (interval.start = start) + 1;
5703 return true;
5704 }
5705 interval.start = (interval.end = start) + 1;
5706 return false;
5707 }
5708 };
5709
5713 template <class T, class T_asterisk = http_asterisk>
5715 {
5716 public:
5717 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
5718 parser(locale),
5719 factor(locale)
5720 {}
5721
5722 virtual bool match(
5723 _In_reads_or_z_(end) const char* text,
5724 _In_ size_t start = 0,
5725 _In_ size_t end = (size_t)-1,
5726 _In_ int flags = match_default)
5727 {
5728 assert(text || start >= end);
5729 size_t konec_vrednosti;
5730 interval.end = start;
5731 if (asterisk.match(text, interval.end, end, flags)) {
5732 interval.end = konec_vrednosti = asterisk.interval.end;
5733 value.invalidate();
5734 }
5735 else if (value.match(text, interval.end, end, flags)) {
5736 interval.end = konec_vrednosti = value.interval.end;
5737 asterisk.invalidate();
5738 }
5739 else {
5740 asterisk.invalidate();
5741 value.invalidate();
5742 interval.start = (interval.end = start) + 1;
5743 return false;
5744 }
5745
5746 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5747 if (interval.end < end && text[interval.end] == ';') {
5748 interval.end++;
5749 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5750 if (interval.end < end && (text[interval.end] == 'q' || text[interval.end] == 'Q')) {
5751 interval.end++;
5752 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5753 if (interval.end < end && text[interval.end] == '=') {
5754 interval.end++;
5755 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5756 if (factor.match(text, interval.end, end, flags))
5757 interval.end = factor.interval.end;
5758 }
5759 }
5760 }
5761 if (!factor.interval) {
5762 factor.invalidate();
5763 interval.end = konec_vrednosti;
5764 }
5765 interval.start = start;
5766 return true;
5767 }
5768
5769 virtual void invalidate()
5770 {
5771 asterisk.invalidate();
5772 value.invalidate();
5773 factor.invalidate();
5774 parser::invalidate();
5775 }
5776
5777 public:
5778 T_asterisk asterisk;
5779 T value;
5780 http_weight factor;
5781 };
5782
5787 {
5788 public:
5789 virtual bool match(
5790 _In_reads_or_z_(end) const char* text,
5791 _In_ size_t start = 0,
5792 _In_ size_t end = (size_t)-1,
5793 _In_ int flags = match_default)
5794 {
5795 assert(text || start >= end);
5796 interval.end = start;
5797 if (interval.end < end && text[interval.end] == '$')
5798 interval.end++;
5799 else
5800 goto error;
5801 if (name.match(text, interval.end, end, flags))
5802 interval.end = name.interval.end;
5803 else
5804 goto error;
5805 while (m_space.match(text, interval.end, end, flags))
5806 interval.end = m_space.interval.end;
5807 if (interval.end < end && text[interval.end] == '=')
5808 interval.end++;
5809 else
5810 goto error;
5811 while (m_space.match(text, interval.end, end, flags))
5812 interval.end = m_space.interval.end;
5813 if (value.match(text, interval.end, end, flags))
5814 interval.end = value.interval.end;
5815 else
5816 goto error;
5817 interval.start = start;
5818 return true;
5819
5820 error:
5821 name.invalidate();
5822 value.invalidate();
5823 interval.start = (interval.end = start) + 1;
5824 return false;
5825 }
5826
5827 virtual void invalidate()
5828 {
5829 name.invalidate();
5830 value.invalidate();
5831 parser::invalidate();
5832 }
5833
5834 public:
5835 http_token name;
5836 http_value value;
5837
5838 protected:
5839 http_space m_space;
5840 };
5841
5845 class http_cookie : public parser
5846 {
5847 public:
5848 virtual bool match(
5849 _In_reads_or_z_(end) const char* text,
5850 _In_ size_t start = 0,
5851 _In_ size_t end = (size_t)-1,
5852 _In_ int flags = match_default)
5853 {
5854 assert(text || start >= end);
5855 interval.end = start;
5856 if (name.match(text, interval.end, end, flags))
5858 else
5859 goto error;
5860 while (m_space.match(text, interval.end, end, flags))
5861 interval.end = m_space.interval.end;
5862 if (interval.end < end && text[interval.end] == '=')
5863 interval.end++;
5864 else
5865 goto error;
5866 while (m_space.match(text, interval.end, end, flags))
5867 interval.end = m_space.interval.end;
5868 if (value.match(text, interval.end, end, flags))
5870 else
5871 goto error;
5872 params.clear();
5873 for (;;) {
5874 if (interval.end < end && text[interval.end]) {
5875 if (m_space.match(text, interval.end, end, flags))
5876 interval.end = m_space.interval.end;
5877 else if (text[interval.end] == ';') {
5878 interval.end++;
5879 while (m_space.match(text, interval.end, end, flags))
5880 interval.end = m_space.interval.end;
5882 if (param.match(text, interval.end, end, flags)) {
5883 interval.end = param.interval.end;
5884 params.push_back(std::move(param));
5885 }
5886 else
5887 break;
5888 }
5889 else
5890 break;
5891 }
5892 else
5893 break;
5894 }
5895 interval.start = start;
5896 interval.end = params.empty() ? value.interval.end : params.back().interval.end;
5897 return true;
5898
5899 error:
5900 name.invalidate();
5901 value.invalidate();
5902 params.clear();
5903 interval.start = (interval.end = start) + 1;
5904 return false;
5905 }
5906
5907 virtual void invalidate()
5908 {
5909 name.invalidate();
5910 value.invalidate();
5911 params.clear();
5912 parser::invalidate();
5913 }
5914
5915 public:
5918 std::list<http_cookie_parameter> params;
5919
5920 protected:
5921 http_space m_space;
5922 };
5923
5927 class http_agent : public parser
5928 {
5929 public:
5930 virtual bool match(
5931 _In_reads_or_z_(end) const char* text,
5932 _In_ size_t start = 0,
5933 _In_ size_t end = (size_t)-1,
5934 _In_ int flags = match_default)
5935 {
5936 assert(text || start >= end);
5937 interval.end = start;
5938 type.start = interval.end;
5939 for (;;) {
5940 if (interval.end < end && text[interval.end]) {
5941 if (text[interval.end] == '/') {
5942 type.end = interval.end;
5943 interval.end++;
5944 version.start = interval.end;
5945 for (;;) {
5946 if (interval.end < end && text[interval.end]) {
5947 if (isspace(text[interval.end])) {
5948 version.end = interval.end;
5949 break;
5950 }
5951 else
5952 interval.end++;
5953 }
5954 else {
5955 version.end = interval.end;
5956 break;
5957 }
5958 }
5959 break;
5960 }
5961 else if (isspace(text[interval.end])) {
5962 type.end = interval.end;
5963 break;
5964 }
5965 else
5966 interval.end++;
5967 }
5968 else {
5969 type.end = interval.end;
5970 break;
5971 }
5972 }
5973 if (start < interval.end) {
5974 interval.start = start;
5975 return true;
5976 }
5977 type.start = 1;
5978 type.end = 0;
5979 version.start = 1;
5980 version.end = 0;
5981 interval.start = 1;
5982 interval.end = 0;
5983 return false;
5984 }
5985
5986 virtual void invalidate()
5987 {
5988 type.start = 1;
5989 type.end = 0;
5990 version.start = 1;
5991 version.end = 0;
5992 parser::invalidate();
5993 }
5994
5995 public:
5998 };
5999
6003 class http_protocol : public parser
6004 {
6005 public:
6006 http_protocol(_In_ const std::locale& locale = std::locale()) :
6007 parser(locale),
6008 version(0x009)
6009 {}
6010
6011 virtual bool match(
6012 _In_reads_or_z_(end) const char* text,
6013 _In_ size_t start = 0,
6014 _In_ size_t end = (size_t)-1,
6015 _In_ int flags = match_default)
6016 {
6017 assert(text || start >= end);
6018 interval.end = start;
6019 type.start = interval.end;
6020 for (;;) {
6021 if (interval.end < end && text[interval.end]) {
6022 if (text[interval.end] == '/') {
6023 type.end = interval.end;
6024 interval.end++;
6025 break;
6026 }
6027 else if (isspace(text[interval.end]))
6028 goto error;
6029 else
6030 interval.end++;
6031 }
6032 else {
6033 type.end = interval.end;
6034 goto error;
6035 }
6036 }
6037 version_maj.start = interval.end;
6038 for (;;) {
6039 if (interval.end < end && text[interval.end]) {
6040 if (text[interval.end] == '.') {
6041 version_maj.end = interval.end;
6042 interval.end++;
6043 version_min.start = interval.end;
6044 for (;;) {
6045 if (interval.end < end && text[interval.end]) {
6046 if (isspace(text[interval.end])) {
6047 version_min.end = interval.end;
6048 version =
6049 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6050 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6051 break;
6052 }
6053 else
6054 interval.end++;
6055 }
6056 else
6057 goto error;
6058 }
6059 break;
6060 }
6061 else if (isspace(text[interval.end])) {
6062 version_maj.end = interval.end;
6063 version_min.start = 1;
6064 version_min.end = 0;
6065 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6066 break;
6067 }
6068 else
6069 interval.end++;
6070 }
6071 else
6072 goto error;
6073 }
6074 interval.start = start;
6075 return true;
6076
6077 error:
6078 type.start = 1;
6079 type.end = 0;
6080 version_maj.start = 1;
6081 version_maj.end = 0;
6082 version_min.start = 1;
6083 version_min.end = 0;
6084 version = 0x009;
6085 interval.start = 1;
6086 interval.end = 0;
6087 return false;
6088 }
6089
6090 virtual void invalidate()
6091 {
6092 type.start = 1;
6093 type.end = 0;
6094 version_maj.start = 1;
6095 version_maj.end = 0;
6096 version_min.start = 1;
6097 version_min.end = 0;
6098 version = 0x009;
6099 parser::invalidate();
6100 }
6101
6102 public:
6104 stdex::interval<size_t> version_maj;
6105 stdex::interval<size_t> version_min;
6106 uint16_t version;
6107 };
6108
6112 class http_request : public parser
6113 {
6114 public:
6115 http_request(_In_ const std::locale& locale = std::locale()) :
6116 parser(locale),
6117 url(locale),
6118 protocol(locale)
6119 {}
6120
6121 virtual bool match(
6122 _In_reads_or_z_(end) const char* text,
6123 _In_ size_t start = 0,
6124 _In_ size_t end = (size_t)-1,
6125 _In_ int flags = match_default)
6126 {
6127 assert(text || start >= end);
6128 interval.end = start;
6129
6130 for (;;) {
6131 if (m_line_break.match(text, interval.end, end, flags))
6132 goto error;
6133 else if (interval.end < end && text[interval.end]) {
6134 if (isspace(text[interval.end]))
6135 interval.end++;
6136 else
6137 break;
6138 }
6139 else
6140 goto error;
6141 }
6142 verb.start = interval.end;
6143 for (;;) {
6144 if (m_line_break.match(text, interval.end, end, flags))
6145 goto error;
6146 else if (interval.end < end && text[interval.end]) {
6147 if (isspace(text[interval.end])) {
6148 verb.end = interval.end;
6149 interval.end++;
6150 break;
6151 }
6152 else
6153 interval.end++;
6154 }
6155 else
6156 goto error;
6157 }
6158
6159 for (;;) {
6160 if (m_line_break.match(text, interval.end, end, flags))
6161 goto error;
6162 else if (interval.end < end && text[interval.end]) {
6163 if (isspace(text[interval.end]))
6164 interval.end++;
6165 else
6166 break;
6167 }
6168 else
6169 goto error;
6170 }
6171 if (url.match(text, interval.end, end, flags))
6173 else
6174 goto error;
6175
6176 protocol.invalidate();
6177 for (;;) {
6178 if (m_line_break.match(text, interval.end, end, flags)) {
6179 interval.end = m_line_break.interval.end;
6180 goto end;
6181 }
6182 else if (interval.end < end && text[interval.end]) {
6183 if (isspace(text[interval.end]))
6184 interval.end++;
6185 else
6186 break;
6187 }
6188 else
6189 goto end;
6190 }
6191 for (;;) {
6192 if (m_line_break.match(text, interval.end, end, flags)) {
6193 interval.end = m_line_break.interval.end;
6194 goto end;
6195 }
6196 else if (protocol.match(text, interval.end, end, flags)) {
6197 interval.end = protocol.interval.end;
6198 break;
6199 }
6200 else
6201 goto end;
6202 }
6203
6204 for (;;) {
6205 if (m_line_break.match(text, interval.end, end, flags)) {
6206 interval.end = m_line_break.interval.end;
6207 break;
6208 }
6209 else if (interval.end < end && text[interval.end])
6210 interval.end++;
6211 else
6212 goto end;
6213 }
6214
6215 end:
6216 interval.start = start;
6217 return true;
6218
6219 error:
6220 verb.start = 1;
6221 verb.end = 0;
6222 url.invalidate();
6223 protocol.invalidate();
6224 interval.start = 1;
6225 interval.end = 0;
6226 return false;
6227 }
6228
6229 virtual void invalidate()
6230 {
6231 verb.start = 1;
6232 verb.end = 0;
6233 url.invalidate();
6234 protocol.invalidate();
6235 parser::invalidate();
6236 }
6237
6238 public:
6240 http_url url;
6241 http_protocol protocol;
6242
6243 protected:
6244 http_line_break m_line_break;
6245 };
6246
6250 class http_header : public parser
6251 {
6252 public:
6253 virtual bool match(
6254 _In_reads_or_z_(end) const char* text,
6255 _In_ size_t start = 0,
6256 _In_ size_t end = (size_t)-1,
6257 _In_ int flags = match_default)
6258 {
6259 assert(text || start >= end);
6260 interval.end = start;
6261
6262 if (m_line_break.match(text, interval.end, end, flags) ||
6263 interval.end < end && text[interval.end] && isspace(text[interval.end]))
6264 goto error;
6265 name.start = interval.end;
6266 for (;;) {
6267 if (m_line_break.match(text, interval.end, end, flags))
6268 goto error;
6269 else if (interval.end < end && text[interval.end]) {
6270 if (isspace(text[interval.end])) {
6271 name.end = interval.end;
6272 interval.end++;
6273 for (;;) {
6274 if (m_line_break.match(text, interval.end, end, flags))
6275 goto error;
6276 else if (interval.end < end && text[interval.end]) {
6277 if (isspace(text[interval.end]))
6278 interval.end++;
6279 else
6280 break;
6281 }
6282 else
6283 goto error;
6284 }
6285 if (interval.end < end && text[interval.end] == ':') {
6286 interval.end++;
6287 break;
6288 }
6289 else
6290 goto error;
6291 break;
6292 }
6293 else if (text[interval.end] == ':') {
6294 name.end = interval.end;
6295 interval.end++;
6296 break;
6297 }
6298 else
6299 interval.end++;
6300 }
6301 else
6302 goto error;
6303 }
6304 value.start = (size_t)-1;
6305 value.end = 0;
6306 for (;;) {
6307 if (m_line_break.match(text, interval.end, end, flags)) {
6308 interval.end = m_line_break.interval.end;
6309 if (!m_line_break.match(text, interval.end, end, flags) &&
6310 interval.end < end && text[interval.end] && isspace(text[interval.end]))
6311 interval.end++;
6312 else
6313 break;
6314 }
6315 else if (interval.end < end && text[interval.end]) {
6316 if (isspace(text[interval.end]))
6317 interval.end++;
6318 else {
6319 if (value.start == (size_t)-1) value.start = interval.end;
6320 value.end = ++interval.end;
6321 }
6322 }
6323 else
6324 break;
6325 }
6326 interval.start = start;
6327 return true;
6328
6329 error:
6330 name.start = 1;
6331 name.end = 0;
6332 value.start = 1;
6333 value.end = 0;
6334 interval.start = 1;
6335 interval.end = 0;
6336 return false;
6337 }
6338
6339 virtual void invalidate()
6340 {
6341 name.start = 1;
6342 name.end = 0;
6343 value.start = 1;
6344 value.end = 0;
6345 parser::invalidate();
6346 }
6347
6348 public:
6351
6352 protected:
6353 http_line_break m_line_break;
6354 };
6355
6359 template <class T>
6360 class http_value_collection : public T
6361 {
6362 public:
6363 void insert(
6364 _In_reads_or_z_(end) const char* text,
6365 _In_ size_t start = 0,
6366 _In_ size_t end = (size_t)-1,
6367 _In_ int flags = match_default)
6368 {
6369 while (start < end) {
6370 while (start < end && text[start] && isspace(text[start])) start++;
6371 if (start < end && text[start] == ',') {
6372 start++;
6373 while (start < end&& text[start] && isspace(text[start])) start++;
6374 }
6375 T::key_type el;
6376 if (el.match(text, start, end, flags)) {
6377 start = el.interval.end;
6378 T::insert(std::move(el));
6379 }
6380 else
6381 break;
6382 }
6383 }
6384 };
6385
6386 template <class T>
6388 constexpr bool operator()(const T& a, const T& b) const noexcept
6389 {
6390 return a.factor.value > b.factor.value;
6391 }
6392 };
6393
6397 template <class T, class _Alloc = std::allocator<T>>
6399
6403 template <class T>
6405 {
6406 public:
6408 _In_ const std::shared_ptr<basic_parser<T>>& quote,
6409 _In_ const std::shared_ptr<basic_parser<T>>& chr,
6410 _In_ const std::shared_ptr<basic_parser<T>>& escape,
6411 _In_ const std::shared_ptr<basic_parser<T>>& sol,
6412 _In_ const std::shared_ptr<basic_parser<T>>& bs,
6413 _In_ const std::shared_ptr<basic_parser<T>>& ff,
6414 _In_ const std::shared_ptr<basic_parser<T>>& lf,
6415 _In_ const std::shared_ptr<basic_parser<T>>& cr,
6416 _In_ const std::shared_ptr<basic_parser<T>>& htab,
6417 _In_ const std::shared_ptr<basic_parser<T>>& uni,
6418 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
6419 _In_ const std::locale& locale = std::locale()) :
6420 basic_parser<T>(locale),
6421 m_quote(quote),
6422 m_chr(chr),
6423 m_escape(escape),
6424 m_sol(sol),
6425 m_bs(bs),
6426 m_ff(ff),
6427 m_lf(lf),
6428 m_cr(cr),
6429 m_htab(htab),
6430 m_uni(uni),
6431 m_hex(hex)
6432 {}
6433
6434 virtual bool match(
6435 _In_reads_or_z_(end) const T* text,
6436 _In_ size_t start = 0,
6437 _In_ size_t end = (size_t)-1,
6438 _In_ int flags = match_default)
6439 {
6440 assert(text || start >= end);
6441 interval.end = start;
6442 if (m_quote->match(text, interval.end, end, flags)) {
6443 interval.end = m_quote->interval.end;
6444 value.clear();
6445 for (;;) {
6446 if (m_quote->match(text, interval.end, end, flags)) {
6447 interval.start = start;
6448 interval.end = m_quote->interval.end;
6449 return true;
6450 }
6451 if (m_escape->match(text, interval.end, end, flags)) {
6452 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
6453 value += '"'; interval.end = m_quote->interval.end;
6454 continue;
6455 }
6456 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
6457 value += '/'; interval.end = m_sol->interval.end;
6458 continue;
6459 }
6460 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
6461 value += '\b'; interval.end = m_bs->interval.end;
6462 continue;
6463 }
6464 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
6465 value += '\f'; interval.end = m_ff->interval.end;
6466 continue;
6467 }
6468 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
6469 value += '\n'; interval.end = m_lf->interval.end;
6470 continue;
6471 }
6472 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
6473 value += '\r'; interval.end = m_cr->interval.end;
6474 continue;
6475 }
6476 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
6477 value += '\t'; interval.end = m_htab->interval.end;
6478 continue;
6479 }
6480 if (
6481 m_uni->match(text, m_escape->interval.end, end, flags) &&
6482 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
6483 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
6484 {
6485 assert(m_hex->value <= 0xffff);
6486 if (sizeof(T) == 1) {
6487 if (m_hex->value > 0x7ff) {
6488 value += (T)(0xe0 | (m_hex->value >> 12) & 0x0f);
6489 value += (T)(0x80 | (m_hex->value >> 6) & 0x3f);
6490 value += (T)(0x80 | m_hex->value & 0x3f);
6491 }
6492 else if (m_hex->value > 0x7f) {
6493 value += (T)(0xc0 | (m_hex->value >> 6) & 0x1f);
6494 value += (T)(0x80 | m_hex->value & 0x3f);
6495 }
6496 else
6497 value += (T)(m_hex->value & 0x7f);
6498 }
6499 else
6500 value += (T)m_hex->value;
6501 interval.end = m_hex->interval.end;
6502 continue;
6503 }
6504 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
6505 value += '\\'; interval.end = m_escape->interval.end;
6506 continue;
6507 }
6508 }
6509 if (m_chr->match(text, interval.end, end, flags)) {
6510 value.Prilepi(text + m_chr->interval.start, m_chr->interval.size());
6511 interval.end = m_chr->interval.end;
6512 continue;
6513 }
6514 break;
6515 }
6516 }
6517 value.clear();
6518 interval.start = (interval.end = start) + 1;
6519 return false;
6520 }
6521
6522 virtual void invalidate()
6523 {
6524 value.clear();
6526 }
6527
6528 public:
6529 std::basic_string<T> value;
6530
6531 protected:
6532 std::shared_ptr<basic_parser<T>> m_quote;
6533 std::shared_ptr<basic_parser<T>> m_chr;
6534 std::shared_ptr<basic_parser<T>> m_escape;
6535 std::shared_ptr<basic_parser<T>> m_sol;
6536 std::shared_ptr<basic_parser<T>> m_bs;
6537 std::shared_ptr<basic_parser<T>> m_ff;
6538 std::shared_ptr<basic_parser<T>> m_lf;
6539 std::shared_ptr<basic_parser<T>> m_cr;
6540 std::shared_ptr<basic_parser<T>> m_htab;
6541 std::shared_ptr<basic_parser<T>> m_uni;
6542 std::shared_ptr<basic_integer16<T>> m_hex;
6543 };
6544
6547#ifdef _UNICODE
6548 using tjson_string = wjson_string;
6549#else
6550 using tjson_string = json_string;
6551#endif
6552 }
6553}
6554
6555#ifdef _MSC_VER
6556#pragma warning(pop)
6557#endif
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4369
Test for any code unit.
Definition parser.hpp:200
Test for beginning of line.
Definition parser.hpp:594
Test for any.
Definition parser.hpp:1036
Test for chemical formula.
Definition parser.hpp:4643
Test for any code unit from a given string of code units.
Definition parser.hpp:699
Test for specific code unit.
Definition parser.hpp:270
Test for date.
Definition parser.hpp:3989
Test for valid DNS domain character.
Definition parser.hpp:2784
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2822
Test for DNS domain/hostname.
Definition parser.hpp:2884
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2948
Test for e-mail address.
Definition parser.hpp:3772
Test for emoticon.
Definition parser.hpp:3880
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3969
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3970
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3972
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3971
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3968
Test for end of line.
Definition parser.hpp:632
Test for fraction.
Definition parser.hpp:1665
Test for decimal integer.
Definition parser.hpp:1274
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1359
bool has_separators
Did integer have any separators?
Definition parser.hpp:1419
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1418
Test for hexadecimal integer.
Definition parser.hpp:1440
Base class for integer testing.
Definition parser.hpp:1252
size_t value
Calculated value of the numeral.
Definition parser.hpp:1266
Test for IPv4 address.
Definition parser.hpp:2324
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2439
struct in_addr value
IPv4 address value.
Definition parser.hpp:2440
Test for IPv6 address.
Definition parser.hpp:2543
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2747
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2745
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2746
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2471
Test for repeating.
Definition parser.hpp:889
bool m_greedy
try to match as long sequence as possible
Definition parser.hpp:928
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:925
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:926
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:927
Test for JSON string.
Definition parser.hpp:6405
Test for mixed numeral.
Definition parser.hpp:1900
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:2006
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2004
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2003
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2002
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2005
Test for monetary numeral.
Definition parser.hpp:2195
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2301
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2306
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2304
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2307
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2305
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2302
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2303
"No-op" match
Definition parser.hpp:168
Base template for all parsers.
Definition parser.hpp:49
interval< size_t > interval
Region of the last match.
Definition parser.hpp:148
Test for permutation.
Definition parser.hpp:1176
Test for phone number.
Definition parser.hpp:4492
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4618
Test for any punctuation code unit.
Definition parser.hpp:442
Test for Roman numeral.
Definition parser.hpp:1549
Test for scientific numeral.
Definition parser.hpp:2026
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2170
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2174
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2168
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2169
double value
Calculated value of the numeral.
Definition parser.hpp:2178
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2176
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2173
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2175
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2177
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2172
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2171
Test for match score.
Definition parser.hpp:1728
Test for sequence.
Definition parser.hpp:985
Definition parser.hpp:667
Test for signed numeral.
Definition parser.hpp:1814
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1882
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1881
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1880
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1883
Test for any space code unit.
Definition parser.hpp:363
Test for any space or punctuation code unit.
Definition parser.hpp:516
Test for any string.
Definition parser.hpp:1104
Test for given string.
Definition parser.hpp:794
Test for time.
Definition parser.hpp:4266
Test for valid URL password character.
Definition parser.hpp:3066
Test for valid URL path character.
Definition parser.hpp:3166
Test for URL path.
Definition parser.hpp:3274
Test for valid URL username character.
Definition parser.hpp:2967
Test for URL.
Definition parser.hpp:3415
Test for HTTP agent.
Definition parser.hpp:5928
Test for HTTP any type.
Definition parser.hpp:5050
Test for HTTP asterisk.
Definition parser.hpp:5692
Test for HTTP header.
Definition parser.hpp:6251
Test for HTTP language (RFC1766)
Definition parser.hpp:5560
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:4724
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5082
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5137
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:4995
http_token name
Parameter name.
Definition parser.hpp:5039
http_value value
Parameter value.
Definition parser.hpp:5040
Test for HTTP protocol.
Definition parser.hpp:6004
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6106
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:4885
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:4941
Test for HTTP request.
Definition parser.hpp:6113
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:4760
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:4797
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:4831
Test for HTTP URL parameter.
Definition parser.hpp:5377
Test for HTTP URL path segment.
Definition parser.hpp:5288
Test for HTTP URL path.
Definition parser.hpp:5321
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:5370
Test for HTTP URL port.
Definition parser.hpp:5232
Test for HTTP URL server.
Definition parser.hpp:5195
Test for HTTP URL.
Definition parser.hpp:5458
Collection of HTTP values.
Definition parser.hpp:6361
Test for HTTP value (RFC2616: value)
Definition parser.hpp:4951
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:4987
http_token token
Value when matched as token.
Definition parser.hpp:4988
Test for HTTP weight factor.
Definition parser.hpp:5623
float value
Calculated value of the weight factor.
Definition parser.hpp:5685
Test for HTTP weighted value.
Definition parser.hpp:5715
Base template for collection-holding parsers.
Definition parser.hpp:945
Test for any SGML code point.
Definition parser.hpp:232
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:751
Test for specific SGML code point.
Definition parser.hpp:319
Test for valid DNS domain SGML character.
Definition parser.hpp:2840
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2509
Test for any SGML punctuation code point.
Definition parser.hpp:483
Test for any SGML space code point.
Definition parser.hpp:406
Test for any SGML space or punctuation code point.
Definition parser.hpp:559
Test for SGML given string.
Definition parser.hpp:841
Test for valid URL password SGML character.
Definition parser.hpp:3118
Test for valid URL path SGML character.
Definition parser.hpp:3222
Test for valid URL username SGML character.
Definition parser.hpp:3018
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
T start
interval start
Definition interval.hpp:19
Definition parser.hpp:6387