stdex
Additional custom or not Standard C++ covered algorithms
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "interval.hpp"
9#include "memory.hpp"
10#include "sal.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include <assert.h>
14#include <stdarg.h>
15#include <stdint.h>
16#ifdef _WIN32
17#include <winsock2.h>
18#include <ws2ipdef.h>
19#else
20#include <inaddr.h>
21#include <in6addr.h>
22#endif
23#include <limits>
24#include <list>
25#include <locale>
26#include <memory>
27#include <set>
28#include <string>
29
30#ifdef _MSC_VER
31#pragma warning(push)
32#pragma warning(disable: 4100)
33#endif
34
35#define ENUM_FLAG_OPERATOR(T,X) \
36inline T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
37inline T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
38inline T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
39inline T& operator X= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
40inline T& operator X= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }
41#define ENUM_FLAGS(T, type) \
42enum class T : type; \
43inline T operator ~ (T t) { return (T) (~static_cast<std::underlying_type_t <T>>(t)); } \
44ENUM_FLAG_OPERATOR(T,|) \
45ENUM_FLAG_OPERATOR(T,^) \
46ENUM_FLAG_OPERATOR(T,&) \
47enum class T : type
48
49namespace stdex
50{
51 namespace parser
52 {
56 constexpr int match_default = 0;
57 constexpr int match_case_insensitive = 0x1;
58 constexpr int match_multiline = 0x2;
59
63 template <class T>
65 {
66 public:
67 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
68 virtual ~basic_parser() {}
69
70 bool search(
71 _In_reads_or_z_(end) const T* text,
72 _In_ size_t start = 0,
73 _In_ size_t end = (size_t)-1,
74 _In_ int flags = match_default)
75 {
76 for (size_t i = start; i < end && text[i]; i++)
77 if (match(text, i, end, flags))
78 return true;
79 return false;
80 }
81
82 virtual bool match(
83 _In_reads_or_z_(end) const T* text,
84 _In_ size_t start = 0,
85 _In_ size_t end = (size_t)-1,
86 _In_ int flags = match_default) = 0;
87
88 template<class _Traits, class _Ax>
89 inline bool match(
90 const std::basic_string<T, _Traits, _Ax>& text,
91 _In_ size_t start = 0,
92 _In_ size_t end = (size_t)-1,
93 _In_ int flags = match_default)
94 {
95 return match(text.c_str(), start, std::min<size_t>(end, text.size()), flags);
96 }
97
98 virtual void invalidate()
99 {
100 interval.start = 1;
101 interval.end = 0;
102 }
103
104 protected:
106 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
107 {
108 if (text[start] == '&') {
109 // Potential entity start
110 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
111 for (chr_end = start + 1;; chr_end++) {
112 if (chr_end >= end || text[chr_end] == 0) {
113 // Unterminated entity
114 break;
115 }
116 if (text[chr_end] == ';') {
117 // Entity end
118 size_t n = chr_end - start - 1;
119 if (n >= 2 && text[start + 1] == '#') {
120 // Numerical entity
121 char32_t unicode;
122 if (text[start + 2] == 'x' || text[start + 2] == 'X')
123 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
124 else
125 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
126#ifdef _WIN32
127 if (unicode < 0x10000) {
128 buf[0] = (wchar_t)unicode;
129 buf[1] = 0;
130 }
131 else {
132 ucs4_to_surrogate_pair(buf, unicode);
133 buf[2] = 0;
134 }
135#else
136 buf[0] = (wchar_t)unicode;
137 buf[1] = 0;
138#endif
139 chr_end++;
140 return buf;
141 }
142 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
143 if (entity_w) {
144 chr_end++;
145 return entity_w;
146 }
147 // Unknown entity.
148 break;
149 }
150 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
151 // This char cannot possibly be a part of entity.
152 break;
153 }
154 }
155 }
156 buf[0] = text[start];
157 buf[1] = 0;
158 chr_end = start + 1;
159 return buf;
160 }
162
163 public:
165
166 protected:
167 std::locale m_locale;
168 };
169
172#ifdef _UNICODE
173 using tparser = wparser;
174#else
175 using tparser = parser;
176#endif
178
182 template <class T>
183 class basic_noop : public basic_parser<T>
184 {
185 public:
186 virtual bool match(
187 _In_reads_or_z_(end) const T* text,
188 _In_ size_t start = 0,
189 _In_ size_t end = (size_t)-1,
190 _In_ int flags = match_default)
191 {
192 assert(text || start >= end);
193 if (start < end && text[start]) {
194 interval.start = interval.end = start;
195 return true;
196 }
197 interval.start = (interval.end = start) + 1;
198 return false;
199 }
200 };
201
202 using noop = basic_noop<char>;
204#ifdef _UNICODE
205 using tnoop = wnoop;
206#else
207 using tnoop = noop;
208#endif
210
214 template <class T>
215 class basic_any_cu : public basic_parser<T>
216 {
217 public:
218 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
219
220 virtual bool match(
221 _In_reads_or_z_(end) const T* text,
222 _In_ size_t start = 0,
223 _In_ size_t end = (size_t)-1,
224 _In_ int flags = match_default)
225 {
226 assert(text || start >= end);
227 if (start < end && text[start]) {
228 interval.end = (interval.start = start) + 1;
229 return true;
230 }
231 interval.start = (interval.end = start) + 1;
232 return false;
233 }
234 };
235
238#ifdef _UNICODE
239 using tany_cu = wany_cu;
240#else
241 using tany_cu = any_cu;
242#endif
243
247 class sgml_any_cp : public basic_any_cu<char>
248 {
249 public:
250 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
251
252 virtual bool match(
253 _In_reads_or_z_(end) const char* text,
254 _In_ size_t start = 0,
255 _In_ size_t end = (size_t)-1,
256 _In_ int flags = match_default)
257 {
258 assert(text || start >= end);
259 if (start < end && text[start]) {
260 if (text[start] == '&') {
261 // SGML entity
262 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
263 for (interval.end = start + 1; interval.end < end && text[interval.end]; interval.end++)
264 if (text[interval.end] == ';') {
265 interval.end++;
266 interval.start = start;
267 return true;
268 }
269 else if (text[interval.end] == '&' || ctype.is(ctype.space, text[interval.end]))
270 break;
271 // Unterminated entity
272 }
273 interval.end = (interval.start = start) + 1;
274 return true;
275 }
276 interval.start = (interval.end = start) + 1;
277 return false;
278 }
279 };
280
284 template <class T>
285 class basic_cu : public basic_parser<T>
286 {
287 public:
288 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
289 basic_parser<T>(locale),
290 m_chr(chr),
291 m_invert(invert)
292 {}
293
294 virtual bool match(
295 _In_reads_or_z_(end) const T* text,
296 _In_ size_t start = 0,
297 _In_ size_t end = (size_t)-1,
298 _In_ int flags = match_default)
299 {
300 assert(text || start >= end);
301 if (start < end && text[start]) {
302 bool r;
303 if (flags & match_case_insensitive) {
304 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
305 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
306 }
307 else
308 r = text[start] == m_chr;
309 if (r && !m_invert || !r && m_invert) {
310 interval.end = (interval.start = start) + 1;
311 return true;
312 }
313 }
314 interval.start = (interval.end = start) + 1;
315 return false;
316 }
317
318 protected:
319 T m_chr;
320 bool m_invert;
321 };
322
323 using cu = basic_cu<char>;
324 using wcu = basic_cu<wchar_t>;
325#ifdef _UNICODE
326 using tcu = wcu;
327#else
328 using tcu = cu;
329#endif
330
334 class sgml_cp : public sgml_parser
335 {
336 public:
337 sgml_cp(const char* chr, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
338 sgml_parser(locale),
339 m_invert(invert)
340 {
341 assert(chr || !count);
342 wchar_t buf[3];
343 size_t chr_end;
344 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
345 }
346
347 virtual bool match(
348 _In_reads_or_z_(end) const char* text,
349 _In_ size_t start = 0,
350 _In_ size_t end = (size_t)-1,
351 _In_ int flags = match_default)
352 {
353 assert(text || start >= end);
354 if (start < end && text[start]) {
355 wchar_t buf[3];
356 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
357 bool r = ((flags & match_case_insensitive) ?
358 stdex::strnicmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size(), m_locale) :
359 stdex::strncmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size())) == 0;
360 if (r && !m_invert || !r && m_invert) {
361 interval.start = start;
362 return true;
363 }
364 }
365 interval.start = (interval.end = start) + 1;
366 return false;
367 }
368
369 protected:
370 std::wstring m_chr;
371 bool m_invert;
372 };
373
377 template <class T>
378 class basic_space_cu : public basic_parser<T>
379 {
380 public:
381 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
382 basic_parser<T>(locale),
383 m_invert(invert)
384 {}
385
386 virtual bool match(
387 _In_reads_or_z_(end) const T* text,
388 _In_ size_t start = 0,
389 _In_ size_t end = (size_t)-1,
390 _In_ int flags = match_default)
391 {
392 assert(text || start >= end);
393 if (start < end && text[start]) {
394 bool r =
395 ((flags & match_multiline) || !islbreak(text[start])) &&
396 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::space, text[start]);
397 if (r && !m_invert || !r && m_invert) {
398 interval.end = (interval.start = start) + 1;
399 return true;
400 }
401 }
402 interval.start = (interval.end = start) + 1;
403 return false;
404 }
405
406 protected:
407 bool m_invert;
408 };
409
412#ifdef _UNICODE
413 using tspace_cu = wspace_cu;
414#else
415 using tspace_cu = space_cu;
416#endif
417
421 class sgml_space_cp : public basic_space_cu<char>
422 {
423 public:
424 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
425 basic_space_cu<char>(invert, locale)
426 {}
427
428 virtual bool match(
429 _In_reads_or_z_(end) const char* text,
430 _In_ size_t start = 0,
431 _In_ size_t end = (size_t)-1,
432 _In_ int flags = match_default)
433 {
434 assert(text || start >= end);
435 if (start < end && text[start]) {
436 wchar_t buf[3];
437 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
438 const wchar_t* chr_end = chr + stdex::strlen(chr);
439 bool r =
440 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
441 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
442 if (r && !m_invert || !r && m_invert) {
443 interval.start = start;
444 return true;
445 }
446 }
447
448 interval.start = (interval.end = start) + 1;
449 return false;
450 }
451 };
452
456 template <class T>
457 class basic_punct_cu : public basic_parser<T>
458 {
459 public:
460 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
461 basic_parser<T>(locale),
462 m_invert(invert)
463 {}
464
465 virtual bool match(
466 _In_reads_or_z_(end) const T* text,
467 _In_ size_t start = 0,
468 _In_ size_t end = (size_t)-1,
469 _In_ int flags = match_default)
470 {
471 assert(text || start >= end);
472 if (start < end && text[start]) {
473 bool r = std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::punct, text[start]);
474 if (r && !m_invert || !r && m_invert) {
475 interval.end = (interval.start = start) + 1;
476 return true;
477 }
478 }
479 interval.start = (interval.end = start) + 1;
480 return false;
481 }
482
483 protected:
484 bool m_invert;
485 };
486
489#ifdef _UNICODE
490 using tpunct_cu = wpunct_cu;
491#else
492 using tpunct_cu = punct_cu;
493#endif
494
498 class sgml_punct_cp : public basic_punct_cu<char>
499 {
500 public:
501 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
502 basic_punct_cu<char>(invert, locale)
503 {}
504
505 virtual bool match(
506 _In_reads_or_z_(end) const char* text,
507 _In_ size_t start = 0,
508 _In_ size_t end = (size_t)-1,
509 _In_ int flags = match_default)
510 {
511 assert(text || start >= end);
512 if (start < end && text[start]) {
513 wchar_t buf[3];
514 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
515 const wchar_t* chr_end = chr + stdex::strlen(chr);
516 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
517 if (r && !m_invert || !r && m_invert) {
518 interval.start = start;
519 return true;
520 }
521 }
522 interval.start = (interval.end = start) + 1;
523 return false;
524 }
525 };
526
530 template <class T>
532 {
533 public:
534 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
535 basic_parser<T>(locale),
536 m_invert(invert)
537 {}
538
539 virtual bool match(
540 _In_reads_or_z_(end) const T* text,
541 _In_ size_t start = 0,
542 _In_ size_t end = (size_t)-1,
543 _In_ int flags = match_default)
544 {
545 assert(text || start >= end);
546 if (start < end && text[start]) {
547 bool r =
548 ((flags & match_multiline) || !islbreak(text[start])) &&
549 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
550 if (r && !m_invert || !r && m_invert) {
551 interval.end = (interval.start = start) + 1;
552 return true;
553 }
554 }
555 interval.start = (interval.end = start) + 1;
556 return false;
557 }
558
559 protected:
560 bool m_invert;
561 };
562
565#ifdef _UNICODE
567#else
569#endif
570
575 {
576 public:
577 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
578 basic_space_or_punct_cu<char>(invert, locale)
579 {}
580
581 virtual bool match(
582 _In_reads_or_z_(end) const char* text,
583 _In_ size_t start = 0,
584 _In_ size_t end = (size_t)-1,
585 _In_ int flags = match_default)
586 {
587 assert(text || start >= end);
588 if (start < end && text[start]) {
589 wchar_t buf[3];
590 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
591 const wchar_t* chr_end = chr + stdex::strlen(chr);
592 bool r =
593 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
594 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
595 if (r && !m_invert || !r && m_invert) {
596 interval.start = start;
597 return true;
598 }
599 }
600 interval.start = (interval.end = start) + 1;
601 return false;
602 }
603 };
604
608 template <class T>
609 class basic_bol : public basic_parser<T>
610 {
611 public:
612 basic_bol(bool invert = false) : m_invert(invert) {}
613
614 virtual bool match(
615 _In_reads_or_z_(end) const T* text,
616 _In_ size_t start = 0,
617 _In_ size_t end = (size_t)-1,
618 _In_ int flags = match_default)
619 {
620 assert(text || start >= end);
621 bool r = start == 0 || start <= end && islbreak(text[start - 1]);
622 if (r && !m_invert || !r && m_invert) {
623 interval.end = interval.start = start;
624 return true;
625 }
626 interval.start = (interval.end = start) + 1;
627 return false;
628 }
629
630 protected:
631 bool m_invert;
632 };
633
634 using bol = basic_bol<char>;
635 using wbol = basic_bol<wchar_t>;
636#ifdef _UNICODE
637 using tbol = wbol;
638#else
639 using tbol = bol;
640#endif
642
646 template <class T>
647 class basic_eol : public basic_parser<T>
648 {
649 public:
650 basic_eol(bool invert = false) : m_invert(invert) {}
651
652 virtual bool match(
653 _In_reads_or_z_(end) const T* text,
654 _In_ size_t start = 0,
655 _In_ size_t end = (size_t)-1,
656 _In_ int flags = match_default)
657 {
658 assert(text || start >= end);
659 bool r = islbreak(text[start]);
660 if (r && !m_invert || !r && m_invert) {
661 interval.end = interval.start = start;
662 return true;
663 }
664 interval.start = (interval.end = start) + 1;
665 return false;
666 }
667
668 protected:
669 bool m_invert;
670 };
671
672 using eol = basic_eol<char>;
673 using weol = basic_eol<wchar_t>;
674#ifdef _UNICODE
675 using teol = weol;
676#else
677 using teol = eol;
678#endif
680
681 template <class T>
682 class basic_set : public basic_parser<T>
683 {
684 public:
685 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
686 basic_parser<T>(locale),
687 hit_offset((size_t)-1),
688 m_invert(invert)
689 {}
690
691 virtual bool match(
692 _In_reads_or_z_(end) const T* text,
693 _In_ size_t start = 0,
694 _In_ size_t end = (size_t)-1,
695 _In_ int flags = match_default) = 0;
696
697 virtual void invalidate()
698 {
699 hit_offset = (size_t)-1;
701 }
702
703 public:
704 size_t hit_offset;
705
706 protected:
707 bool m_invert;
708 };
709
713 template <class T>
714 class basic_cu_set : public basic_set<T>
715 {
716 public:
718 _In_reads_or_z_(count) const T* set,
719 _In_ size_t count = (size_t)-1,
720 _In_ bool invert = false,
721 _In_ const std::locale& locale = std::locale()) :
722 basic_set<T>(invert, locale)
723 {
724 if (set)
725 m_set.assign(set, set + stdex::strnlen(set, count));
726 }
727
728 virtual bool match(
729 _In_reads_or_z_(end) const T* text,
730 _In_ size_t start = 0,
731 _In_ size_t end = (size_t)-1,
732 _In_ int flags = match_default)
733 {
734 assert(text || start >= end);
735 if (start < end && text[start]) {
736 const T* set = m_set.c_str();
737 size_t r = (flags & match_case_insensitive) ?
738 stdex::strnichr(set, m_set.size(), text[start], m_locale) :
739 stdex::strnchr(set, m_set.size(), text[start]);
740 if (r != stdex::npos && !m_invert || r == stdex::npos && m_invert) {
741 hit_offset = r;
742 interval.end = (interval.start = start) + 1;
743 return true;
744 }
745 }
746 hit_offset = (size_t)-1;
747 interval.start = (interval.end = start) + 1;
748 return false;
749 }
750
751 protected:
752 std::basic_string<T> m_set;
753 };
754
757#ifdef _UNICODE
758 using tcu_set = wcu_set;
759#else
760 using tcu_set = cu_set;
761#endif
762
766 class sgml_cp_set : public basic_set<char>
767 {
768 public:
769 sgml_cp_set(const char* set, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
770 basic_set<char>(invert, locale)
771 {
772 if (set)
773 m_set = sgml2wstr(set, count);
774 }
775
776 virtual bool match(
777 _In_reads_or_z_(end) const char* text,
778 _In_ size_t start = 0,
779 _In_ size_t end = (size_t)-1,
780 _In_ int flags = match_default)
781 {
782 assert(text || start >= end);
783 if (start < end && text[start]) {
784 wchar_t buf[3];
785 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
786 const wchar_t* set = m_set.c_str();
787 size_t r = (flags & match_case_insensitive) ?
788 stdex::strnistr(set, m_set.size(), chr, m_locale) :
789 stdex::strnstr(set, m_set.size(), chr);
790 if (r != stdex::npos && !m_invert || r == stdex::npos && m_invert) {
791 hit_offset = r;
792 interval.start = start;
793 return true;
794 }
795 }
796 hit_offset = (size_t)-1;
797 interval.start = (interval.end = start) + 1;
798 return false;
799 }
800
801 protected:
802 std::wstring m_set;
803 };
804
808 template <class T>
809 class basic_string : public basic_parser<T>
810 {
811 public:
813 _In_reads_or_z_(count) const T* str,
814 _In_ size_t count = (size_t)-1,
815 _In_ const std::locale& locale = std::locale()) :
816 basic_parser<T>(locale),
817 m_str(str, str + stdex::strnlen(str, count))
818 {}
819
820 virtual bool match(
821 _In_reads_or_z_(end) const T* text,
822 _In_ size_t start = 0,
823 _In_ size_t end = (size_t)-1,
824 _In_ int flags = match_default)
825 {
826 assert(text || start >= end);
827 size_t
828 m = m_str.size(),
829 n = std::min<size_t>(end - start, m);
830 bool r = ((flags & match_case_insensitive) ?
831 stdex::strnicmp(text + start, n, m_str.c_str(), m, m_locale) :
832 stdex::strncmp(text + start, n, m_str.c_str(), m)) == 0;
833 if (r) {
834 interval.end = (interval.start = start) + n;
835 return true;
836 }
837 interval.start = (interval.end = start) + 1;
838 return false;
839 }
840
841 protected:
842 std::basic_string<T> m_str;
843 };
844
847#ifdef _UNICODE
848 using tstring = wstring;
849#else
850 using tstring = string;
851#endif
852
857 {
858 public:
859 sgml_string(const char* str, size_t count = (size_t)-1, _In_ const std::locale& locale = std::locale()) :
860 sgml_parser(locale),
861 m_str(sgml2wstr(str, count))
862 {}
863
864 virtual bool match(
865 _In_reads_or_z_(end) const char* text,
866 _In_ size_t start = 0,
867 _In_ size_t end = (size_t)-1,
868 _In_ int flags = match_default)
869 {
870 assert(text || start >= end);
871 const wchar_t* str = m_str.c_str();
872 const bool case_insensitive = flags & match_case_insensitive ? true : false;
873 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
874 for (interval.end = start;;) {
875 if (!*str) {
876 interval.start = start;
877 return true;
878 }
879 if (interval.end >= end || !text[interval.end]) {
880 interval.start = (interval.end = start) + 1;
881 return false;
882 }
883 wchar_t buf[3];
884 const wchar_t* chr = next_sgml_cp(text, interval.end, end, interval.end, buf);
885 for (; *chr; ++str, ++chr) {
886 if (!*str ||
887 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
888 {
889 interval.start = (interval.end = start) + 1;
890 return false;
891 }
892 }
893 }
894 }
895
896 protected:
897 std::wstring m_str;
898 };
899
903 template <class T>
905 {
906 public:
907 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = (size_t)-1, bool greedy = true) :
908 m_el(el),
909 m_min_iterations(min_iterations),
910 m_max_iterations(max_iterations),
911 m_greedy(greedy)
912 {}
913
914 virtual bool match(
915 _In_reads_or_z_(end) const T* text,
916 _In_ size_t start = 0,
917 _In_ size_t end = (size_t)-1,
918 _In_ int flags = match_default)
919 {
920 assert(text || start >= end);
921 interval.start = interval.end = start;
922 for (size_t i = 0; ; i++) {
923 if (!m_greedy && i >= m_min_iterations || i >= m_max_iterations)
924 return true;
925 if (!m_el->match(text, interval.end, end, flags)) {
926 if (i >= m_min_iterations)
927 return true;
928 break;
929 }
930 if (m_el->interval.end == interval.end) {
931 // Element did match, but the matching interval was empty. Quit instead of spinning.
932 return true;
933 }
934 interval.end = m_el->interval.end;
935 }
936 interval.start = (interval.end = start) + 1;
937 return false;
938 }
939
940 protected:
941 std::shared_ptr<basic_parser<T>> m_el;
944 bool m_greedy;
945 };
946
949#ifdef _UNICODE
950 using titerations = witerations;
951#else
952 using titerations = iterations;
953#endif
955
959 template <class T>
961 {
962 protected:
963 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
964
965 public:
967 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
968 _In_ size_t count,
969 _In_ const std::locale& locale = std::locale()) :
970 basic_parser<T>(locale)
971 {
972 assert(el || !count);
973 m_collection.reserve(count);
974 for (size_t i = 0; i < count; i++)
975 m_collection.push_back(el[i]);
976 }
977
979 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
980 _In_ const std::locale& locale = std::locale()) :
981 basic_parser<T>(locale),
982 m_collection(std::move(collection))
983 {}
984
985 virtual void invalidate()
986 {
987 for (auto& el: m_collection)
988 el->invalidate();
990 }
991
992 protected:
993 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
994 };
995
999 template <class T>
1001 {
1002 public:
1004 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1005 _In_ size_t count = 0,
1006 _In_ const std::locale& locale = std::locale()) :
1007 parser_collection<T>(el, count, locale)
1008 {}
1009
1011 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1012 _In_ const std::locale& locale = std::locale()) :
1013 parser_collection<T>(std::move(collection), locale)
1014 {}
1015
1016 virtual bool match(
1017 _In_reads_or_z_(end) const T* text,
1018 _In_ size_t start = 0,
1019 _In_ size_t end = (size_t)-1,
1020 _In_ int flags = match_default)
1021 {
1022 assert(text || start >= end);
1023 interval.end = start;
1024 for (auto i = m_collection.begin(); i != m_collection.end(); ++i) {
1025 if (!(*i)->match(text, interval.end, end, flags)) {
1026 for (++i; i != m_collection.end(); ++i)
1027 (*i)->invalidate();
1028 interval.start = (interval.end = start) + 1;
1029 return false;
1030 }
1031 interval.end = (*i)->interval.end;
1032 }
1033 interval.start = start;
1034 return true;
1035 }
1036 };
1037
1040#ifdef _UNICODE
1041 using tsequence = wsequence;
1042#else
1043 using tsequence = sequence;
1044#endif
1046
1050 template <class T>
1052 {
1053 protected:
1054 basic_branch(_In_ const std::locale& locale) :
1055 parser_collection<T>(locale),
1056 hit_offset((size_t)-1)
1057 {}
1058
1059 public:
1061 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1062 _In_ size_t count = 0,
1063 _In_ const std::locale& locale = std::locale()) :
1064 parser_collection<T>(el, count, locale),
1065 hit_offset((size_t)-1)
1066 {}
1067
1069 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1070 _In_ const std::locale& locale = std::locale()) :
1071 parser_collection<T>(std::move(collection), locale),
1072 hit_offset((size_t)-1)
1073 {}
1074
1075 virtual bool match(
1076 _In_reads_or_z_(end) const T* text,
1077 _In_ size_t start = 0,
1078 _In_ size_t end = (size_t)-1,
1079 _In_ int flags = match_default)
1080 {
1081 assert(text || start >= end);
1082 hit_offset = 0;
1083 for (auto i = m_collection.begin(); i != m_collection.end(); ++i, ++hit_offset) {
1084 if ((*i)->match(text, start, end, flags)) {
1085 interval = (*i)->interval;
1086 for (++i; i != m_collection.end(); ++i)
1087 (*i)->invalidate();
1088 return true;
1089 }
1090 }
1091 hit_offset = (size_t)-1;
1092 interval.start = (interval.end = start) + 1;
1093 return false;
1094 }
1095
1096 virtual void invalidate()
1097 {
1098 hit_offset = (size_t)-1;
1100 }
1101
1102 public:
1103 size_t hit_offset;
1104 };
1105
1106 using branch = basic_branch<char>;
1108#ifdef _UNICODE
1109 using tbranch = wbranch;
1110#else
1111 using tbranch = branch;
1112#endif
1114
1118 template <class T, class T_parser = basic_string<T>>
1120 {
1121 public:
1122 inline basic_string_branch(
1123 _In_reads_(count) const T* str_z = nullptr,
1124 _In_ size_t count = 0,
1125 _In_ const std::locale& locale = std::locale()) :
1126 basic_branch<T>(locale)
1127 {
1128 build(str_z, count);
1129 }
1130
1131 inline basic_string_branch(_In_z_ const T* str, ...) :
1132 basic_branch<T>(std::locale())
1133 {
1134 va_list params;
1135 va_start(params, str);
1136 build(str, params);
1137 va_end(params);
1138 }
1139
1140 inline basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1141 basic_branch<T>(locale)
1142 {
1143 va_list params;
1144 va_start(params, str);
1145 build(str, params);
1146 va_end(params);
1147 }
1148
1149 protected:
1150 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1151 {
1152 assert(str_z || !count);
1153 if (count) {
1154 size_t offset, n;
1155 for (
1156 offset = n = 0;
1157 offset < count && str_z[offset];
1158 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1159 m_collection.reserve(n);
1160 for (
1161 offset = 0;
1162 offset < count && str_z[offset];
1163 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1164 m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, m_locale)));
1165 }
1166 }
1167
1168 void build(_In_z_ const T* str, _In_ va_list params)
1169 {
1170 const T* p;
1171 for (
1172 m_collection.push_back(std::move(std::make_shared<T_parser>(str, (size_t)-1, m_locale)));
1173 (p = va_arg(params, const T*)) != nullptr;
1174 m_collection.push_back(std::move(std::make_shared<T_parser>(p, (size_t)-1, m_locale))));
1175 }
1176 };
1177
1180#ifdef _UNICODE
1182#else
1184#endif
1186
1190 template <class T>
1192 {
1193 public:
1195 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1196 _In_ size_t count = 0,
1197 _In_ const std::locale& locale = std::locale()) :
1198 parser_collection<T>(el, count, locale)
1199 {}
1200
1202 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1203 _In_ const std::locale& locale = std::locale()) :
1204 parser_collection<T>(std::move(collection), locale)
1205 {}
1206
1207 virtual bool match(
1208 _In_reads_or_z_(end) const T* text,
1209 _In_ size_t start = 0,
1210 _In_ size_t end = (size_t)-1,
1211 _In_ int flags = match_default)
1212 {
1213 assert(text || start >= end);
1214 for (auto& el: m_collection)
1215 el->invalidate();
1216 if (match_recursively(text, start, end, flags)) {
1217 interval.start = start;
1218 return true;
1219 }
1220 interval.start = (interval.end = start) + 1;
1221 return false;
1222 }
1223
1224 protected:
1225 bool match_recursively(
1226 _In_reads_or_z_(end) const T* text,
1227 _In_ size_t start = 0,
1228 _In_ size_t end = (size_t)-1,
1229 _In_ int flags = match_default)
1230 {
1231 bool all_matched = true;
1232 for (auto& el: m_collection) {
1233 if (!el->interval) {
1234 // Element was not matched in permutatuion yet.
1235 all_matched = false;
1236 if (el->match(text, start, end, flags)) {
1237 // Element matched for the first time.
1238 if (match_recursively(text, el->interval.end, end, flags)) {
1239 // Rest of the elements matched too.
1240 return true;
1241 }
1242 el->invalidate();
1243 }
1244 }
1245 }
1246 if (all_matched) {
1247 interval.end = start;
1248 return true;
1249 }
1250 return false;
1251 }
1252 };
1253
1256#ifdef _UNICODE
1257 using tpermutation = wpermutation;
1258#else
1259 using tpermutation = permutation;
1260#endif
1262
1266 template <class T>
1267 class basic_integer : public basic_parser<T>
1268 {
1269 public:
1270 basic_integer(_In_ const std::locale& locale = std::locale()) :
1271 basic_parser<T>(locale),
1272 value(0)
1273 {}
1274
1275 virtual void invalidate()
1276 {
1277 value = 0;
1279 }
1280
1281 public:
1282 size_t value;
1283 };
1284
1288 template <class T>
1290 {
1291 public:
1293 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1294 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1295 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1296 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1297 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1298 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1299 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1300 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1301 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1302 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1303 _In_ const std::locale& locale = std::locale()) :
1304 basic_integer<T>(locale),
1305 m_digit_0(digit_0),
1306 m_digit_1(digit_1),
1307 m_digit_2(digit_2),
1308 m_digit_3(digit_3),
1309 m_digit_4(digit_4),
1310 m_digit_5(digit_5),
1311 m_digit_6(digit_6),
1312 m_digit_7(digit_7),
1313 m_digit_8(digit_8),
1314 m_digit_9(digit_9)
1315 {}
1316
1317 virtual bool match(
1318 _In_reads_or_z_(end) const T* text,
1319 _In_ size_t start = 0,
1320 _In_ size_t end = (size_t)-1,
1321 _In_ int flags = match_default)
1322 {
1323 assert(text || start >= end);
1324 for (interval.end = start, value = 0; interval.end < end && text[interval.end];) {
1325 size_t dig;
1326 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; interval.end = m_digit_0->interval.end; }
1327 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; interval.end = m_digit_1->interval.end; }
1328 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; interval.end = m_digit_2->interval.end; }
1329 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; interval.end = m_digit_3->interval.end; }
1330 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; interval.end = m_digit_4->interval.end; }
1331 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; interval.end = m_digit_5->interval.end; }
1332 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; interval.end = m_digit_6->interval.end; }
1333 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; interval.end = m_digit_7->interval.end; }
1334 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; interval.end = m_digit_8->interval.end; }
1335 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; interval.end = m_digit_9->interval.end; }
1336 else break;
1337 value = value * 10 + dig;
1338 }
1339 if (start < interval.end) {
1340 interval.start = start;
1341 return true;
1342 }
1343 interval.start = (interval.end = start) + 1;
1344 return false;
1345 }
1346
1347 protected:
1348 std::shared_ptr<basic_parser<T>>
1349 m_digit_0,
1350 m_digit_1,
1351 m_digit_2,
1352 m_digit_3,
1353 m_digit_4,
1354 m_digit_5,
1355 m_digit_6,
1356 m_digit_7,
1357 m_digit_8,
1358 m_digit_9;
1359 };
1360
1363#ifdef _UNICODE
1364 using tinteger10 = winteger10;
1365#else
1366 using tinteger10 = integer10;
1367#endif
1369
1373 template <class T>
1375 {
1376 public:
1378 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1379 _In_ const std::shared_ptr<basic_set<T>>& separator,
1380 _In_ const std::locale& locale = std::locale()) :
1381 basic_integer<T>(locale),
1382 digit_count(0),
1383 has_separators(false),
1384 m_digits(digits),
1385 m_separator(separator)
1386 {}
1387
1388 virtual bool match(
1389 _In_reads_or_z_(end) const T* text,
1390 _In_ size_t start = 0,
1391 _In_ size_t end = (size_t)-1,
1392 _In_ int flags = match_default)
1393 {
1394 assert(text || start >= end);
1395 if (m_digits->match(text, start, end, flags)) {
1396 // Leading part match.
1397 value = m_digits->value;
1398 digit_count = m_digits->interval.size();
1399 has_separators = false;
1400 interval.start = start;
1401 interval.end = m_digits->interval.end;
1402 if (m_digits->interval.size() <= 3) {
1403 // Maybe separated with thousand separators?
1404 size_t hit_offset = (size_t)-1;
1405 while (m_separator->match(text, interval.end, end, flags) &&
1406 (hit_offset == (size_t)-1 || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1407 m_digits->match(text, m_separator->interval.end, end, flags) &&
1408 m_digits->interval.size() == 3)
1409 {
1410 // Thousand separator and three-digit integer followed.
1411 value = value * 1000 + m_digits->value;
1412 digit_count += 3;
1413 has_separators = true;
1414 interval.end = m_digits->interval.end;
1415 hit_offset = m_separator->hit_offset;
1416 }
1417 }
1418
1419 return true;
1420 }
1421 value = 0;
1422 interval.start = (interval.end = start) + 1;
1423 return false;
1424 }
1425
1426 virtual void invalidate()
1427 {
1428 digit_count = 0;
1429 has_separators = false;
1431 }
1432
1433 public:
1436
1437 protected:
1438 std::shared_ptr<basic_integer10<T>> m_digits;
1439 std::shared_ptr<basic_set<T>> m_separator;
1440 };
1441
1444#ifdef _UNICODE
1445 using tinteger10ts = winteger10ts;
1446#else
1447 using tinteger10ts = integer10ts;
1448#endif
1450
1454 template <class T>
1456 {
1457 public:
1459 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1460 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1461 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1462 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1463 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1464 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1465 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1466 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1467 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1468 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1469 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1470 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1471 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1472 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1473 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1474 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1475 _In_ const std::locale& locale = std::locale()) :
1476 basic_integer<T>(locale),
1477 m_digit_0(digit_0),
1478 m_digit_1(digit_1),
1479 m_digit_2(digit_2),
1480 m_digit_3(digit_3),
1481 m_digit_4(digit_4),
1482 m_digit_5(digit_5),
1483 m_digit_6(digit_6),
1484 m_digit_7(digit_7),
1485 m_digit_8(digit_8),
1486 m_digit_9(digit_9),
1487 m_digit_10(digit_10),
1488 m_digit_11(digit_11),
1489 m_digit_12(digit_12),
1490 m_digit_13(digit_13),
1491 m_digit_14(digit_14),
1492 m_digit_15(digit_15)
1493 {}
1494
1495 virtual bool match(
1496 _In_reads_or_z_(end) const T* text,
1497 _In_ size_t start = 0,
1498 _In_ size_t end = (size_t)-1,
1499 _In_ int flags = match_default)
1500 {
1501 assert(text || start >= end);
1502 for (interval.end = start, value = 0; interval.end < end && text[interval.end];) {
1503 size_t dig;
1504 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; interval.end = m_digit_0->interval.end; }
1505 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; interval.end = m_digit_1->interval.end; }
1506 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; interval.end = m_digit_2->interval.end; }
1507 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; interval.end = m_digit_3->interval.end; }
1508 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; interval.end = m_digit_4->interval.end; }
1509 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; interval.end = m_digit_5->interval.end; }
1510 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; interval.end = m_digit_6->interval.end; }
1511 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; interval.end = m_digit_7->interval.end; }
1512 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; interval.end = m_digit_8->interval.end; }
1513 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; interval.end = m_digit_9->interval.end; }
1514 else if (m_digit_10->match(text, interval.end, end, flags)) { dig = 10; interval.end = m_digit_10->interval.end; }
1515 else if (m_digit_11->match(text, interval.end, end, flags)) { dig = 11; interval.end = m_digit_11->interval.end; }
1516 else if (m_digit_12->match(text, interval.end, end, flags)) { dig = 12; interval.end = m_digit_12->interval.end; }
1517 else if (m_digit_13->match(text, interval.end, end, flags)) { dig = 13; interval.end = m_digit_13->interval.end; }
1518 else if (m_digit_14->match(text, interval.end, end, flags)) { dig = 14; interval.end = m_digit_14->interval.end; }
1519 else if (m_digit_15->match(text, interval.end, end, flags)) { dig = 15; interval.end = m_digit_15->interval.end; }
1520 else break;
1521 value = value * 16 + dig;
1522 }
1523 if (start < interval.end) {
1524 interval.start = start;
1525 return true;
1526 }
1527 interval.start = (interval.end = start) + 1;
1528 return false;
1529 }
1530
1531 protected:
1532 std::shared_ptr<basic_parser<T>>
1533 m_digit_0,
1534 m_digit_1,
1535 m_digit_2,
1536 m_digit_3,
1537 m_digit_4,
1538 m_digit_5,
1539 m_digit_6,
1540 m_digit_7,
1541 m_digit_8,
1542 m_digit_9,
1543 m_digit_10,
1544 m_digit_11,
1545 m_digit_12,
1546 m_digit_13,
1547 m_digit_14,
1548 m_digit_15;
1549 };
1550
1553#ifdef _UNICODE
1554 using tinteger16 = winteger16;
1555#else
1556 using tinteger16 = integer16;
1557#endif
1559
1563 template <class T>
1565 {
1566 public:
1568 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1569 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1570 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1571 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1572 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1573 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1574 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1575 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1576 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1577 _In_ const std::locale& locale = std::locale()) :
1578 basic_integer<T>(locale),
1579 m_digit_1(digit_1),
1580 m_digit_5(digit_5),
1581 m_digit_10(digit_10),
1582 m_digit_50(digit_50),
1583 m_digit_100(digit_100),
1584 m_digit_500(digit_500),
1585 m_digit_1000(digit_1000),
1586 m_digit_5000(digit_5000),
1587 m_digit_10000(digit_10000)
1588 {}
1589
1590 virtual bool match(
1591 _In_reads_or_z_(end) const T* text,
1592 _In_ size_t start = 0,
1593 _In_ size_t end = (size_t)-1,
1594 _In_ int flags = match_default)
1595 {
1596 assert(text || start >= end);
1597 size_t
1598 dig[5] = { (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1 },
1599 end2;
1600
1601 for (interval.end = start, value = 0; interval.end < end && text[interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], interval.end = end2) {
1602 if (m_digit_1 && m_digit_1->match(text, interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1603 else if (m_digit_5 && m_digit_5->match(text, interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1604 else if (m_digit_10 && m_digit_10->match(text, interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1605 else if (m_digit_50 && m_digit_50->match(text, interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1606 else if (m_digit_100 && m_digit_100->match(text, interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1607 else if (m_digit_500 && m_digit_500->match(text, interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1608 else if (m_digit_1000 && m_digit_1000->match(text, interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1609 else if (m_digit_5000 && m_digit_5000->match(text, interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1610 else if (m_digit_10000 && m_digit_10000->match(text, interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1611 else break;
1612
1613 // Store first digit.
1614 if (dig[4] == (size_t)-1) dig[4] = dig[0];
1615
1616 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1617 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1618 break;
1619 }
1620 if (dig[0] <= dig[1]) {
1621 // Digit is less or equal previous one: add.
1622 value += dig[0];
1623 }
1624 else if (
1625 dig[1] == 1 && (dig[0] == 5 || dig[0] == 10) ||
1626 dig[1] == 10 && (dig[0] == 50 || dig[0] == 100) ||
1627 dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000) ||
1628 dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000))
1629 {
1630 // Digit is up to two orders bigger than previous one: subtract. But...
1631 if (dig[2] < dig[0]) {
1632 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1633 break;
1634 }
1635 value -= dig[1]; // Cancel addition in the previous step.
1636 dig[0] -= dig[1]; // Combine last two digits.
1637 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1638 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1639 value += dig[0]; // Add combined value.
1640 }
1641 else {
1642 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1643 break;
1644 }
1645 }
1646 if (value) {
1647 interval.start = start;
1648 return true;
1649 }
1650 interval.start = (interval.end = start) + 1;
1651 return false;
1652 }
1653
1654 protected:
1655 std::shared_ptr<basic_parser<T>>
1656 m_digit_1,
1657 m_digit_5,
1658 m_digit_10,
1659 m_digit_50,
1660 m_digit_100,
1661 m_digit_500,
1662 m_digit_1000,
1663 m_digit_5000,
1664 m_digit_10000;
1665 };
1666
1669#ifdef _UNICODE
1671#else
1673#endif
1675
1679 template <class T>
1681 {
1682 public:
1684 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1685 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1686 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1687 _In_ const std::locale& locale = std::locale()) :
1688 basic_parser<T>(locale),
1689 numerator(_numerator),
1690 fraction_line(_fraction_line),
1691 denominator(_denominator)
1692 {}
1693
1694 virtual bool match(
1695 _In_reads_or_z_(end) const T* text,
1696 _In_ size_t start = 0,
1697 _In_ size_t end = (size_t)-1,
1698 _In_ int flags = match_default)
1699 {
1700 assert(text || start >= end);
1701 if (numerator->match(text, start, end, flags) &&
1702 fraction_line->match(text, numerator->interval.end, end, flags) &&
1703 denominator->match(text, fraction_line->interval.end, end, flags))
1704 {
1705 interval.start = start;
1706 interval.end = denominator->interval.end;
1707 return true;
1708 }
1709 numerator->invalidate();
1710 fraction_line->invalidate();
1711 denominator->invalidate();
1712 interval.start = (interval.end = start) + 1;
1713 return false;
1714 }
1715
1716 virtual void invalidate()
1717 {
1718 numerator->invalidate();
1719 fraction_line->invalidate();
1720 denominator->invalidate();
1722 }
1723
1724 public:
1725 std::shared_ptr<basic_parser<T>> numerator;
1726 std::shared_ptr<basic_parser<T>> fraction_line;
1727 std::shared_ptr<basic_parser<T>> denominator;
1728 };
1729
1732#ifdef _UNICODE
1733 using tfraction = wfraction;
1734#else
1735 using tfraction = fraction;
1736#endif
1738
1742 template <class T>
1743 class basic_score : public basic_parser<T>
1744 {
1745 public:
1747 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1748 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1749 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1750 _In_ const std::shared_ptr<basic_parser<T>>& space,
1751 _In_ const std::locale& locale = std::locale()) :
1752 basic_parser<T>(locale),
1753 home(_home),
1754 separator(_separator),
1755 guest(_guest),
1756 m_space(space)
1757 {}
1758
1759 virtual bool match(
1760 _In_reads_or_z_(end) const T* text,
1761 _In_ size_t start = 0,
1762 _In_ size_t end = (size_t)-1,
1763 _In_ int flags = match_default)
1764 {
1765 assert(text || start >= end);
1766 interval.end = start;
1767
1768 if (home->match(text, interval.end, end, flags))
1769 interval.end = home->interval.end;
1770 else
1771 goto end;
1772
1773 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1774 for (; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1775
1776 if (separator->match(text, interval.end, end, flags))
1777 interval.end = separator->interval.end;
1778 else
1779 goto end;
1780
1781 for (; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1782
1783 if (guest->match(text, interval.end, end, flags))
1784 interval.end = guest->interval.end;
1785 else
1786 goto end;
1787
1788 interval.start = start;
1789 return true;
1790
1791 end:
1792 home->invalidate();
1793 separator->invalidate();
1794 guest->invalidate();
1795 interval.start = (interval.end = start) + 1;
1796 return false;
1797 }
1798
1799 virtual void invalidate()
1800 {
1801 home->invalidate();
1802 separator->invalidate();
1803 guest->invalidate();
1805 }
1806
1807 public:
1808 std::shared_ptr<basic_parser<T>> home;
1809 std::shared_ptr<basic_parser<T>> separator;
1810 std::shared_ptr<basic_parser<T>> guest;
1811
1812 protected:
1813 std::shared_ptr<basic_parser<T>> m_space;
1814 };
1815
1816 using score = basic_score<char>;
1818#ifdef _UNICODE
1819 using tscore = wscore;
1820#else
1821 using tscore = score;
1822#endif
1824
1828 template <class T>
1830 {
1831 public:
1833 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1834 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1835 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1836 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1837 _In_ const std::locale& locale = std::locale()) :
1838 basic_parser<T>(locale),
1839 positive_sign(_positive_sign),
1840 negative_sign(_negative_sign),
1841 special_sign(_special_sign),
1842 number(_number)
1843 {}
1844
1845 virtual bool match(
1846 _In_reads_or_z_(end) const T* text,
1847 _In_ size_t start = 0,
1848 _In_ size_t end = (size_t)-1,
1849 _In_ int flags = match_default)
1850 {
1851 assert(text || start >= end);
1852 interval.end = start;
1853 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
1854 interval.end = positive_sign->interval.end;
1855 if (negative_sign) negative_sign->invalidate();
1856 if (special_sign) special_sign->invalidate();
1857 }
1858 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
1859 interval.end = negative_sign->interval.end;
1860 if (positive_sign) positive_sign->invalidate();
1861 if (special_sign) special_sign->invalidate();
1862 }
1863 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
1864 interval.end = special_sign->interval.end;
1865 if (positive_sign) positive_sign->invalidate();
1866 if (negative_sign) negative_sign->invalidate();
1867 }
1868 else {
1869 if (positive_sign) positive_sign->invalidate();
1870 if (negative_sign) negative_sign->invalidate();
1871 if (special_sign) special_sign->invalidate();
1872 }
1873 if (number->match(text, interval.end, end, flags)) {
1874 interval.start = start;
1875 interval.end = number->interval.end;
1876 return true;
1877 }
1878 if (positive_sign) positive_sign->invalidate();
1879 if (negative_sign) negative_sign->invalidate();
1880 if (special_sign) special_sign->invalidate();
1881 number->invalidate();
1882 interval.start = (interval.end = start) + 1;
1883 return false;
1884 }
1885
1886 virtual void invalidate()
1887 {
1888 if (positive_sign) positive_sign->invalidate();
1889 if (negative_sign) negative_sign->invalidate();
1890 if (special_sign) special_sign->invalidate();
1891 number->invalidate();
1893 }
1894
1895 public:
1896 std::shared_ptr<basic_parser<T>> positive_sign;
1897 std::shared_ptr<basic_parser<T>> negative_sign;
1898 std::shared_ptr<basic_parser<T>> special_sign;
1899 std::shared_ptr<basic_parser<T>> number;
1900 };
1901
1904#ifdef _UNICODE
1906#else
1908#endif
1910
1914 template <class T>
1916 {
1917 public:
1919 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1920 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1921 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1922 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1923 _In_ const std::shared_ptr<basic_parser<T>>& space,
1924 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1925 _In_ const std::locale& locale = std::locale()) :
1926 basic_parser<T>(locale),
1927 positive_sign(_positive_sign),
1928 negative_sign(_negative_sign),
1929 special_sign(_special_sign),
1930 integer(_integer),
1931 fraction(_fraction),
1932 m_space(space)
1933 {}
1934
1935 virtual bool match(
1936 _In_reads_or_z_(end) const T* text,
1937 _In_ size_t start = 0,
1938 _In_ size_t end = (size_t)-1,
1939 _In_ int flags = match_default)
1940 {
1941 assert(text || start >= end);
1942 interval.end = start;
1943
1944 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
1945 interval.end = positive_sign->interval.end;
1946 if (negative_sign) negative_sign->invalidate();
1947 if (special_sign) special_sign->invalidate();
1948 }
1949 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
1950 interval.end = negative_sign->interval.end;
1951 if (positive_sign) positive_sign->invalidate();
1952 if (special_sign) special_sign->invalidate();
1953 }
1954 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
1955 interval.end = special_sign->interval.end;
1956 if (positive_sign) positive_sign->invalidate();
1957 if (negative_sign) negative_sign->invalidate();
1958 }
1959 else {
1960 if (positive_sign) positive_sign->invalidate();
1961 if (negative_sign) negative_sign->invalidate();
1962 if (special_sign) special_sign->invalidate();
1963 }
1964
1965 // Check for <integer> <fraction>
1966 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1967 if (integer->match(text, interval.end, end, flags) &&
1968 m_space->match(text, integer->interval.end, end, space_match_flags))
1969 {
1970 for (interval.end = m_space->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1971 if (fraction->match(text, interval.end, end, flags)) {
1972 interval.start = start;
1974 return true;
1975 }
1976 fraction->invalidate();
1977 interval.start = start;
1978 interval.end = integer->interval.end;
1979 return true;
1980 }
1981
1982 // Check for <fraction>
1983 if (fraction->match(text, interval.end, end, flags)) {
1984 integer->invalidate();
1985 interval.start = start;
1987 return true;
1988 }
1989
1990 // Check for <integer>
1991 if (integer->match(text, interval.end, end, flags)) {
1992 fraction->invalidate();
1993 interval.start = start;
1994 interval.end = integer->interval.end;
1995 return true;
1996 }
1997
1998 if (positive_sign) positive_sign->invalidate();
1999 if (negative_sign) negative_sign->invalidate();
2000 if (special_sign) special_sign->invalidate();
2001 integer->invalidate();
2002 fraction->invalidate();
2003 interval.start = (interval.end = start) + 1;
2004 return false;
2005 }
2006
2007 virtual void invalidate()
2008 {
2009 if (positive_sign) positive_sign->invalidate();
2010 if (negative_sign) negative_sign->invalidate();
2011 if (special_sign) special_sign->invalidate();
2012 integer->invalidate();
2013 fraction->invalidate();
2015 }
2016
2017 public:
2018 std::shared_ptr<basic_parser<T>> positive_sign;
2019 std::shared_ptr<basic_parser<T>> negative_sign;
2020 std::shared_ptr<basic_parser<T>> special_sign;
2021 std::shared_ptr<basic_parser<T>> integer;
2022 std::shared_ptr<basic_parser<T>> fraction;
2023
2024 protected:
2025 std::shared_ptr<basic_parser<T>> m_space;
2026 };
2027
2030#ifdef _UNICODE
2032#else
2034#endif
2036
2040 template <class T>
2042 {
2043 public:
2045 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2046 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2047 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2048 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2049 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2050 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2051 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2052 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2053 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2054 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2055 _In_ const std::locale& locale = std::locale()) :
2056 basic_parser<T>(locale),
2057 positive_sign(_positive_sign),
2058 negative_sign(_negative_sign),
2059 special_sign(_special_sign),
2060 integer(_integer),
2061 decimal_separator(_decimal_separator),
2062 decimal(_decimal),
2063 exponent_symbol(_exponent_symbol),
2064 positive_exp_sign(_positive_exp_sign),
2065 negative_exp_sign(_negative_exp_sign),
2066 exponent(_exponent),
2067 value(std::numeric_limits<double>::quiet_NaN())
2068 {}
2069
2070 virtual bool match(
2071 _In_reads_or_z_(end) const T* text,
2072 _In_ size_t start = 0,
2073 _In_ size_t end = (size_t)-1,
2074 _In_ int flags = match_default)
2075 {
2076 assert(text || start >= end);
2077 interval.end = start;
2078
2079 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
2080 interval.end = positive_sign->interval.end;
2081 if (negative_sign) negative_sign->invalidate();
2082 if (special_sign) special_sign->invalidate();
2083 }
2084 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
2085 interval.end = negative_sign->interval.end;
2086 if (positive_sign) positive_sign->invalidate();
2087 if (special_sign) special_sign->invalidate();
2088 }
2089 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
2090 interval.end = special_sign->interval.end;
2091 if (positive_sign) positive_sign->invalidate();
2092 if (negative_sign) negative_sign->invalidate();
2093 }
2094 else {
2095 if (positive_sign) positive_sign->invalidate();
2096 if (negative_sign) negative_sign->invalidate();
2097 if (special_sign) special_sign->invalidate();
2098 }
2099
2100 if (integer->match(text, interval.end, end, flags))
2101 interval.end = integer->interval.end;
2102
2103 if (decimal_separator->match(text, interval.end, end, flags) &&
2104 decimal->match(text, decimal_separator->interval.end, end, flags))
2105 interval.end = decimal->interval.end;
2106 else {
2107 decimal_separator->invalidate();
2108 decimal->invalidate();
2109 }
2110
2111 if (integer->interval.empty() &&
2112 decimal->interval.empty())
2113 {
2114 // No integer part, no decimal part.
2115 if (positive_sign) positive_sign->invalidate();
2116 if (negative_sign) negative_sign->invalidate();
2117 if (special_sign) special_sign->invalidate();
2118 integer->invalidate();
2119 decimal_separator->invalidate();
2120 decimal->invalidate();
2121 if (exponent_symbol) exponent_symbol->invalidate();
2122 if (positive_exp_sign) positive_exp_sign->invalidate();
2123 if (negative_exp_sign) negative_exp_sign->invalidate();
2124 if (exponent) exponent->invalidate();
2125 interval.start = (interval.end = start) + 1;
2126 return false;
2127 }
2128
2129 if (exponent_symbol && exponent_symbol->match(text, interval.end, end, flags) &&
2130 (positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2131 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags) ||
2132 exponent && exponent->match(text, exponent_symbol->interval.end, end, flags)))
2133 {
2134 interval.end = exponent->interval.end;
2135 if (negative_exp_sign) negative_exp_sign->invalidate();
2136 }
2137 else if (exponent_symbol && exponent_symbol->match(text, interval.end, end, flags) &&
2138 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2139 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2140 {
2141 interval.end = exponent->interval.end;
2142 if (positive_exp_sign) positive_exp_sign->invalidate();
2143 }
2144 else {
2145 if (exponent_symbol) exponent_symbol->invalidate();
2146 if (positive_exp_sign) positive_exp_sign->invalidate();
2147 if (negative_exp_sign) negative_exp_sign->invalidate();
2148 if (exponent) exponent->invalidate();
2149 }
2150
2151 value = (double)integer->value;
2152 if (decimal->interval)
2153 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2154 if (negative_sign && negative_sign->interval)
2155 value = -value;
2156 if (exponent && exponent->interval) {
2157 double e = (double)exponent->value;
2158 if (negative_exp_sign && negative_exp_sign->interval)
2159 e = -e;
2160 value *= pow(10.0, e);
2161 }
2162
2163 interval.start = start;
2164 return true;
2165 }
2166
2167 virtual void invalidate()
2168 {
2169 if (positive_sign) positive_sign->invalidate();
2170 if (negative_sign) negative_sign->invalidate();
2171 if (special_sign) special_sign->invalidate();
2172 integer->invalidate();
2173 decimal_separator->invalidate();
2174 decimal->invalidate();
2175 if (exponent_symbol) exponent_symbol->invalidate();
2176 if (positive_exp_sign) positive_exp_sign->invalidate();
2177 if (negative_exp_sign) negative_exp_sign->invalidate();
2178 if (exponent) exponent->invalidate();
2179 value = std::numeric_limits<double>::quiet_NaN();
2181 }
2182
2183 public:
2184 std::shared_ptr<basic_parser<T>> positive_sign;
2185 std::shared_ptr<basic_parser<T>> negative_sign;
2186 std::shared_ptr<basic_parser<T>> special_sign;
2187 std::shared_ptr<basic_integer<T>> integer;
2188 std::shared_ptr<basic_parser<T>> decimal_separator;
2189 std::shared_ptr<basic_integer<T>> decimal;
2190 std::shared_ptr<basic_parser<T>> exponent_symbol;
2191 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2192 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2193 std::shared_ptr<basic_integer<T>> exponent;
2194 double value;
2195 };
2196
2199#ifdef _UNICODE
2201#else
2203#endif
2205
2209 template <class T>
2211 {
2212 public:
2214 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2215 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2216 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2217 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2218 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2219 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2220 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2221 _In_ const std::locale& locale = std::locale()) :
2222 basic_parser<T>(locale),
2223 positive_sign(_positive_sign),
2224 negative_sign(_negative_sign),
2225 special_sign(_special_sign),
2226 currency(_currency),
2227 integer(_integer),
2228 decimal_separator(_decimal_separator),
2229 decimal(_decimal)
2230 {}
2231
2232 virtual bool match(
2233 _In_reads_or_z_(end) const T* text,
2234 _In_ size_t start = 0,
2235 _In_ size_t end = (size_t)-1,
2236 _In_ int flags = match_default)
2237 {
2238 assert(text || start >= end);
2239 interval.end = start;
2240
2241 if (positive_sign->match(text, interval.end, end, flags)) {
2242 interval.end = positive_sign->interval.end;
2243 if (negative_sign) negative_sign->invalidate();
2244 if (special_sign) special_sign->invalidate();
2245 }
2246 else if (negative_sign->match(text, interval.end, end, flags)) {
2247 interval.end = negative_sign->interval.end;
2248 if (positive_sign) positive_sign->invalidate();
2249 if (special_sign) special_sign->invalidate();
2250 }
2251 else if (special_sign->match(text, interval.end, end, flags)) {
2252 interval.end = special_sign->interval.end;
2253 if (positive_sign) positive_sign->invalidate();
2254 if (negative_sign) negative_sign->invalidate();
2255 }
2256 else {
2257 if (positive_sign) positive_sign->invalidate();
2258 if (negative_sign) negative_sign->invalidate();
2259 if (special_sign) special_sign->invalidate();
2260 }
2261
2262 if (currency->match(text, interval.end, end, flags))
2263 interval.end = currency->interval.end;
2264 else {
2265 if (positive_sign) positive_sign->invalidate();
2266 if (negative_sign) negative_sign->invalidate();
2267 if (special_sign) special_sign->invalidate();
2268 integer->invalidate();
2269 decimal_separator->invalidate();
2270 decimal->invalidate();
2271 interval.start = (interval.end = start) + 1;
2272 return false;
2273 }
2274
2275 if (integer->match(text, interval.end, end, flags))
2276 interval.end = integer->interval.end;
2277 if (decimal_separator->match(text, interval.end, end, flags) &&
2278 decimal->match(text, decimal_separator->interval.end, end, flags))
2279 interval.end = decimal->interval.end;
2280 else {
2281 decimal_separator->invalidate();
2282 decimal->invalidate();
2283 }
2284
2285 if (integer->interval.empty() &&
2286 decimal->interval.empty())
2287 {
2288 // No integer part, no decimal part.
2289 if (positive_sign) positive_sign->invalidate();
2290 if (negative_sign) negative_sign->invalidate();
2291 if (special_sign) special_sign->invalidate();
2292 currency->invalidate();
2293 integer->invalidate();
2294 decimal_separator->invalidate();
2295 decimal->invalidate();
2296 interval.start = (interval.end = start) + 1;
2297 return false;
2298 }
2299
2300 interval.start = start;
2301 return true;
2302 }
2303
/// Resets every sub-parser held by this object: the three sign parsers are
/// invalidated only when they are set, the remaining four unconditionally.
2304 virtual void invalidate()
2305 {
// The sign parsers are null-checked, so any of them may be unset.
2306 if (positive_sign) positive_sign->invalidate();
2307 if (negative_sign) negative_sign->invalidate();
2308 if (special_sign) special_sign->invalidate();
2309 currency->invalidate();
2310 integer->invalidate();
2311 decimal_separator->invalidate();
2312 decimal->invalidate();
// NOTE(review): the listing jumps from 2312 to 2314; a statement may be missing here — confirm against the original header.
2314 }
2315
2316 public:
2317 std::shared_ptr<basic_parser<T>> positive_sign;
2318 std::shared_ptr<basic_parser<T>> negative_sign;
2319 std::shared_ptr<basic_parser<T>> special_sign;
2320 std::shared_ptr<basic_parser<T>> currency;
2321 std::shared_ptr<basic_parser<T>> integer;
2322 std::shared_ptr<basic_parser<T>> decimal_separator;
2323 std::shared_ptr<basic_parser<T>> decimal;
2324 };
2325
2328#ifdef _UNICODE
2330#else
2332#endif
2334
/// Parser for an IPv4 address written as four numeric components joined by a separator.
/// NOTE(review): the class declaration line (2339) and the constructor name line (2342)
/// are absent from this listing — confirm both against the original header.
2338 template <class T>
2340 {
2341 public:
/// Constructor parameters: one parser per decimal digit value (digit_0..digit_9),
/// the parser for the component separator, and the locale handed to basic_parser<T>.
/// The parsed address value starts out as 0.
2343 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2344 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2345 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2346 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2347 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2348 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2349 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2350 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2351 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2352 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2353 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2354 _In_ const std::locale& locale = std::locale()) :
2355 basic_parser<T>(locale),
2356 m_digit_0(digit_0),
2357 m_digit_1(digit_1),
2358 m_digit_2(digit_2),
2359 m_digit_3(digit_3),
2360 m_digit_4(digit_4),
2361 m_digit_5(digit_5),
2362 m_digit_6(digit_6),
2363 m_digit_7(digit_7),
2364 m_digit_8(digit_8),
2365 m_digit_9(digit_9),
2366 m_separator(separator)
2367 {
2368 value.s_addr = 0;
2369 }
2370
/// Tries to match an address in `text` beginning at offset `start`.
///
/// \param[in] text   Text to parse; reading stops at `end` or at a zero terminator
/// \param[in] start  Offset of the first character to examine
/// \param[in] end    Offset one past the last character to examine
/// \param[in] flags  Passed unchanged to the digit and separator parsers
///
/// \returns true when four components were parsed (interval then spans start..end of
/// the match and `value` holds the packed result); false otherwise, with `value`
/// cleared and `interval` left with start > end.
2371 virtual bool match(
2372 _In_reads_or_z_(end) const T* text,
2373 _In_ size_t start = 0,
2374 _In_ size_t end = (size_t)-1,
2375 _In_ int flags = match_default)
2376 {
2377 assert(text || start >= end);
2378 interval.end = start;
2379 value.s_addr = 0;
2380
2381 size_t i;
2382 for (i = 0; i < 4; i++) {
// Every component after the first must be preceded by the separator.
2383 if (i) {
2384 if (m_separator->match(text, interval.end, end, flags))
2385 interval.end = m_separator->interval.end;
2386 else
2387 goto error;
2388 }
2389
// NOTE(review): line 2390 is absent from this listing — confirm against the original header.
2391 bool is_empty = true;
2392 size_t x;
// Accumulate decimal digits into x for as long as the running value stays within 0..255.
2393 for (x = 0; interval.end < end && text[interval.end];) {
2394 size_t dig, digit_end;
// Probe the digit parsers in order; the first one that matches decides the digit value.
2395 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2396 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2397 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2398 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2399 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2400 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2401 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2402 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2403 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2404 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2405 else break;
2406 size_t x_n = x * 10 + dig;
// A digit that would push the component above 255 is not consumed; the component ends before it.
2407 if (x_n <= 255) {
2408 x = x_n;
2409 interval.end = digit_end;
2410 is_empty = false;
2411 }
2412 else
2413 break;
2414 }
// A component without a single digit makes the whole address fail.
2415 if (is_empty)
2416 goto error;
// NOTE(review): line 2417 is absent from this listing — confirm against the original header.
// Shift the components parsed so far up by one byte and append this one as the low byte.
2418 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2419 }
// The loop above only finishes normally once i == 4 (every other exit is `goto error`),
// so this check does not fire in the code visible here.
2420 if (i < 4)
2421 goto error;
2422
2423 interval.start = start;
2424 return true;
2425
// Failure path: reset all four component intervals to start=1/end=0, clear the
// value and leave `interval` with start > end.
2426 error:
2427 components[0].start = 1;
2428 components[0].end = 0;
2429 components[1].start = 1;
2430 components[1].end = 0;
2431 components[2].start = 1;
2432 components[2].end = 0;
2433 components[3].start = 1;
2434 components[3].end = 0;
2435 value.s_addr = 0;
2436 interval.start = (interval.end = start) + 1;
2437 return false;
2438 }
2439
/// Resets the four component intervals to start=1/end=0 and clears `value`.
2440 virtual void invalidate()
2441 {
2442 components[0].start = 1;
2443 components[0].end = 0;
2444 components[1].start = 1;
2445 components[1].end = 0;
2446 components[2].start = 1;
2447 components[2].end = 0;
2448 components[3].start = 1;
2449 components[3].end = 0;
2450 value.s_addr = 0;
// NOTE(review): the listing jumps from 2450 to 2452; a statement may be missing here — confirm against the original header.
2452 }
2453
2454 public:
// NOTE(review): the declaration of `components` (line 2455) is absent from this listing.
// Address value assembled by match(): each parsed component is shifted in as the next lower byte.
2456 struct in_addr value;
2457
2458 protected:
// Parsers recognising the decimal digits 0 through 9, indexed by digit value.
2459 std::shared_ptr<basic_parser<T>>
2460 m_digit_0,
2461 m_digit_1,
2462 m_digit_2,
2463 m_digit_3,
2464 m_digit_4,
2465 m_digit_5,
2466 m_digit_6,
2467 m_digit_7,
2468 m_digit_8,
2469 m_digit_9;
// Parser recognising the separator required between components.
2470 std::shared_ptr<basic_parser<T>> m_separator;
2471 };
2472
2475#ifdef _UNICODE
2477#else
2479#endif
2481
/// Single-character parser accepting '-', '_', ':' or any character the locale
/// classifies as alphanumeric.
/// NOTE(review): the class declaration line (2486) is absent from this listing.
2485 template <class T>
2487 {
2488 public:
/// \param[in] locale  Locale handed to basic_parser<T>; match() uses m_locale for the alnum test
2489 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2490
/// Tests the single character at `text[start]`.
///
/// \returns true and sets interval to [start, start + 1) when the character is
/// accepted; false with interval.start > interval.end when it is rejected, when
/// `start` is not below `end`, or when the character is the zero terminator.
2491 virtual bool match(
2492 _In_reads_or_z_(end) const T* text,
2493 _In_ size_t start = 0,
2494 _In_ size_t end = (size_t)-1,
2495 _In_ int flags = match_default)
2496 {
2497 assert(text || start >= end);
2498 if (start < end && text[start]) {
2499 if (text[start] == '-' ||
2500 text[start] == '_' ||
2501 text[start] == ':' ||
2502 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
2503 {
2504 interval.end = (interval.start = start) + 1;
2505 return true;
2506 }
2507 }
// No match: leave the interval with start > end.
2508 interval.start = (interval.end = start) + 1;
2509 return false;
2510 }
2511 };
2512
2515#ifdef _UNICODE
2517#else
2519#endif
2520
/// Variant of the scope-ID character parser for `char` text that is first run
/// through next_sgml_cp(); it accepts '-', '_', ':' or alphanumeric characters.
/// NOTE(review): the class declaration line is absent from this listing.
2525 {
2526 public:
/// \param[in] locale  Locale handed to sgml_parser; match() uses m_locale for the alnum test
2527 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2528
/// Tests the character found at `text[start]`.
///
/// \returns true with interval.start = start when the character is accepted;
/// false with interval.start > interval.end otherwise.
2529 virtual bool match(
2530 _In_reads_or_z_(end) const char* text,
2531 _In_ size_t start = 0,
2532 _In_ size_t end = (size_t)-1,
2533 _In_ int flags = match_default)
2534 {
2535 assert(text || start >= end);
2536 if (start < end && text[start]) {
2537 wchar_t buf[3];
// presumably next_sgml_cp() decodes the character or SGML entity at `start` into a
// zero-terminated wide string and stores the offset past it in interval.end — confirm.
2538 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
2539 const wchar_t* chr_end = chr + stdex::strlen(chr);
// Accept either exactly one of the listed punctuation characters, or a string in
// which scan_not() finds no non-alphanumeric character.
2540 if ((chr[0] == L'-' ||
2541 chr[0] == L'_' ||
2542 chr[0] == L':') && chr[1] == 0 ||
2543 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2544 {
2545 interval.start = start;
2546 return true;
2547 }
2548 }
// No match: leave the interval with start > end.
2549 interval.start = (interval.end = start) + 1;
2550 return false;
2551 }
2552 };
2553
/// Parser for an IPv6 address: up to eight hexadecimal groups joined by a separator,
/// with support for one doubled separator (zero compaction) and an optional scope ID.
/// NOTE(review): the class declaration line (2558) and the constructor name line (2561)
/// are absent from this listing — confirm both against the original header.
2557 template <class T>
2559 {
2560 public:
/// Constructor parameters: one parser per hexadecimal digit value (digit_0..digit_15),
/// the group separator parser, an optional scope-ID separator parser, an optional
/// scope-ID parser, and the locale handed to basic_parser<T>. `value` starts zeroed.
2562 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2563 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2564 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2565 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2566 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2567 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2568 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2569 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2570 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2571 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2572 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2573 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2574 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2575 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2576 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2577 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2578 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2579 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2580 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2581 _In_ const std::locale& locale = std::locale()) :
2582 basic_parser<T>(locale),
2583 m_digit_0(digit_0),
2584 m_digit_1(digit_1),
2585 m_digit_2(digit_2),
2586 m_digit_3(digit_3),
2587 m_digit_4(digit_4),
2588 m_digit_5(digit_5),
2589 m_digit_6(digit_6),
2590 m_digit_7(digit_7),
2591 m_digit_8(digit_8),
2592 m_digit_9(digit_9),
2593 m_digit_10(digit_10),
2594 m_digit_11(digit_11),
2595 m_digit_12(digit_12),
2596 m_digit_13(digit_13),
2597 m_digit_14(digit_14),
2598 m_digit_15(digit_15),
2599 m_separator(separator),
2600 m_scope_id_separator(scope_id_separator),
2601 scope_id(_scope_id)
2602 {
2603 memset(&value, 0, sizeof(value));
2604 }
2605
/// Tries to match an address in `text` beginning at offset `start`.
///
/// \param[in] text   Text to parse; reading stops at `end` or at a zero terminator
/// \param[in] start  Offset of the first character to examine
/// \param[in] end    Offset one past the last character to examine
/// \param[in] flags  Passed unchanged to the sub-parsers
///
/// \returns true when eight groups were parsed or a doubled separator was seen
/// (interval then spans the match, including a scope ID if one followed); false
/// otherwise, with `value` zeroed, components reset and interval.start > interval.end.
2606 virtual bool match(
2607 _In_reads_or_z_(end) const T* text,
2608 _In_ size_t start = 0,
2609 _In_ size_t end = (size_t)-1,
2610 _In_ int flags = match_default)
2611 {
2612 assert(text || start >= end);
2613 interval.end = start;
2614 memset(&value, 0, sizeof(value));
2615
// compaction_i is the index of the group at which a doubled separator was found;
// (size_t)-1 means none has been seen yet.
2616 size_t i, compaction_i = (size_t)-1, compaction_start = start;
2617 for (i = 0; i < 8; i++) {
2618 bool is_empty = true;
2619
2620 if (m_separator->match(text, interval.end, end, flags)) {
// Probe for a second separator directly after the first. When the probe fails,
// the branches below read m_separator->interval.end again, which relies on a failed
// match() leaving interval.end at its start offset, as the match() implementations
// visible in this file do.
2621 if (m_separator->match(text, m_separator->interval.end, end, flags)) {
2622 // :: found
2623 if (compaction_i == (size_t)-1) {
2624 // Zero compaction start
2625 compaction_i = i;
2626 compaction_start = m_separator->interval.start;
2627 interval.end = m_separator->interval.end;
2628 }
2629 else {
2630 // More than one zero compaction
2631 break;
2632 }
2633 }
2634 else if (i) {
2635 // Inner : found
2636 interval.end = m_separator->interval.end;
2637 }
2638 else {
2639 // Leading : found
2640 goto error;
2641 }
2642 }
2643 else if (i) {
2644 // : missing
2645 break;
2646 }
2647
// NOTE(review): line 2648 is absent from this listing — confirm against the original header.
2649 size_t x;
// Accumulate hexadecimal digits into x for as long as the running value fits in 16 bits.
2650 for (x = 0; interval.end < end && text[interval.end];) {
2651 size_t dig, digit_end;
// Probe the digit parsers in order; the first one that matches decides the digit value.
2652 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2653 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2654 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2655 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2656 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2657 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2658 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2659 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2660 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2661 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2662 else if (m_digit_10->match(text, interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2663 else if (m_digit_11->match(text, interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2664 else if (m_digit_12->match(text, interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2665 else if (m_digit_13->match(text, interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2666 else if (m_digit_14->match(text, interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2667 else if (m_digit_15->match(text, interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2668 else break;
2669 size_t x_n = x * 16 + dig;
// A digit that would push the group above 0xffff is not consumed; the group ends before it.
2670 if (x_n <= 0xffff) {
2671 x = x_n;
2672 interval.end = digit_end;
2673 is_empty = false;
2674 }
2675 else
2676 break;
2677 }
2678 if (is_empty) {
2679 if (compaction_i != (size_t)-1) {
2680 // Zero compaction active: no sweat.
2681 break;
2682 }
2683 goto error;
2684 }
// NOTE(review): line 2685 is absent from this listing — confirm against the original header.
// NOTE(review): x is stored without any byte-order conversion — confirm the intended byte order.
2686 value.s6_words[i] = (uint16_t)x;
2687 }
2688
2689 if (compaction_i != (size_t)-1) {
2690 // Align components right due to zero compaction.
2691 size_t j, k;
// Move the groups parsed after the compaction point to the tail of the address...
2692 for (j = 8, k = i; k > compaction_i;) {
2693 value.s6_words[--j] = value.s6_words[--k];
2694 components[j] = components[k];
2695 }
// ...then zero the gap; its component intervals collapse to the compaction position.
2696 for (; j > compaction_i;) {
2697 value.s6_words[--j] = 0;
2698 components[j].start =
2699 components[j].end = compaction_start;
2700 }
2701 }
// Without zero compaction all eight groups are required.
2702 else if (i < 8)
2703 goto error;
2704
// The scope ID is optional: it extends the match only when both the scope-ID
// separator and the scope ID itself match; otherwise a configured scope_id is invalidated.
2705 if (m_scope_id_separator && m_scope_id_separator->match(text, interval.end, end, flags) &&
2706 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2707 interval.end = scope_id->interval.end;
2708 else if (scope_id)
2709 scope_id->invalidate();
2710
2711 interval.start = start;
2712 return true;
2713
// Failure path: reset all eight component intervals to start=1/end=0, zero the value,
// invalidate the scope ID parser if set, and leave `interval` with start > end.
2714 error:
2715 components[0].start = 1;
2716 components[0].end = 0;
2717 components[1].start = 1;
2718 components[1].end = 0;
2719 components[2].start = 1;
2720 components[2].end = 0;
2721 components[3].start = 1;
2722 components[3].end = 0;
2723 components[4].start = 1;
2724 components[4].end = 0;
2725 components[5].start = 1;
2726 components[5].end = 0;
2727 components[6].start = 1;
2728 components[6].end = 0;
2729 components[7].start = 1;
2730 components[7].end = 0;
2731 memset(&value, 0, sizeof(value));
2732 if (scope_id) scope_id->invalidate();
2733 interval.start = (interval.end = start) + 1;
2734 return false;
2735 }
2736
/// Resets the eight component intervals to start=1/end=0, zeroes `value` and
/// invalidates the scope ID parser when one is set.
2737 virtual void invalidate()
2738 {
2739 components[0].start = 1;
2740 components[0].end = 0;
2741 components[1].start = 1;
2742 components[1].end = 0;
2743 components[2].start = 1;
2744 components[2].end = 0;
2745 components[3].start = 1;
2746 components[3].end = 0;
2747 components[4].start = 1;
2748 components[4].end = 0;
2749 components[5].start = 1;
2750 components[5].end = 0;
2751 components[6].start = 1;
2752 components[6].end = 0;
2753 components[7].start = 1;
2754 components[7].end = 0;
2755 memset(&value, 0, sizeof(value));
2756 if (scope_id) scope_id->invalidate();
// NOTE(review): the listing jumps from 2756 to 2758; a statement may be missing here — confirm against the original header.
2758 }
2759
2760 public:
// NOTE(review): the declaration of `components` (line 2761) is absent from this listing.
// Address value assembled by match(), one 16-bit word per group.
2762 struct in6_addr value;
// Optional parser for the scope ID that may follow the address; may be null.
2763 std::shared_ptr<basic_parser<T>> scope_id;
2764
2765 protected:
// Parsers recognising the hexadecimal digits, indexed by digit value 0 through 15.
2766 std::shared_ptr<basic_parser<T>>
2767 m_digit_0,
2768 m_digit_1,
2769 m_digit_2,
2770 m_digit_3,
2771 m_digit_4,
2772 m_digit_5,
2773 m_digit_6,
2774 m_digit_7,
2775 m_digit_8,
2776 m_digit_9,
2777 m_digit_10,
2778 m_digit_11,
2779 m_digit_12,
2780 m_digit_13,
2781 m_digit_14,
2782 m_digit_15;
// Group separator parser and optional (nullable) scope-ID separator parser.
2783 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2784 };
2785
2788#ifdef _UNICODE
2790#else
2792#endif
2794
/// Single-character parser for characters of a DNS domain label. Besides matching,
/// it records in `allow_on_edge` whether the matched character is anything other than '-'.
/// NOTE(review): the class declaration line (2799), the constructor name line (2802)
/// and the declaration of `allow_on_edge` (2838) are absent from this listing.
2798 template <class T>
2800 {
2801 public:
/// \param[in] allow_idn  When true, any character the locale classifies as alphanumeric is accepted
/// \param[in] locale     Locale handed to basic_parser<T>
2803 _In_ bool allow_idn,
2804 _In_ const std::locale& locale = std::locale()) :
2805 basic_parser<T>(locale),
2806 m_allow_idn(allow_idn),
2807 allow_on_edge(true)
2808 {}
2809
/// Tests the single character at `text[start]`.
///
/// \returns true and sets interval to [start, start + 1) for ASCII letters, ASCII
/// digits, '-' and (with m_allow_idn) locale-alphanumeric characters; false with
/// interval.start > interval.end otherwise. allow_on_edge is updated only on a match.
2810 virtual bool match(
2811 _In_reads_or_z_(end) const T* text,
2812 _In_ size_t start = 0,
2813 _In_ size_t end = (size_t)-1,
2814 _In_ int flags = match_default)
2815 {
2816 assert(text || start >= end);
2817 if (start < end && text[start]) {
2818 if (('A' <= text[start] && text[start] <= 'Z') ||
2819 ('a' <= text[start] && text[start] <= 'z') ||
2820 ('0' <= text[start] && text[start] <= '9'))
2821 allow_on_edge = true;
// A hyphen is a valid character but is flagged as not allowed on an edge.
2822 else if (text[start] == '-')
2823 allow_on_edge = false;
2824 else if (m_allow_idn && std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
2825 allow_on_edge = true;
2826 else {
2827 interval.start = (interval.end = start) + 1;
2828 return false;
2829 }
2830 interval.end = (interval.start = start) + 1;
2831 return true;
2832 }
// Out of range or zero terminator: leave the interval with start > end.
2833 interval.start = (interval.end = start) + 1;
2834 return false;
2835 }
2836
2837 public:
2839
2840 protected:
// Whether locale-alphanumeric characters outside the ASCII letter/digit ranges are accepted.
2841 bool m_allow_idn;
2842 };
2843
2846#ifdef _UNICODE
2848#else
2850#endif
2851
/// Variant of the DNS domain character parser for `char` text that is first run
/// through next_sgml_cp(); built on basic_dns_domain_char<char>.
/// NOTE(review): the class declaration line and the constructor name line (2858)
/// are absent from this listing.
2856 {
2857 public:
/// \param[in] allow_idn  Forwarded to basic_dns_domain_char<char>
/// \param[in] locale     Forwarded to basic_dns_domain_char<char>
2859 _In_ bool allow_idn,
2860 _In_ const std::locale& locale = std::locale()) :
2861 basic_dns_domain_char<char>(allow_idn, locale)
2862 {}
2863
/// Tests the character found at `text[start]`.
///
/// \returns true with interval.start = start for a single ASCII letter, ASCII digit
/// or '-', and (with m_allow_idn) for a string in which scan_not() finds no
/// non-alphanumeric character; false with interval.start > interval.end otherwise.
/// allow_on_edge is set to false for '-' and to true for the other accepted cases.
2864 virtual bool match(
2865 _In_reads_or_z_(end) const char* text,
2866 _In_ size_t start = 0,
2867 _In_ size_t end = (size_t)-1,
2868 _In_ int flags = match_default)
2869 {
2870 assert(text || start >= end);
2871 if (start < end && text[start]) {
2872 wchar_t buf[3];
// presumably next_sgml_cp() decodes the character or SGML entity at `start` into a
// zero-terminated wide string and stores the offset past it in interval.end — confirm.
2873 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
2874 const wchar_t* chr_end = chr + stdex::strlen(chr);
2875 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2876 ('a' <= chr[0] && chr[0] <= 'z') ||
2877 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2878 allow_on_edge = true;
2879 else if (chr[0] == '-' && chr[1] == 0)
2880 allow_on_edge = false;
2881 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2882 allow_on_edge = true;
2883 else {
2884 interval.start = (interval.end = start) + 1;
2885 return false;
2886 }
2887 interval.start = start;
2888 return true;
2889 }
// Out of range or zero terminator: leave the interval with start > end.
2890 interval.start = (interval.end = start) + 1;
2891 return false;
2892 }
2893 };
2894
/// Parser for a DNS name: a sequence of labels built from domain characters and
/// joined by a separator.
/// NOTE(review): the class declaration line (2899), the constructor name line (2902)
/// and the declaration of `m_allow_absolute` (2964) are absent from this listing.
2898 template <class T>
2900 {
2901 public:
/// \param[in] allow_absolute  When true, a separator that ends a label is included in the match
/// \param[in] domain_char     Parser for a single label character; its allow_on_edge flag is consulted
/// \param[in] separator       Parser for the label separator
/// \param[in] locale          Locale handed to basic_parser<T>
2903 _In_ bool allow_absolute,
2904 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2905 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2906 _In_ const std::locale& locale = std::locale()) :
2907 basic_parser<T>(locale),
2908 m_allow_absolute(allow_absolute),
2909 m_domain_char(domain_char),
2910 m_separator(separator)
2911 {}
2912
/// Tries to match a name in `text` beginning at offset `start`.
///
/// \returns true when at least one label was started (interval then spans the
/// accepted part); false with interval.start > interval.end otherwise.
2913 virtual bool match(
2914 _In_reads_or_z_(end) const T* text,
2915 _In_ size_t start = 0,
2916 _In_ size_t end = (size_t)-1,
2917 _In_ int flags = match_default)
2918 {
2919 assert(text || start >= end);
// `i` is the scan position; `count` is the number of labels started, capped at 127.
2920 size_t i = start, count;
2921 for (count = 0; i < end && text[i] && count < 127; count++) {
// A label must begin with a character that is allowed on an edge.
2922 if (m_domain_char->match(text, i, end, flags) &&
2923 m_domain_char->allow_on_edge)
2924 {
2925 // Domain start
2926 interval.end = i = m_domain_char->interval.end;
2927 while (i < end && text[i]) {
// A separator ends the label only when the last character seen is allowed on an edge.
2928 if (m_domain_char->allow_on_edge &&
2929 m_separator->match(text, i, end, flags))
2930 {
2931 // Domain end
// With m_allow_absolute the separator is part of the match; otherwise the match
// ends before it while scanning resumes after it.
2932 if (m_allow_absolute)
2933 interval.end = i = m_separator->interval.end;
2934 else {
2935 interval.end = i;
2936 i = m_separator->interval.end;
2937 }
2938 break;
2939 }
2940 if (m_domain_char->match(text, i, end, flags)) {
// interval.end advances only past edge-allowed characters, so characters that are
// not allowed on an edge stay outside the match unless an edge-allowed one follows.
2941 if (m_domain_char->allow_on_edge)
2942 interval.end = i = m_domain_char->interval.end;
2943 else
2944 i = m_domain_char->interval.end;
2945 }
2946 else {
// Neither a separator nor a label character: the name ends here.
2947 interval.start = start;
2948 return true;
2949 }
2950 }
2951 }
2952 else
2953 break;
2954 }
2955 if (count) {
2956 interval.start = start;
2957 return true;
2958 }
// No label found: leave the interval with start > end.
2959 interval.start = (interval.end = start) + 1;
2960 return false;
2961 }
2962
2963 protected:
// Parser for a single label character.
2965 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
// Parser for the separator between labels.
2966 std::shared_ptr<basic_parser<T>> m_separator;
2967 };
2968
2971#ifdef _UNICODE
2972 using tdns_name = wdns_name;
2973#else
2974 using tdns_name = dns_name;
2975#endif
2977
/// Single-character parser for the characters accepted in the username part of a URL:
/// a fixed set of punctuation characters plus anything the locale classifies as alphanumeric.
/// NOTE(review): the class declaration line (2982) is absent from this listing.
2981 template <class T>
2983 {
2984 public:
/// \param[in] locale  Locale handed to basic_parser<T>; match() uses m_locale for the alnum test
2985 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2986
/// Tests the single character at `text[start]`.
///
/// \returns true and sets interval to [start, start + 1) when the character is
/// accepted; false with interval.start > interval.end otherwise.
2987 virtual bool match(
2988 _In_reads_or_z_(end) const T* text,
2989 _In_ size_t start = 0,
2990 _In_ size_t end = (size_t)-1,
2991 _In_ int flags = match_default)
2992 {
2993 assert(text || start >= end);
2994 if (start < end && text[start]) {
// The tests for '(' and ')' are commented out, so parentheses are not accepted here.
2995 if (text[start] == '-' ||
2996 text[start] == '.' ||
2997 text[start] == '_' ||
2998 text[start] == '~' ||
2999 text[start] == '%' ||
3000 text[start] == '!' ||
3001 text[start] == '$' ||
3002 text[start] == '&' ||
3003 text[start] == '\'' ||
3004 //text[start] == '(' ||
3005 //text[start] == ')' ||
3006 text[start] == '*' ||
3007 text[start] == '+' ||
3008 text[start] == ',' ||
3009 text[start] == ';' ||
3010 text[start] == '=' ||
3011 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3012 {
3013 interval.end = (interval.start = start) + 1;
3014 return true;
3015 }
3016 }
// No match: leave the interval with start > end.
3017 interval.start = (interval.end = start) + 1;
3018 return false;
3019 }
3020 };
3021
3024#ifdef _UNICODE
3026#else
3028#endif
3029
/// Variant of the URL username character parser for `char` text that is first run
/// through next_sgml_cp(); built on basic_url_username_char<char>.
/// NOTE(review): the class declaration line is absent from this listing.
3034 {
3035 public:
/// \param[in] locale  Forwarded to basic_url_username_char<char>
3036 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3037
/// Tests the character found at `text[start]`.
///
/// \returns true with interval.start = start when the character is accepted;
/// false with interval.start > interval.end otherwise.
3038 virtual bool match(
3039 _In_reads_or_z_(end) const char* text,
3040 _In_ size_t start = 0,
3041 _In_ size_t end = (size_t)-1,
3042 _In_ int flags = match_default)
3043 {
3044 assert(text || start >= end);
3045 if (start < end && text[start]) {
3046 wchar_t buf[3];
// presumably next_sgml_cp() decodes the character or SGML entity at `start` into a
// zero-terminated wide string and stores the offset past it in interval.end — confirm.
3047 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3048 const wchar_t* chr_end = chr + stdex::strlen(chr);
// Accept either exactly one of the listed punctuation characters, or a string in
// which scan_not() finds no non-alphanumeric character. '(' and ')' are disabled.
3049 if ((chr[0] == L'-' ||
3050 chr[0] == L'.' ||
3051 chr[0] == L'_' ||
3052 chr[0] == L'~' ||
3053 chr[0] == L'%' ||
3054 chr[0] == L'!' ||
3055 chr[0] == L'$' ||
3056 chr[0] == L'&' ||
3057 chr[0] == L'\'' ||
3058 //chr[0] == L'(' ||
3059 //chr[0] == L')' ||
3060 chr[0] == L'*' ||
3061 chr[0] == L'+' ||
3062 chr[0] == L',' ||
3063 chr[0] == L';' ||
3064 chr[0] == L'=') && chr[1] == 0 ||
3065 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3066 {
3067 interval.start = start;
3068 return true;
3069 }
3070 }
3071
// No match: leave the interval with start > end.
3072 interval.start = (interval.end = start) + 1;
3073 return false;
3074 }
3075 };
3076
/// Single-character parser for the characters accepted in the password part of a URL:
/// a fixed set of punctuation characters (including '(', ')' and ':') plus anything
/// the locale classifies as alphanumeric.
/// NOTE(review): the class declaration line (3081) is absent from this listing.
3080 template <class T>
3082 {
3083 public:
/// \param[in] locale  Locale handed to basic_parser<T>; match() uses m_locale for the alnum test
3084 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3085
/// Tests the single character at `text[start]`.
///
/// \returns true and sets interval to [start, start + 1) when the character is
/// accepted; false with interval.start > interval.end otherwise.
3086 virtual bool match(
3087 _In_reads_or_z_(end) const T* text,
3088 _In_ size_t start = 0,
3089 _In_ size_t end = (size_t)-1,
3090 _In_ int flags = match_default)
3091 {
3092 assert(text || start >= end);
3093 if (start < end && text[start]) {
3094 if (text[start] == '-' ||
3095 text[start] == '.' ||
3096 text[start] == '_' ||
3097 text[start] == '~' ||
3098 text[start] == '%' ||
3099 text[start] == '!' ||
3100 text[start] == '$' ||
3101 text[start] == '&' ||
3102 text[start] == '\'' ||
3103 text[start] == '(' ||
3104 text[start] == ')' ||
3105 text[start] == '*' ||
3106 text[start] == '+' ||
3107 text[start] == ',' ||
3108 text[start] == ';' ||
3109 text[start] == '=' ||
3110 text[start] == ':' ||
3111 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3112 {
3113 interval.end = (interval.start = start) + 1;
3114 return true;
3115 }
3116 }
// No match: leave the interval with start > end.
3117 interval.start = (interval.end = start) + 1;
3118 return false;
3119 }
3120 };
3121
3124#ifdef _UNICODE
3126#else
3128#endif
3129
/// Variant of the URL password character parser for `char` text that is first run
/// through next_sgml_cp(); built on basic_url_password_char<char>.
/// NOTE(review): the class declaration line is absent from this listing.
3134 {
3135 public:
/// \param[in] locale  Forwarded to basic_url_password_char<char>
3136 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3137
/// Tests the character found at `text[start]`.
///
/// \returns true with interval.start = start when the character is accepted;
/// false with interval.start > interval.end otherwise.
3138 virtual bool match(
3139 _In_reads_or_z_(end) const char* text,
3140 _In_ size_t start = 0,
3141 _In_ size_t end = (size_t)-1,
3142 _In_ int flags = match_default)
3143 {
3144 assert(text || start >= end);
3145 if (start < end && text[start]) {
3146 wchar_t buf[3];
// presumably next_sgml_cp() decodes the character or SGML entity at `start` into a
// zero-terminated wide string and stores the offset past it in interval.end — confirm.
3147 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3148 const wchar_t* chr_end = chr + stdex::strlen(chr);
// Accept either exactly one of the listed punctuation characters, or a string in
// which scan_not() finds no non-alphanumeric character.
3149 if ((chr[0] == L'-' ||
3150 chr[0] == L'.' ||
3151 chr[0] == L'_' ||
3152 chr[0] == L'~' ||
3153 chr[0] == L'%' ||
3154 chr[0] == L'!' ||
3155 chr[0] == L'$' ||
3156 chr[0] == L'&' ||
3157 chr[0] == L'\'' ||
3158 chr[0] == L'(' ||
3159 chr[0] == L')' ||
3160 chr[0] == L'*' ||
3161 chr[0] == L'+' ||
3162 chr[0] == L',' ||
3163 chr[0] == L';' ||
3164 chr[0] == L'=' ||
3165 chr[0] == L':') && chr[1] == 0 ||
3166 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3167 {
3168 interval.start = start;
3169 return true;
3170 }
3171 }
// No match: leave the interval with start > end.
3172 interval.start = (interval.end = start) + 1;
3173 return false;
3174 }
3175 };
3176
/// Single-character parser for the characters accepted in the path part of a URL:
/// a fixed set of punctuation characters (including '/', '@', '?' and '#') plus
/// anything the locale classifies as alphanumeric.
/// NOTE(review): the class declaration line (3181) is absent from this listing.
3180 template <class T>
3182 {
3183 public:
/// \param[in] locale  Locale handed to basic_parser<T>; match() uses m_locale for the alnum test
3184 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3185
/// Tests the single character at `text[start]`.
///
/// \returns true and sets interval to [start, start + 1) when the character is
/// accepted; false with interval.start > interval.end otherwise.
3186 virtual bool match(
3187 _In_reads_or_z_(end) const T* text,
3188 _In_ size_t start = 0,
3189 _In_ size_t end = (size_t)-1,
3190 _In_ int flags = match_default)
3191 {
3192 assert(text || start >= end);
3193 if (start < end && text[start]) {
3194 if (text[start] == '/' ||
3195 text[start] == '-' ||
3196 text[start] == '.' ||
3197 text[start] == '_' ||
3198 text[start] == '~' ||
3199 text[start] == '%' ||
3200 text[start] == '!' ||
3201 text[start] == '$' ||
3202 text[start] == '&' ||
3203 text[start] == '\'' ||
3204 text[start] == '(' ||
3205 text[start] == ')' ||
3206 text[start] == '*' ||
3207 text[start] == '+' ||
3208 text[start] == ',' ||
3209 text[start] == ';' ||
3210 text[start] == '=' ||
3211 text[start] == ':' ||
3212 text[start] == '@' ||
3213 text[start] == '?' ||
3214 text[start] == '#' ||
3215 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3216 {
3217 interval.end = (interval.start = start) + 1;
3218 return true;
3219 }
3220 }
// No match: leave the interval with start > end.
3221 interval.start = (interval.end = start) + 1;
3222 return false;
3223 }
3224 };
3225
3228#ifdef _UNICODE
3230#else
3232#endif
3233
/// Variant of the URL path character parser for `char` text that is first run
/// through next_sgml_cp(); built on basic_url_path_char<char>.
/// NOTE(review): the class declaration line is absent from this listing.
3238 {
3239 public:
/// \param[in] locale  Forwarded to basic_url_path_char<char>
3240 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3241
/// Tests the character found at `text[start]`.
///
/// \returns true with interval.start = start when the character is accepted;
/// false with interval.start > interval.end otherwise.
3242 virtual bool match(
3243 _In_reads_or_z_(end) const char* text,
3244 _In_ size_t start = 0,
3245 _In_ size_t end = (size_t)-1,
3246 _In_ int flags = match_default)
3247 {
3248 assert(text || start >= end);
3249 if (start < end && text[start]) {
3250 wchar_t buf[3];
// presumably next_sgml_cp() decodes the character or SGML entity at `start` into a
// zero-terminated wide string and stores the offset past it in interval.end — confirm.
3251 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3252 const wchar_t* chr_end = chr + stdex::strlen(chr);
// Accept either exactly one of the listed punctuation characters, or a string in
// which scan_not() finds no non-alphanumeric character.
3253 if ((chr[0] == L'/' ||
3254 chr[0] == L'-' ||
3255 chr[0] == L'.' ||
3256 chr[0] == L'_' ||
3257 chr[0] == L'~' ||
3258 chr[0] == L'%' ||
3259 chr[0] == L'!' ||
3260 chr[0] == L'$' ||
3261 chr[0] == L'&' ||
3262 chr[0] == L'\'' ||
3263 chr[0] == L'(' ||
3264 chr[0] == L')' ||
3265 chr[0] == L'*' ||
3266 chr[0] == L'+' ||
3267 chr[0] == L',' ||
3268 chr[0] == L';' ||
3269 chr[0] == L'=' ||
3270 chr[0] == L':' ||
3271 chr[0] == L'@' ||
3272 chr[0] == L'?' ||
3273 chr[0] == L'#') && chr[1] == 0 ||
3274 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3275 {
3276 interval.start = start;
3277 return true;
3278 }
3279 }
// No match: leave the interval with start > end.
3280 interval.start = (interval.end = start) + 1;
3281 return false;
3282 }
3283 };
3284
/// Parser for the trailing part of a URL, split into three sub-intervals:
/// `path`, `query` (after the query-start marker) and `bookmark` (after the bookmark-start marker).
/// NOTE(review): the class declaration line (3289), the constructor name line (3292)
/// and the declarations of `path` and `query` (3407-3408) are absent from this listing.
3288 template <class T>
3290 {
3291 public:
/// \param[in] path_char       Parser for one character of the path, query or bookmark text
/// \param[in] query_start     Parser for the marker that begins the query
/// \param[in] bookmark_start  Parser for the marker that begins the bookmark
/// \param[in] locale          Locale handed to basic_parser<T>
3293 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3294 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3295 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3296 _In_ const std::locale& locale = std::locale()) :
3297 basic_parser<T>(locale),
3298 m_path_char(path_char),
3299 m_query_start(query_start),
3300 m_bookmark_start(bookmark_start)
3301 {}
3302
/// Tries to match path, optional query and optional bookmark beginning at offset `start`.
///
/// \returns true when a query or bookmark marker was found or at least one path
/// character was consumed; false otherwise, with path and bookmark reset to
/// start=1/end=0 and interval.start > interval.end.
3303 virtual bool match(
3304 _In_reads_or_z_(end) const T* text,
3305 _In_ size_t start = 0,
3306 _In_ size_t end = (size_t)-1,
3307 _In_ int flags = match_default)
3308 {
3309 assert(text || start >= end);
3310
// Start with an open path and with query and bookmark marked as absent (start=1/end=0).
3311 interval.end = start;
3312 path.start = start;
3313 query.start = 1;
3314 query.end = 0;
3315 bookmark.start = 1;
3316 bookmark.end = 0;
3317
// Path loop: at every position the markers are tried before the path character parser.
3318 for (;;) {
3319 if (interval.end >= end || !text[interval.end])
3320 break;
3321 if (m_query_start->match(text, interval.end, end, flags)) {
// The path ends before the query marker; the query begins after it.
3322 path.end = interval.end;
3323 query.start = interval.end = m_query_start->interval.end;
3324 for (;;) {
3325 if (interval.end >= end || !text[interval.end]) {
3326 query.end = interval.end;
3327 break;
3328 }
3329 if (m_bookmark_start->match(text, interval.end, end, flags)) {
// The query ends before the bookmark marker; the bookmark runs until the first
// character the path character parser rejects, or the end of text.
3330 query.end = interval.end;
3331 bookmark.start = interval.end = m_bookmark_start->interval.end;
3332 for (;;) {
3333 if (interval.end >= end || !text[interval.end]) {
3334 bookmark.end = interval.end;
3335 break;
3336 }
3337 if (m_path_char->match(text, interval.end, end, flags))
3338 interval.end = m_path_char->interval.end;
3339 else {
3340 bookmark.end = interval.end;
3341 break;
3342 }
3343 }
3344 interval.start = start;
3345 return true;
3346 }
3347 if (m_path_char->match(text, interval.end, end, flags))
3348 interval.end = m_path_char->interval.end;
3349 else {
3350 query.end = interval.end;
3351 break;
3352 }
3353 }
3354 interval.start = start;
3355 return true;
3356 }
3357 if (m_bookmark_start->match(text, interval.end, end, flags)) {
// Bookmark directly after the path; query stays marked as absent.
3358 path.end = interval.end;
3359 bookmark.start = interval.end = m_bookmark_start->interval.end;
3360 for (;;) {
3361 if (interval.end >= end || !text[interval.end]) {
3362 bookmark.end = interval.end;
3363 break;
3364 }
3365 if (m_path_char->match(text, interval.end, end, flags))
3366 interval.end = m_path_char->interval.end;
3367 else {
3368 bookmark.end = interval.end;
3369 break;
3370 }
3371 }
3372 interval.start = start;
3373 return true;
3374 }
3375 if (m_path_char->match(text, interval.end, end, flags))
3376 interval.end = m_path_char->interval.end;
3377 else
3378 break;
3379 }
3380
// No marker was found: succeed only if at least one path character was consumed.
3381 if (start < interval.end) {
3382 path.end = interval.end;
3383 interval.start = start;
3384 return true;
3385 }
3386
// Nothing matched. `query` is not reset here; it still holds the start=1/end=0
// assigned at the top of the function.
3387 path.start = 1;
3388 path.end = 0;
3389 bookmark.start = 1;
3390 bookmark.end = 0;
3391 interval.start = (interval.end = start) + 1;
3392 return false;
3393 }
3394
/// Resets path, query and bookmark to start=1/end=0.
3395 virtual void invalidate()
3396 {
3397 path.start = 1;
3398 path.end = 0;
3399 query.start = 1;
3400 query.end = 0;
3401 bookmark.start = 1;
3402 bookmark.end = 0;
// NOTE(review): the listing jumps from 3402 to 3404; a statement may be missing here — confirm against the original header.
3404 }
3405
3406 public:
// Interval of the text after the bookmark-start marker; start=1/end=0 when absent.
3409 stdex::interval<size_t> bookmark;
3410
3411 protected:
// Parser for one character of path, query or bookmark text.
3412 std::shared_ptr<basic_parser<T>> m_path_char;
// Parser for the marker that begins the query.
3413 std::shared_ptr<basic_parser<T>> m_query_start;
// Parser for the marker that begins the bookmark.
3414 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3415 };
3416
3419#ifdef _UNICODE
3420 using turl_path = wurl_path;
3421#else
3422 using turl_path = url_path;
3423#endif
3425
3429 template <class T>
3430 class basic_url : public basic_parser<T>
3431 {
3432 public:
/// Builds the URL parser from its component parsers. Each argument is stored in the
/// member of the matching name: parameters with a leading underscore go to the
/// member without it (e.g. _http_scheme -> http_scheme), the remaining ones to the
/// m_-prefixed member (e.g. colon -> m_colon).
///
/// \param[in] _http_scheme    Parser for the http scheme name
/// \param[in] _ftp_scheme     Parser for the ftp scheme name
/// \param[in] _mailto_scheme  Parser for the mailto scheme name
/// \param[in] _file_scheme    Parser for the file scheme name
/// \param[in] colon           Parser for the colon separator
/// \param[in] slash           Parser for the slash separator
/// \param[in] _username       Parser for the username
/// \param[in] _password       Parser for the password
/// \param[in] at              Parser for the separator that follows the credentials
/// \param[in] ip_lbracket     Parser for the opening bracket around an IPv6 host
/// \param[in] ip_rbracket     Parser for the closing bracket around an IPv6 host
/// \param[in] _ipv4_host      Parser for an IPv4 host
/// \param[in] _ipv6_host      Parser for an IPv6 host
/// \param[in] _dns_host       Parser for a DNS host name
/// \param[in] _port           Parser for the port
/// \param[in] _path           Parser for the path
/// \param[in] locale          Locale handed to basic_parser<T>
3433 basic_url(
3434 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3435 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3436 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3437 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3438 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3439 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3440 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3441 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3442 _In_ const std::shared_ptr<basic_parser<T>>& at,
3443 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3444 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3445 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3446 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3447 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3448 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3449 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3450 _In_ const std::locale& locale = std::locale()) :
3451 basic_parser<T>(locale),
3452 http_scheme(_http_scheme),
3453 ftp_scheme(_ftp_scheme),
3454 mailto_scheme(_mailto_scheme),
3455 file_scheme(_file_scheme),
3456 m_colon(colon),
3457 m_slash(slash),
3458 username(_username),
3459 password(_password),
3460 m_at(at),
3461 m_ip_lbracket(ip_lbracket),
3462 m_ip_rbracket(ip_rbracket),
3463 ipv4_host(_ipv4_host),
3464 ipv6_host(_ipv6_host),
3465 dns_host(_dns_host),
3466 port(_port),
3467 path(_path)
3468 {}
3469
3470 virtual bool match(
3471 _In_reads_or_z_(end) const T* text,
3472 _In_ size_t start = 0,
3473 _In_ size_t end = (size_t)-1,
3474 _In_ int flags = match_default)
3475 {
3476 assert(text || start >= end);
3477
3478 interval.end = start;
3479
3480 if (http_scheme->match(text, interval.end, end, flags) &&
3481 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3482 m_slash->match(text, m_colon->interval.end, end, flags) &&
3483 m_slash->match(text, m_slash->interval.end, end, flags))
3484 {
3485 // http://
3486 interval.end = m_slash->interval.end;
3487 ftp_scheme->invalidate();
3488 mailto_scheme->invalidate();
3489 file_scheme->invalidate();
3490 }
3491 else if (ftp_scheme->match(text, interval.end, end, flags) &&
3492 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3493 m_slash->match(text, m_colon->interval.end, end, flags) &&
3494 m_slash->match(text, m_slash->interval.end, end, flags))
3495 {
3496 // ftp://
3497 interval.end = m_slash->interval.end;
3498 http_scheme->invalidate();
3499 mailto_scheme->invalidate();
3500 file_scheme->invalidate();
3501 }
3502 else if (mailto_scheme->match(text, interval.end, end, flags) &&
3503 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3504 {
3505 // mailto:
3506 interval.end = m_colon->interval.end;
3507 http_scheme->invalidate();
3508 ftp_scheme->invalidate();
3509 file_scheme->invalidate();
3510 }
3511 else if (file_scheme->match(text, interval.end, end, flags) &&
3512 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3513 m_slash->match(text, m_colon->interval.end, end, flags) &&
3514 m_slash->match(text, m_slash->interval.end, end, flags))
3515 {
3516 // file://
3517 interval.end = m_slash->interval.end;
3518 http_scheme->invalidate();
3519 ftp_scheme->invalidate();
3520 mailto_scheme->invalidate();
3521 }
3522 else {
3523 // Default to http:
3524 http_scheme->invalidate();
3525 ftp_scheme->invalidate();
3526 mailto_scheme->invalidate();
3527 file_scheme->invalidate();
3528 }
3529
3530 if (ftp_scheme->interval) {
3531 if (username->match(text, interval.end, end, flags)) {
3532 if (m_colon->match(text, username->interval.end, end, flags) &&
3533 password->match(text, m_colon->interval.end, end, flags) &&
3534 m_at->match(text, password->interval.end, end, flags))
3535 {
3536 // Username and password
3537 interval.end = m_at->interval.end;
3538 }
3539 else if (m_at->match(text, interval.end, end, flags)) {
3540 // Username only
3541 interval.end = m_at->interval.end;
3542 password->invalidate();
3543 }
3544 else {
3545 username->invalidate();
3546 password->invalidate();
3547 }
3548 }
3549 else {
3550 username->invalidate();
3551 password->invalidate();
3552 }
3553
3554 if (ipv4_host->match(text, interval.end, end, flags)) {
3555 // Host is IPv4
3556 interval.end = ipv4_host->interval.end;
3557 ipv6_host->invalidate();
3558 dns_host->invalidate();
3559 }
3560 else if (
3561 m_ip_lbracket->match(text, interval.end, end, flags) &&
3562 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3563 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3564 {
3565 // Host is IPv6
3566 interval.end = m_ip_rbracket->interval.end;
3567 ipv4_host->invalidate();
3568 dns_host->invalidate();
3569 }
3570 else if (dns_host->match(text, interval.end, end, flags)) {
3571 // Host is hostname
3572 interval.end = dns_host->interval.end;
3573 ipv4_host->invalidate();
3574 ipv6_host->invalidate();
3575 }
3576 else {
3577 invalidate();
3578 return false;
3579 }
3580
3581 if (m_colon->match(text, interval.end, end, flags) &&
3582 port->match(text, m_colon->interval.end, end, flags))
3583 {
3584 // Port
3585 interval.end = port->interval.end;
3586 }
3587 else
3588 port->invalidate();
3589
3590 if (path->match(text, interval.end, end, flags)) {
3591 // Path
3592 interval.end = path->interval.end;
3593 }
3594
3595 interval.start = start;
3596 return true;
3597 }
3598
3599 if (mailto_scheme->interval) {
3600 if (username->match(text, interval.end, end, flags) &&
3601 m_at->match(text, username->interval.end, end, flags))
3602 {
3603 // Username
3604 interval.end = m_at->interval.end;
3605 }
3606 else {
3607 invalidate();
3608 return false;
3609 }
3610
3611 if (m_ip_lbracket->match(text, interval.end, end, flags) &&
3612 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3613 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3614 {
3615 // Host is IPv4
3616 interval.end = m_ip_rbracket->interval.end;
3617 ipv6_host->invalidate();
3618 dns_host->invalidate();
3619 }
3620 else if (
3621 m_ip_lbracket->match(text, interval.end, end, flags) &&
3622 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3623 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3624 {
3625 // Host is IPv6
3626 interval.end = m_ip_rbracket->interval.end;
3627 ipv4_host->invalidate();
3628 dns_host->invalidate();
3629 }
3630 else if (dns_host->match(text, interval.end, end, flags)) {
3631 // Host is hostname
3632 interval.end = dns_host->interval.end;
3633 ipv4_host->invalidate();
3634 ipv6_host->invalidate();
3635 }
3636 else {
3637 invalidate();
3638 return false;
3639 }
3640
3641 password->invalidate();
3642 port->invalidate();
3643 path->invalidate();
3644 interval.start = start;
3645 return true;
3646 }
3647
3648 if (file_scheme->interval) {
3649 if (path->match(text, interval.end, end, flags)) {
3650 // Path
3651 interval.end = path->interval.end;
3652 }
3653
3654 username->invalidate();
3655 password->invalidate();
3656 ipv4_host->invalidate();
3657 ipv6_host->invalidate();
3658 dns_host->invalidate();
3659 port->invalidate();
3660 interval.start = start;
3661 return true;
3662 }
3663
3664 // "http://" found or defaulted to
3665
3666 // If "http://" explicit, test for username&password.
3667 if (http_scheme->interval &&
3668 username->match(text, interval.end, end, flags))
3669 {
3670 if (m_colon->match(text, username->interval.end, end, flags) &&
3671 password->match(text, m_colon->interval.end, end, flags) &&
3672 m_at->match(text, password->interval.end, end, flags))
3673 {
3674 // Username and password
3675 interval.end = m_at->interval.end;
3676 }
3677 else if (m_at->match(text, username->interval.end, end, flags)) {
3678 // Username only
3679 interval.end = m_at->interval.end;
3680 password->invalidate();
3681 }
3682 else {
3683 username->invalidate();
3684 password->invalidate();
3685 }
3686 }
3687 else {
3688 username->invalidate();
3689 password->invalidate();
3690 }
3691
3692 if (ipv4_host->match(text, interval.end, end, flags)) {
3693 // Host is IPv4
3694 interval.end = ipv4_host->interval.end;
3695 ipv6_host->invalidate();
3696 dns_host->invalidate();
3697 }
3698 else if (
3699 m_ip_lbracket->match(text, interval.end, end, flags) &&
3700 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3701 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3702 {
3703 // Host is IPv6
3704 interval.end = m_ip_rbracket->interval.end;
3705 ipv4_host->invalidate();
3706 dns_host->invalidate();
3707 }
3708 else if (dns_host->match(text, interval.end, end, flags)) {
3709 // Host is hostname
3710 interval.end = dns_host->interval.end;
3711 ipv4_host->invalidate();
3712 ipv6_host->invalidate();
3713 }
3714 else {
3715 invalidate();
3716 return false;
3717 }
3718
3719 if (m_colon->match(text, interval.end, end, flags) &&
3720 port->match(text, m_colon->interval.end, end, flags))
3721 {
3722 // Port
3723 interval.end = port->interval.end;
3724 }
3725 else
3726 port->invalidate();
3727
3728 if (path->match(text, interval.end, end, flags)) {
3729 // Path
3730 interval.end = path->interval.end;
3731 }
3732
3733 interval.start = start;
3734 return true;
3735 }
3736
3737 virtual void invalidate()
3738 {
3739 http_scheme->invalidate();
3740 ftp_scheme->invalidate();
3741 mailto_scheme->invalidate();
3742 file_scheme->invalidate();
3743 username->invalidate();
3744 password->invalidate();
3745 ipv4_host->invalidate();
3746 ipv6_host->invalidate();
3747 dns_host->invalidate();
3748 port->invalidate();
3749 path->invalidate();
3751 }
3752
3753 public:
3754 std::shared_ptr<basic_parser<T>> http_scheme;
3755 std::shared_ptr<basic_parser<T>> ftp_scheme;
3756 std::shared_ptr<basic_parser<T>> mailto_scheme;
3757 std::shared_ptr<basic_parser<T>> file_scheme;
3758 std::shared_ptr<basic_parser<T>> username;
3759 std::shared_ptr<basic_parser<T>> password;
3760 std::shared_ptr<basic_parser<T>> ipv4_host;
3761 std::shared_ptr<basic_parser<T>> ipv6_host;
3762 std::shared_ptr<basic_parser<T>> dns_host;
3763 std::shared_ptr<basic_parser<T>> port;
3764 std::shared_ptr<basic_parser<T>> path;
3765
3766 protected:
3767 std::shared_ptr<basic_parser<T>> m_colon;
3768 std::shared_ptr<basic_parser<T>> m_slash;
3769 std::shared_ptr<basic_parser<T>> m_at;
3770 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3771 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3772 };
3773
// URL parser instantiations: `url` parses char text, `wurl` parses wchar_t text.
3774 using url = basic_url<char>;
3775 using wurl = basic_url<wchar_t>;
// `turl` follows the build's character set: wide when _UNICODE is defined, narrow otherwise.
3776#ifdef _UNICODE
3777 using turl = wurl;
3778#else
3779 using turl = url;
3780#endif
// Same instantiation as `url`. NOTE(review): presumably intended for SGML-encoded text - confirm.
3781 using sgml_url = basic_url<char>;
3782
3786 template <class T>
3788 {
3789 public:
3791 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3792 _In_ const std::shared_ptr<basic_parser<T>>& at,
3793 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3794 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3795 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3796 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3797 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3798 _In_ const std::locale& locale = std::locale()) :
3799 basic_parser<T>(locale),
3800 username(_username),
3801 m_at(at),
3802 m_ip_lbracket(ip_lbracket),
3803 m_ip_rbracket(ip_rbracket),
3804 ipv4_host(_ipv4_host),
3805 ipv6_host(_ipv6_host),
3806 dns_host(_dns_host)
3807 {}
3808
3809 virtual bool match(
3810 _In_reads_or_z_(end) const T* text,
3811 _In_ size_t start = 0,
3812 _In_ size_t end = (size_t)-1,
3813 _In_ int flags = match_default)
3814 {
3815 assert(text || start >= end);
3816
3817 if (username->match(text, start, end, flags) &&
3818 m_at->match(text, username->interval.end, end, flags))
3819 {
3820 // Username@
3821 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3822 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3823 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3824 {
3825 // Host is IPv4
3826 interval.end = m_ip_rbracket->interval.end;
3827 ipv6_host->invalidate();
3828 dns_host->invalidate();
3829 }
3830 else if (
3831 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3832 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3833 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3834 {
3835 // Host is IPv6
3836 interval.end = m_ip_rbracket->interval.end;
3837 ipv4_host->invalidate();
3838 dns_host->invalidate();
3839 }
3840 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3841 // Host is hostname
3842 interval.end = dns_host->interval.end;
3843 ipv4_host->invalidate();
3844 ipv6_host->invalidate();
3845 }
3846 else
3847 goto error;
3848 interval.start = start;
3849 return true;
3850 }
3851
3852 error:
3853 username->invalidate();
3854 ipv4_host->invalidate();
3855 ipv6_host->invalidate();
3856 dns_host->invalidate();
3857 interval.start = (interval.end = start) + 1;
3858 return false;
3859 }
3860
3861 virtual void invalidate()
3862 {
3863 username->invalidate();
3864 ipv4_host->invalidate();
3865 ipv6_host->invalidate();
3866 dns_host->invalidate();
3868 }
3869
3870 public:
3871 std::shared_ptr<basic_parser<T>> username;
3872 std::shared_ptr<basic_parser<T>> ipv4_host;
3873 std::shared_ptr<basic_parser<T>> ipv6_host;
3874 std::shared_ptr<basic_parser<T>> dns_host;
3875
3876 protected:
3877 std::shared_ptr<basic_parser<T>> m_at;
3878 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3879 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3880 };
3881
3884#ifdef _UNICODE
3886#else
3888#endif
3890
3894 template <class T>
3896 {
3897 public:
3899 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3900 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3901 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3902 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3903 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3904 _In_ const std::locale& locale = std::locale()) :
3905 basic_parser<T>(locale),
3906 emoticon(_emoticon),
3907 apex(_apex),
3908 eyes(_eyes),
3909 nose(_nose),
3910 mouth(_mouth)
3911 {}
3912
3913 virtual bool match(
3914 _In_reads_or_z_(end) const T* text,
3915 _In_ size_t start = 0,
3916 _In_ size_t end = (size_t)-1,
3917 _In_ int flags = match_default)
3918 {
3919 assert(text || start >= end);
3920
3921 if (emoticon && emoticon->match(text, start, end, flags)) {
3922 if (apex) apex->invalidate();
3923 eyes->invalidate();
3924 if (nose) nose->invalidate();
3925 mouth->invalidate();
3926 interval.start = start;
3928 return true;
3929 }
3930
3931 interval.end = start;
3932
3933 if (apex && apex->match(text, interval.end, end, flags))
3934 interval.end = apex->interval.end;
3935
3936 if (eyes->match(text, interval.end, end, flags)) {
3937 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3938 mouth->match(text, nose->interval.end, end, flags))
3939 {
3940 size_t
3941 start_mouth = mouth->interval.start,
3942 hit_offset = mouth->hit_offset;
3943 // Mouth may repeat :-)))))))
3944 for (interval.end = mouth->interval.end; mouth->match(text, interval.end, end, flags) && mouth->hit_offset == hit_offset; interval.end = mouth->interval.end);
3945 mouth->interval.start = start_mouth;
3946 mouth->interval.end = interval.end;
3947 interval.start = start;
3948 return true;
3949 }
3950 if (mouth->match(text, eyes->interval.end, end, flags)) {
3951 size_t
3952 start_mouth = mouth->interval.start,
3953 hit_offset = mouth->hit_offset;
3954 // Mouth may repeat :-)))))))
3955 for (interval.end = mouth->interval.end; mouth->match(text, interval.end, end, flags) && mouth->hit_offset == hit_offset; interval.end = mouth->interval.end);
3956 if (nose) nose->invalidate();
3957 mouth->interval.start = start_mouth;
3958 mouth->interval.end = interval.end;
3959 interval.start = start;
3960 return true;
3961 }
3962 }
3963
3964 if (emoticon) emoticon->invalidate();
3965 if (apex) apex->invalidate();
3966 eyes->invalidate();
3967 if (nose) nose->invalidate();
3968 mouth->invalidate();
3969 interval.start = (interval.end = start) + 1;
3970 return false;
3971 }
3972
3973 virtual void invalidate()
3974 {
3975 if (emoticon) emoticon->invalidate();
3976 if (apex) apex->invalidate();
3977 eyes->invalidate();
3978 if (nose) nose->invalidate();
3979 mouth->invalidate();
3981 }
3982
3983 public:
3984 std::shared_ptr<basic_parser<T>> emoticon;
3985 std::shared_ptr<basic_parser<T>> apex;
3986 std::shared_ptr<basic_parser<T>> eyes;
3987 std::shared_ptr<basic_parser<T>> nose;
3988 std::shared_ptr<basic_set<T>> mouth;
3989 };
3990
// `temoticon` follows the build's character set: `wemoticon` when _UNICODE is defined, `emoticon` otherwise.
// NOTE(review): neither `emoticon` nor `wemoticon` is declared in the visible part of this listing - confirm they exist.
3993#ifdef _UNICODE
3994 using temoticon = wemoticon;
3995#else
3996 using temoticon = emoticon;
3997#endif
3999
4003 ENUM_FLAGS(date_format_t, int) {
4004 none = 0,
4005 dmy = 0x1,
4006 mdy = 0x2,
4007 ymd = 0x4,
4008 ym = 0x8,
4009 my = 0x10,
4010 dm = 0x20,
4011 md = 0x40,
4012 };
4013
4017 template <class T>
4018 class basic_date : public basic_parser<T>
4019 {
4020 public:
4021 basic_date(
4022 _In_ int format_mask,
4023 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4024 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4025 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4026 _In_ const std::shared_ptr<basic_set<T>>& separator,
4027 _In_ const std::shared_ptr<basic_parser<T>>& space,
4028 _In_ const std::locale& locale = std::locale()) :
4029 basic_parser<T>(locale),
4030 format(date_format_t::none),
4031 m_format_mask(format_mask),
4032 day(_day),
4033 month(_month),
4034 year(_year),
4035 m_separator(separator),
4036 m_space(space)
4037 {}
4038
4039 virtual bool match(
4040 _In_reads_or_z_(end) const T* text,
4041 _In_ size_t start = 0,
4042 _In_ size_t end = (size_t)-1,
4043 _In_ int flags = match_default)
4044 {
4045 assert(text || start >= end);
4046
4047 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4048 if ((m_format_mask & date_format_t::dmy) == date_format_t::dmy) {
4049 if (day->match(text, start, end, flags)) {
4050 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4051 if (m_separator->match(text, interval.end, end, flags)) {
4052 size_t hit_offset = m_separator->hit_offset;
4053 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4054 if (month->match(text, interval.end, end, flags)) {
4055 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4056 if (m_separator->match(text, interval.end, end, flags) &&
4057 m_separator->hit_offset == hit_offset) // Both separators must match.
4058 {
4059 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4060 if (year->match(text, interval.end, end, flags) &&
4061 is_valid(day->value, month->value))
4062 {
4063 interval.start = start;
4064 interval.end = year->interval.end;
4065 format = date_format_t::dmy;
4066 return true;
4067 }
4068 }
4069 }
4070 }
4071 }
4072 }
4073
4074 if ((m_format_mask & date_format_t::mdy) == date_format_t::mdy) {
4075 if (month->match(text, start, end, flags)) {
4076 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4077 if (m_separator->match(text, interval.end, end, flags)) {
4078 size_t hit_offset = m_separator->hit_offset;
4079 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4080 if (day->match(text, interval.end, end, flags)) {
4081 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4082 if (m_separator->match(text, interval.end, end, flags) &&
4083 m_separator->hit_offset == hit_offset) // Both separators must match.
4084 {
4085 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4086 if (year->match(text, interval.end, end, flags) &&
4087 is_valid(day->value, month->value))
4088 {
4089 interval.start = start;
4090 interval.end = year->interval.end;
4091 format = date_format_t::mdy;
4092 return true;
4093 }
4094 }
4095 }
4096 }
4097 }
4098 }
4099
4100 if ((m_format_mask & date_format_t::ymd) == date_format_t::ymd) {
4101 if (year->match(text, start, end, flags)) {
4102 for (interval.end = year->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4103 if (m_separator->match(text, interval.end, end, flags)) {
4104 size_t hit_offset = m_separator->hit_offset;
4105 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4106 if (month->match(text, interval.end, end, flags)) {
4107 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4108 if (m_separator->match(text, interval.end, end, flags) &&
4109 m_separator->hit_offset == hit_offset) // Both separators must match.
4110 {
4111 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4112 if (day->match(text, interval.end, end, flags) &&
4113 is_valid(day->value, month->value))
4114 {
4115 interval.start = start;
4116 interval.end = day->interval.end;
4117 format = date_format_t::ymd;
4118 return true;
4119 }
4120 }
4121 }
4122 }
4123 }
4124 }
4125
4126 if ((m_format_mask & date_format_t::ym) == date_format_t::ym) {
4127 if (year->match(text, start, end, flags)) {
4128 for (interval.end = year->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4129 if (m_separator->match(text, interval.end, end, flags)) {
4130 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4131 if (month->match(text, interval.end, end, flags) &&
4132 is_valid((size_t)-1, month->value))
4133 {
4134 if (day) day->invalidate();
4135 interval.start = start;
4136 interval.end = month->interval.end;
4137 format = date_format_t::ym;
4138 return true;
4139 }
4140 }
4141 }
4142 }
4143
4144 if ((m_format_mask & date_format_t::my) == date_format_t::my) {
4145 if (month->match(text, start, end, flags)) {
4146 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4147 if (m_separator->match(text, interval.end, end, flags)) {
4148 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4149 if (year->match(text, interval.end, end, flags) &&
4150 is_valid((size_t)-1, month->value))
4151 {
4152 if (day) day->invalidate();
4153 interval.start = start;
4154 interval.end = year->interval.end;
4155 format = date_format_t::my;
4156 return true;
4157 }
4158 }
4159 }
4160 }
4161
4162 if ((m_format_mask & date_format_t::dm) == date_format_t::dm) {
4163 if (day->match(text, start, end, flags)) {
4164 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4165 if (m_separator->match(text, interval.end, end, flags)) {
4166 size_t hit_offset = m_separator->hit_offset;
4167 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4168 if (month->match(text, interval.end, end, flags) &&
4169 is_valid(day->value, month->value))
4170 {
4171 if (year) year->invalidate();
4172 interval.start = start;
4173 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4174 if (m_separator->match(text, interval.end, end, flags) &&
4175 m_separator->hit_offset == hit_offset) // Both separators must match.
4176 interval.end = m_separator->interval.end;
4177 else
4178 interval.end = month->interval.end;
4179 format = date_format_t::dm;
4180 return true;
4181 }
4182 }
4183 }
4184 }
4185
4186 if ((m_format_mask & date_format_t::md) == date_format_t::md) {
4187 if (month->match(text, start, end, flags)) {
4188 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4189 if (m_separator->match(text, interval.end, end, flags)) {
4190 size_t hit_offset = m_separator->hit_offset;
4191 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4192 if (day->match(text, interval.end, end, flags) &&
4193 is_valid(day->value, month->value))
4194 {
4195 if (year) year->invalidate();
4196 interval.start = start;
4197 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4198 if (m_separator->match(text, interval.end, end, flags) &&
4199 m_separator->hit_offset == hit_offset) // Both separators must match.
4200 interval.end = m_separator->interval.end;
4201 else
4202 interval.end = day->interval.end;
4203 format = date_format_t::md;
4204 return true;
4205 }
4206 }
4207 }
4208 }
4209
4210 if (day) day->invalidate();
4211 if (month) month->invalidate();
4212 if (year) year->invalidate();
4213 format = date_format_t::none;
4214 interval.start = (interval.end = start) + 1;
4215 return false;
4216 }
4217
4218 virtual void invalidate()
4219 {
4220 if (day) day->invalidate();
4221 if (month) month->invalidate();
4222 if (year) year->invalidate();
4223 format = date_format_t::none;
4225 }
4226
4227 protected:
/// Checks that `day` exists in `month`.
///
/// The year is not known here, so February is always allowed 29 days.
///
/// \param[in] day    Day of month (1-based), or (size_t)-1 to validate the month only
/// \param[in] month  Month (1 = January ... 12 = December), or (size_t)-1 to validate the day only
///
/// \returns `true` if the combination is plausible; `false` otherwise
static inline bool is_valid(size_t day, size_t month)
{
    const size_t unknown = (size_t)-1;

    // An unknown month is treated as January, which has all 31 days.
    // An unknown day is treated as the 1st, which every month has.
    const size_t m = month == unknown ? 1 : month;
    const size_t d = day == unknown ? 1 : day;

    if (m < 1 || m > 12)
        return false;

    static const size_t days_in_month[12] = { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
    return d >= 1 && d <= days_in_month[m - 1];
}
4259
4260 public:
4261 date_format_t format;
4262 std::shared_ptr<basic_integer<T>> day;
4263 std::shared_ptr<basic_integer<T>> month;
4264 std::shared_ptr<basic_integer<T>> year;
4265
4266 protected:
4267 int m_format_mask;
4268 std::shared_ptr<basic_set<T>> m_separator;
4269 std::shared_ptr<basic_parser<T>> m_space;
4270 };
4271
// Date parser instantiations: `date` parses char text, `wdate` parses wchar_t text.
4272 using date = basic_date<char>;
4273 using wdate = basic_date<wchar_t>;
// `tdate` follows the build's character set: wide when _UNICODE is defined, narrow otherwise.
4274#ifdef _UNICODE
4275 using tdate = wdate;
4276#else
4277 using tdate = date;
4278#endif
4280
4284 template <class T>
4285 class basic_time : public basic_parser<T>
4286 {
4287 public:
4288 basic_time(
4289 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4290 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4291 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4292 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4293 _In_ const std::shared_ptr<basic_set<T>>& separator,
4294 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4295 _In_ const std::locale& locale = std::locale()) :
4296 basic_parser<T>(locale),
4297 hour(_hour),
4298 minute(_minute),
4299 second(_second),
4300 millisecond(_millisecond),
4301 m_separator(separator),
4302 m_millisecond_separator(millisecond_separator)
4303 {}
4304
4305 virtual bool match(
4306 _In_reads_or_z_(end) const T* text,
4307 _In_ size_t start = 0,
4308 _In_ size_t end = (size_t)-1,
4309 _In_ int flags = match_default)
4310 {
4311 assert(text || start >= end);
4312
4313 if (hour->match(text, start, end, flags) &&
4314 m_separator->match(text, hour->interval.end, end, flags) &&
4315 minute->match(text, m_separator->interval.end, end, flags) &&
4316 minute->value < 60)
4317 {
4318 // hh::mm
4319 size_t hit_offset = m_separator->hit_offset;
4320 if (m_separator->match(text, minute->interval.end, end, flags) &&
4321 m_separator->hit_offset == hit_offset && // Both separators must match.
4322 second && second->match(text, m_separator->interval.end, end, flags) &&
4323 second->value < 60)
4324 {
4325 // hh::mm:ss
4326 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4327 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4328 millisecond->value < 1000)
4329 {
4330 // hh::mm:ss.mmmm
4331 interval.end = millisecond->interval.end;
4332 }
4333 else {
4334 if (millisecond) millisecond->invalidate();
4335 interval.end = second->interval.end;
4336 }
4337 }
4338 else {
4339 if (second) second->invalidate();
4340 if (millisecond) millisecond->invalidate();
4341 interval.end = minute->interval.end;
4342 }
4343 interval.start = start;
4344 return true;
4345 }
4346
4347 hour->invalidate();
4348 minute->invalidate();
4349 if (second) second->invalidate();
4350 if (millisecond) millisecond->invalidate();
4351 interval.start = (interval.end = start) + 1;
4352 return false;
4353 }
4354
4355 virtual void invalidate()
4356 {
4357 hour->invalidate();
4358 minute->invalidate();
4359 if (second) second->invalidate();
4360 if (millisecond) millisecond->invalidate();
4362 }
4363
4364 public:
4365 std::shared_ptr<basic_integer10<T>> hour;
4366 std::shared_ptr<basic_integer10<T>> minute;
4367 std::shared_ptr<basic_integer10<T>> second;
4368 std::shared_ptr<basic_integer10<T>> millisecond;
4369
4370 protected:
4371 std::shared_ptr<basic_set<T>> m_separator;
4372 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4373 };
4374
// Time parser instantiations: `time` parses char text, `wtime` parses wchar_t text.
4375 using time = basic_time<char>;
4376 using wtime = basic_time<wchar_t>;
// `ttime` follows the build's character set: wide when _UNICODE is defined, narrow otherwise.
4377#ifdef _UNICODE
4378 using ttime = wtime;
4379#else
4380 using ttime = time;
4381#endif
4383
4387 template <class T>
4388 class basic_angle : public basic_parser<T>
4389 {
4390 public:
4392 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4393 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4394 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4395 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4396 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4397 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4398 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4399 _In_ const std::locale& locale = std::locale()) :
4400 basic_parser<T>(locale),
4401 degree(_degree),
4402 degree_separator(_degree_separator),
4403 minute(_minute),
4404 minute_separator(_minute_separator),
4405 second(_second),
4406 second_separator(_second_separator),
4407 decimal(_decimal)
4408 {}
4409
4410 virtual bool match(
4411 _In_reads_or_z_(end) const T* text,
4412 _In_ size_t start = 0,
4413 _In_ size_t end = (size_t)-1,
4414 _In_ int flags = match_default)
4415 {
4416 assert(text || start >= end);
4417
4418 interval.end = start;
4419
4420 if (degree->match(text, interval.end, end, flags) &&
4421 degree_separator->match(text, degree->interval.end, end, flags))
4422 {
4423 // Degrees
4424 interval.end = degree_separator->interval.end;
4425 }
4426 else {
4427 degree->invalidate();
4428 degree_separator->invalidate();
4429 }
4430
4431 if (minute->match(text, interval.end, end, flags) &&
4432 minute->value < 60 &&
4433 minute_separator->match(text, minute->interval.end, end, flags))
4434 {
4435 // Minutes
4436 interval.end = minute_separator->interval.end;
4437 }
4438 else {
4439 minute->invalidate();
4440 minute_separator->invalidate();
4441 }
4442
4443 if (second && second->match(text, interval.end, end, flags) &&
4444 second->value < 60)
4445 {
4446 // Seconds
4447 interval.end = second->interval.end;
4448 if (second_separator && second_separator->match(text, interval.end, end, flags))
4449 interval.end = second_separator->interval.end;
4450 else
4451 if (second_separator) second_separator->invalidate();
4452 }
4453 else {
4454 if (second) second->invalidate();
4455 if (second_separator) second_separator->invalidate();
4456 }
4457
4458 if (degree->interval.start < degree->interval.end ||
4459 minute->interval.start < minute->interval.end ||
4460 second && second->interval.start < second->interval.end)
4461 {
4462 if (decimal && decimal->match(text, interval.end, end, flags)) {
4463 // Decimals
4464 interval.end = decimal->interval.end;
4465 }
4466 else if (decimal)
4467 decimal->invalidate();
4468 interval.start = start;
4469 return true;
4470 }
4471 if (decimal) decimal->invalidate();
4472 interval.start = (interval.end = start) + 1;
4473 return false;
4474 }
4475
4476 virtual void invalidate()
4477 {
4478 degree->invalidate();
4479 degree_separator->invalidate();
4480 minute->invalidate();
4481 minute_separator->invalidate();
4482 if (second) second->invalidate();
4483 if (second_separator) second_separator->invalidate();
4484 if (decimal) decimal->invalidate();
4486 }
4487
4488 public:
4489 std::shared_ptr<basic_integer10<T>> degree;
4490 std::shared_ptr<basic_parser<T>> degree_separator;
4491 std::shared_ptr<basic_integer10<T>> minute;
4492 std::shared_ptr<basic_parser<T>> minute_separator;
4493 std::shared_ptr<basic_integer10<T>> second;
4494 std::shared_ptr<basic_parser<T>> second_separator;
4495 std::shared_ptr<basic_parser<T>> decimal;
4496 };
4497
4498 using angle = basic_angle<char>;
4500#ifdef _UNICODE
4501 using RRegElKot = wangle;
4502#else
4503 using RRegElKot = angle;
4504#endif
4506
4510 template <class T>
4512 {
4513 public:
4515 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4516 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4517 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4518 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4519 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4520 _In_ const std::shared_ptr<basic_parser<T>>& space,
4521 _In_ const std::locale& locale = std::locale()) :
4522 basic_parser<T>(locale),
4523 m_digit(digit),
4524 m_plus_sign(plus_sign),
4525 m_lparenthesis(lparenthesis),
4526 m_rparenthesis(rparenthesis),
4527 m_separator(separator),
4528 m_space(space)
4529 {}
4530
4531 virtual bool match(
4532 _In_reads_or_z_(end) const T* text,
4533 _In_ size_t start = 0,
4534 _In_ size_t end = (size_t)-1,
4535 _In_ int flags = match_default)
4536 {
4537 assert(text || start >= end);
4538
4539 size_t safe_digit_end = start, safe_value_size = 0;
4540 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4541 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4542
4543 interval.end = start;
4544 value.clear();
4545 m_lparenthesis->invalidate();
4546 m_rparenthesis->invalidate();
4547
4548 if (m_plus_sign && m_plus_sign->match(text, interval.end, end, flags)) {
4549 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4550 safe_value_size = value.size();
4551 interval.end = m_plus_sign->interval.end;
4552 }
4553
4554 for (;;) {
4555 assert(text || interval.end >= end);
4556 if (interval.end >= end || !text[interval.end])
4557 break;
4558 if (m_digit->match(text, interval.end, end, flags)) {
4559 // Digit
4560 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4561 interval.end = m_digit->interval.end;
4562 if (!in_parentheses) {
4563 safe_digit_end = interval.end;
4564 safe_value_size = value.size();
4565 has_digits = true;
4566 }
4567 after_digit = true;
4568 after_parentheses = false;
4569 }
4570 else if (
4571 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4572 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4573 m_lparenthesis->match(text, interval.end, end, flags))
4574 {
4575 // Left parenthesis
4576 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4577 interval.end = m_lparenthesis->interval.end;
4578 in_parentheses = true;
4579 after_digit = false;
4580 after_parentheses = false;
4581 }
4582 else if (
4583 in_parentheses && // After left parenthesis
4584 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4585 m_rparenthesis->match(text, interval.end, end, flags) &&
4586 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4587 {
4588 // Right parenthesis
4589 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4590 interval.end = m_rparenthesis->interval.end;
4591 safe_digit_end = interval.end;
4592 safe_value_size = value.size();
4593 in_parentheses = false;
4594 after_digit = false;
4595 after_parentheses = true;
4596 }
4597 else if (
4598 after_digit &&
4599 !in_parentheses && // No separators inside parentheses
4600 !after_parentheses && // No separators following right parenthesis
4601 m_separator && m_separator->match(text, interval.end, end, flags))
4602 {
4603 // Separator
4604 interval.end = m_separator->interval.end;
4605 after_digit = false;
4606 after_parentheses = false;
4607 }
4608 else if (
4609 (after_digit || after_parentheses) &&
4610 m_space && m_space->match(text, interval.end, end, space_match_flags))
4611 {
4612 // Space
4613 interval.end = m_space->interval.end;
4614 after_digit = false;
4615 after_parentheses = false;
4616 }
4617 else
4618 break;
4619 }
4620 if (has_digits) {
4621 value.erase(safe_value_size);
4622 interval.start = start;
4623 interval.end = safe_digit_end;
4624 return true;
4625 }
4626 value.clear();
4627 interval.start = (interval.end = start) + 1;
4628 return false;
4629 }
4630
/// Clears the collected phone number text.
4631 virtual void invalidate()
4632 {
4633 value.clear();
// NOTE(review): the listing jumps from line 4633 to 4635 here; one source line appears to be elided - confirm against the original header.
4635 }
4636
4637 public:
// Text collected by match(): plus sign, digits and parentheses, without separators and spaces.
4638 std::basic_string<T> value;
4639
4640 protected:
// Component parsers supplied to the constructor. match() treats every one except m_digit as optional.
4641 std::shared_ptr<basic_parser<T>> m_digit;
4642 std::shared_ptr<basic_parser<T>> m_plus_sign;
4643 std::shared_ptr<basic_set<T>> m_lparenthesis;
4644 std::shared_ptr<basic_set<T>> m_rparenthesis;
4645 std::shared_ptr<basic_parser<T>> m_separator;
4646 std::shared_ptr<basic_parser<T>> m_space;
4647 };
4648
4651#ifdef _UNICODE
4653#else
4655#endif
4657
4661 template <class T>
4663 {
4664 public:
4666 _In_ const std::shared_ptr<basic_parser<T>>& element,
4667 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4668 _In_ const std::shared_ptr<basic_parser<T>>& sign,
4669 _In_ const std::locale& locale = std::locale()) :
4670 basic_parser<T>(locale),
4671 m_element(element),
4672 m_digit(digit),
4673 m_sign(sign),
4674 has_digits(false),
4675 has_charge(false)
4676 {}
4677
4678 virtual bool match(
4679 _In_reads_or_z_(end) const T* text,
4680 _In_ size_t start = 0,
4681 _In_ size_t end = (size_t)-1,
4682 _In_ int flags = match_default)
4683 {
4684 assert(text || start >= end);
4685
4686 has_digits = false;
4687 has_charge = false;
4688 interval.end = start;
4689
4690 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
4691 for (;;) {
4692 if (m_element->match(text, interval.end, end, element_match_flags)) {
4693 interval.end = m_element->interval.end;
4694 while (m_digit->match(text, interval.end, end, flags)) {
4695 interval.end = m_digit->interval.end;
4696 has_digits = true;
4697 }
4698 }
4699 else if (start < interval.end) {
4700 if (m_sign->match(text, interval.end, end, flags)) {
4701 interval.end = m_sign->interval.end;
4702 has_charge = true;
4703 }
4704 interval.start = start;
4705 return true;
4706 }
4707 else {
4708 interval.start = (interval.end = start) + 1;
4709 return false;
4710 }
4711 }
4712 }
4713
/// Resets the result flags.
4714 virtual void invalidate()
4715 {
4716 has_digits = false;
4717 has_charge = false;
// NOTE(review): the listing jumps from line 4717 to 4719 here; one source line appears to be elided - confirm against the original header.
4719 }
4720
4721 public:
// Set by match(): whether any element was followed by digits, and whether a trailing sign was found.
4722 bool has_digits;
4723 bool has_charge;
4724
4725 protected:
// Component parsers supplied to the constructor; match() dereferences all three without a null check.
4726 std::shared_ptr<basic_parser<T>> m_element;
4727 std::shared_ptr<basic_parser<T>> m_digit;
4728 std::shared_ptr<basic_parser<T>> m_sign;
4729 };
4730
4733#ifdef _UNICODE
4735#else
4737#endif
4739
4744 {
4745 public:
4746 virtual bool match(
4747 _In_reads_or_z_(end) const char* text,
4748 _In_ size_t start = 0,
4749 _In_ size_t end = (size_t)-1,
4750 _In_ int flags = match_default)
4751 {
4752 assert(text || start >= end);
4753 interval.end = start;
4754
4755 assert(text || interval.end >= end);
4756 if (interval.end < end && text[interval.end]) {
4757 if (text[interval.end] == '\r') {
4758 interval.end++;
4759 if (interval.end < end && text[interval.end] == '\n') {
4760 interval.start = start;
4761 interval.end++;
4762 return true;
4763 }
4764 }
4765 else if (text[interval.end] == '\n') {
4766 interval.start = start;
4767 interval.end++;
4768 return true;
4769 }
4770 }
4771 interval.start = (interval.end = start) + 1;
4772 return false;
4773 }
4774 };
4775
4779 class http_space : public parser
4780 {
4781 public:
4782 virtual bool match(
4783 _In_reads_or_z_(end) const char* text,
4784 _In_ size_t start = 0,
4785 _In_ size_t end = (size_t)-1,
4786 _In_ int flags = match_default)
4787 {
4788 assert(text || start >= end);
4789 interval.end = start;
4790 if (m_line_break.match(text, interval.end, end, flags)) {
4791 interval.end = m_line_break.interval.end;
4792 if (interval.end < end && text[interval.end] && isspace(text[interval.end])) {
4793 interval.start = start;
4794 interval.end++;
4795 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
4796 return true;
4797 }
4798 }
4799 else if (interval.end < end && text[interval.end] && isspace(text[interval.end])) {
4800 interval.start = start;
4801 interval.end++;
4802 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
4803 return true;
4804 }
4805 interval.start = (interval.end = start) + 1;
4806 return false;
4807 }
4808
4809 protected:
4810 http_line_break m_line_break;
4811 };
4812
4816 class http_text_char : public parser
4817 {
4818 public:
4819 virtual bool match(
4820 _In_reads_or_z_(end) const char* text,
4821 _In_ size_t start = 0,
4822 _In_ size_t end = (size_t)-1,
4823 _In_ int flags = match_default)
4824 {
4825 assert(text || start >= end);
4826 interval.end = start;
4827
4828 assert(text || interval.end >= end);
4829 if (m_space.match(text, interval.end, end, flags)) {
4830 interval.start = start;
4831 interval.end = m_space.interval.end;
4832 return true;
4833 }
4834 else if (interval.end < end && text[interval.end] && text[interval.end] >= 0x20) {
4835 interval.start = start;
4836 interval.end++;
4837 return true;
4838 }
4839 interval.start = (interval.end = start) + 1;
4840 return false;
4841 }
4842
4843 protected:
4844 http_space m_space;
4845 };
4846
4850 class http_token : public parser
4851 {
4852 public:
4853 virtual bool match(
4854 _In_reads_or_z_(end) const char* text,
4855 _In_ size_t start = 0,
4856 _In_ size_t end = (size_t)-1,
4857 _In_ int flags = match_default)
4858 {
4859 assert(text || start >= end);
4860 interval.end = start;
4861 for (;;) {
4862 if (interval.end < end && text[interval.end]) {
4863 if ((unsigned int)text[interval.end] < 0x20 ||
4864 (unsigned int)text[interval.end] == 0x7f ||
4865 text[interval.end] == '(' ||
4866 text[interval.end] == ')' ||
4867 text[interval.end] == '<' ||
4868 text[interval.end] == '>' ||
4869 text[interval.end] == '@' ||
4870 text[interval.end] == ',' ||
4871 text[interval.end] == ';' ||
4872 text[interval.end] == ':' ||
4873 text[interval.end] == '\\' ||
4874 text[interval.end] == '\"' ||
4875 text[interval.end] == '/' ||
4876 text[interval.end] == '[' ||
4877 text[interval.end] == ']' ||
4878 text[interval.end] == '?' ||
4879 text[interval.end] == '=' ||
4880 text[interval.end] == '{' ||
4881 text[interval.end] == '}' ||
4882 isspace(text[interval.end]))
4883 break;
4884 else
4885 interval.end++;
4886 }
4887 else
4888 break;
4889 }
4890 if (start < interval.end) {
4891 interval.start = start;
4892 return true;
4893 }
4894 else {
4895 interval.start = (interval.end = start) + 1;
4896 return false;
4897 }
4898 }
4899 };
4900
4905 {
4906 public:
/// Matches a double-quoted string at `start`.
/// Inside the quotes a backslash escapes the character that follows it; any
/// other character must be accepted by the m_chr parser. A missing closing
/// quote is a failure.
///
/// \returns true when a complete quoted string was matched; on failure both
///          `content` and the interval are set to the invalid (start > end) state
4907 virtual bool match(
4908 _In_reads_or_z_(end) const char* text,
4909 _In_ size_t start = 0,
4910 _In_ size_t end = (size_t)-1,
4911 _In_ int flags = match_default)
4912 {
4913 assert(text || start >= end);
4914 interval.end = start;
// NOTE(review): when interval.end >= end this test is false and the opening
// quote is skipped without having been checked; the loop below then fails on
// its own bounds check, so the result is still "no match".
4915 if (interval.end < end && text[interval.end] != '"')
4916 goto error;
4917 interval.end++;
// NOTE(review): the listing jumps from line 4917 to 4919 here; one source line appears to be elided.
4919 for (;;) {
4920 assert(text || interval.end >= end);
4921 if (interval.end < end && text[interval.end]) {
4922 if (text[interval.end] == '"') {
// NOTE(review): the listing jumps from line 4922 to 4924 here; one source line appears to be elided.
4924 interval.end++;
4925 break;
4926 }
4927 else if (text[interval.end] == '\\') {
// Escape: the backslash must be followed by one more character.
4928 interval.end++;
4929 if (interval.end < end && text[interval.end]) {
4930 interval.end++;
4931 }
4932 else
4933 goto error;
4934 }
// m_chr is only used as a test; the position advances by one character regardless of how much it matched.
4935 else if (m_chr.match(text, interval.end, end, flags))
4936 interval.end++;
4937 else
4938 goto error;
4939 }
4940 else
4941 goto error;
4942 }
4943 interval.start = start;
4944 return true;
4945
4946 error:
// start > end marks an interval as invalid.
4947 content.start = 1;
4948 content.end = 0;
4949 interval.start = (interval.end = start) + 1;
4950 return false;
4951 }
4952
/// Marks `content` as invalid (start > end) and invalidates the base parser.
4953 virtual void invalidate()
4954 {
4955 content.start = 1;
4956 content.end = 0;
4957 parser::invalidate();
4958 }
4959
4960 public:
// NOTE(review): the listing jumps from line 4960 to 4962 here; the declaration of `content` is presumably on the elided line.
4962
4963 protected:
// Parser used by match() to validate unescaped characters inside the quotes.
4964 http_text_char m_chr;
4965 };
4966
/// Matches a value that is either a quoted string or a token.
/// The `string` member is tried first; whichever alternative did not match is invalidated.
4970 class http_value : public parser
4971 {
4972 public:
/// \returns true when either `string` or `token` matched at `start`; false otherwise
4973 virtual bool match(
4974 _In_reads_or_z_(end) const char* text,
4975 _In_ size_t start = 0,
4976 _In_ size_t end = (size_t)-1,
4977 _In_ int flags = match_default)
4978 {
4979 assert(text || start >= end);
4980 interval.end = start;
4981 if (string.match(text, interval.end, end, flags)) {
4982 token.invalidate();
4983 interval.end = string.interval.end;
4984 interval.start = start;
4985 return true;
4986 }
4987 else if (token.match(text, interval.end, end, flags)) {
4988 string.invalidate();
// NOTE(review): the listing jumps from line 4988 to 4990 here; one source line appears to be elided.
4990 interval.start = start;
4991 return true;
4992 }
4993 else {
// start > end marks the interval as invalid.
4994 interval.start = (interval.end = start) + 1;
4995 return false;
4996 }
4997 }
4998
/// Invalidates both alternatives and the base parser.
4999 virtual void invalidate()
5000 {
5001 string.invalidate();
5002 token.invalidate();
5003 parser::invalidate();
5004 }
5005
5006 public:
// NOTE(review): the listing jumps from line 5006 to 5009 here; the `string` and `token` member declarations are presumably on the elided lines.
5009 };
5010
/// Matches a `name = value` parameter, with optional whitespace after the name.
5014 class http_parameter : public parser
5015 {
5016 public:
/// \returns true when both `name` and `value` matched; on failure both are
///          invalidated and the interval is set to the invalid (start > end) state
5017 virtual bool match(
5018 _In_reads_or_z_(end) const char* text,
5019 _In_ size_t start = 0,
5020 _In_ size_t end = (size_t)-1,
5021 _In_ int flags = match_default)
5022 {
5023 assert(text || start >= end);
5024 interval.end = start;
5025 if (name.match(text, interval.end, end, flags))
// NOTE(review): the listing jumps from line 5025 to 5027 here; one source line appears to be elided.
5027 else
5028 goto error;
5029 while (m_space.match(text, interval.end, end, flags))
5030 interval.end = m_space.interval.end;
5031 assert(text || interval.end >= end);
5032 if (interval.end < end && text[interval.end] == '=')
5033 interval.end++;
// NOTE(review): as written, the whitespace loop below is the body of this `else`:
// a missing '=' is not an error, and whitespace after a present '=' is not
// skipped. Other name=value parsers in this file use `else goto error;` at this
// point - confirm whether that was intended here too.
5034 else
5035 while (m_space.match(text, interval.end, end, flags))
5036 interval.end = m_space.interval.end;
5037 if (value.match(text, interval.end, end, flags))
// NOTE(review): the listing jumps from line 5037 to 5039 here; one source line appears to be elided.
5039 else
5040 goto error;
5041 interval.start = start;
5042 return true;
5043
5044 error:
5045 name.invalidate();
5046 value.invalidate();
5047 interval.start = (interval.end = start) + 1;
5048 return false;
5049 }
5050
/// Invalidates the name, the value and the base parser.
5051 virtual void invalidate()
5052 {
5053 name.invalidate();
5054 value.invalidate();
5055 parser::invalidate();
5056 }
5057
5058 public:
// NOTE(review): the listing jumps from line 5058 to 5061 here; the `name` and `value` member declarations are presumably on the elided lines.
5061
5062 protected:
// Whitespace parser used by match() between the parameter's parts.
5063 http_space m_space;
5064 };
5065
5069 class http_any_type : public parser
5070 {
5071 public:
5072 virtual bool match(
5073 _In_reads_or_z_(end) const char* text,
5074 _In_ size_t start = 0,
5075 _In_ size_t end = (size_t)-1,
5076 _In_ int flags = match_default)
5077 {
5078 assert(text || start >= end);
5079 if (start + 2 < end &&
5080 text[start] == '*' &&
5081 text[start + 1] == '/' &&
5082 text[start + 2] == '*')
5083 {
5084 interval.end = (interval.start = start) + 3;
5085 return true;
5086 }
5087 else if (start < end && text[start] == '*') {
5088 interval.end = (interval.start = start) + 1;
5089 return true;
5090 }
5091 else {
5092 interval.start = (interval.end = start) + 1;
5093 return false;
5094 }
5095 }
5096 };
5097
5102 {
5103 public:
5104 virtual bool match(
5105 _In_reads_or_z_(end) const char* text,
5106 _In_ size_t start = 0,
5107 _In_ size_t end = (size_t)-1,
5108 _In_ int flags = match_default)
5109 {
5110 assert(text || start >= end);
5111 interval.end = start;
5112 if (type.match(text, interval.end, end, flags))
5113 interval.end = type.interval.end;
5114 else
5115 goto error;
5116 while (m_space.match(text, interval.end, end, flags))
5117 interval.end = m_space.interval.end;
5118 if (interval.end < end && text[interval.end] == '/')
5119 interval.end++;
5120 else
5121 goto error;
5122 while (m_space.match(text, interval.end, end, flags))
5123 interval.end = m_space.interval.end;
5124 if (subtype.match(text, interval.end, end, flags))
5125 interval.end = subtype.interval.end;
5126 else
5127 goto error;
5128 interval.start = start;
5129 return true;
5130
5131 error:
5132 type.invalidate();
5133 subtype.invalidate();
5134 interval.start = (interval.end = start) + 1;
5135 return false;
5136 }
5137
/// Invalidates both tokens and the base parser.
5138 virtual void invalidate()
5139 {
5140 type.invalidate();
5141 subtype.invalidate();
5142 parser::invalidate();
5143 }
5144
5145 public:
// Tokens matched before and after the '/' respectively.
5146 http_token type;
5147 http_token subtype;
5148
5149 protected:
// Whitespace parser used by match() around the '/'.
5150 http_space m_space;
5151 };
5152
5157 {
5158 public:
5159 virtual bool match(
5160 _In_reads_or_z_(end) const char* text,
5161 _In_ size_t start = 0,
5162 _In_ size_t end = (size_t)-1,
5163 _In_ int flags = match_default)
5164 {
5165 assert(text || start >= end);
5166 if (!http_media_range::match(text, start, end, flags))
5167 goto error;
5168 params.clear();
5169 for (;;) {
5170 if (interval.end < end && text[interval.end]) {
5171 if (m_space.match(text, interval.end, end, flags))
5172 interval.end = m_space.interval.end;
5173 else if (text[interval.end] == ';') {
5174 interval.end++;
5175 while (m_space.match(text, interval.end, end, flags))
5176 interval.end = m_space.interval.end;
5177 http_parameter param;
5178 if (param.match(text, interval.end, end, flags)) {
5179 interval.end = param.interval.end;
5180 params.push_back(std::move(param));
5181 }
5182 else
5183 break;
5184 }
5185 else
5186 break;
5187 }
5188 else
5189 break;
5190 }
5191 interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
5192 return true;
5193
5194 error:
5195 http_media_range::invalidate();
5196 params.clear();
5197 interval.start = (interval.end = start) + 1;
5198 return false;
5199 }
5200
/// Drops the collected parameters and invalidates the media range part.
5201 virtual void invalidate()
5202 {
5203 params.clear();
5204 http_media_range::invalidate();
5205 }
5206
5207 public:
// Parameters collected by match(), in order of appearance.
5208 std::list<http_parameter> params;
5209 };
5210
5215 {
5216 public:
5217 virtual bool match(
5218 _In_reads_or_z_(end) const char* text,
5219 _In_ size_t start = 0,
5220 _In_ size_t end = (size_t)-1,
5221 _In_ int flags = match_default)
5222 {
5223 assert(text || start >= end);
5224 interval.end = start;
5225 for (;;) {
5226 if (interval.end < end && text[interval.end]) {
5227 if ((unsigned int)text[interval.end] < 0x20 ||
5228 (unsigned int)text[interval.end] == 0x7f ||
5229 text[interval.end] == ':' ||
5230 text[interval.end] == '/' ||
5231 isspace(text[interval.end]))
5232 break;
5233 else
5234 interval.end++;
5235 }
5236 else
5237 break;
5238 }
5239 if (start < interval.end) {
5240 interval.start = start;
5241 return true;
5242 }
5243 interval.start = (interval.end = start) + 1;
5244 return false;
5245 }
5246 };
5247
5251 class http_url_port : public parser
5252 {
5253 public:
5254 http_url_port(_In_ const std::locale& locale = std::locale()) :
5255 parser(locale),
5256 value(0)
5257 {}
5258
5259 virtual bool match(
5260 _In_reads_or_z_(end) const char* text,
5261 _In_ size_t start = 0,
5262 _In_ size_t end = (size_t)-1,
5263 _In_ int flags = match_default)
5264 {
5265 assert(text || start >= end);
5266 value = 0;
5267 interval.end = start;
5268 for (;;) {
5269 if (interval.end < end && text[interval.end]) {
5270 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5271 size_t _value = (size_t)value * 10 + text[interval.end] - '0';
5272 if (_value > (uint16_t)-1) {
5273 value = 0;
5274 interval.start = (interval.end = start) + 1;
5275 return false;
5276 }
5277 value = (uint16_t)_value;
5278 interval.end++;
5279 }
5280 else
5281 break;
5282 }
5283 else
5284 break;
5285 }
5286 if (start < interval.end) {
5287 interval.start = start;
5288 return true;
5289 }
5290 interval.start = (interval.end = start) + 1;
5291 return false;
5292 }
5293
5294 virtual void invalidate()
5295 {
5296 value = 0;
5297 parser::invalidate();
5298 }
5299
5300 public:
5301 uint16_t value;
5302 };
5303
5308 {
5309 public:
5310 virtual bool match(
5311 _In_reads_or_z_(end) const char* text,
5312 _In_ size_t start = 0,
5313 _In_ size_t end = (size_t)-1,
5314 _In_ int flags = match_default)
5315 {
5316 assert(text || start >= end);
5317 interval.end = start;
5318 for (;;) {
5319 if (interval.end < end && text[interval.end]) {
5320 if ((unsigned int)text[interval.end] < 0x20 ||
5321 (unsigned int)text[interval.end] == 0x7f ||
5322 text[interval.end] == '?' ||
5323 text[interval.end] == '/' ||
5324 isspace(text[interval.end]))
5325 break;
5326 else
5327 interval.end++;
5328 }
5329 else
5330 break;
5331 }
5332 interval.start = start;
5333 return true;
5334 }
5335 };
5336
/// Matches a path: a leading '/' followed by segments separated by '/'.
/// Every segment, including empty ones, is appended to `segments`.
5340 class http_url_path : public parser
5341 {
5342 public:
/// \returns true when the text at `start` begins with '/'; on failure
///          `segments` is cleared and the interval is set to the invalid (start > end) state
5343 virtual bool match(
5344 _In_reads_or_z_(end) const char* text,
5345 _In_ size_t start = 0,
5346 _In_ size_t end = (size_t)-1,
5347 _In_ int flags = match_default)
5348 {
5349 assert(text || start >= end);
// NOTE(review): the listing jumps from line 5349 to 5351 here; the declaration of the local `s` used below is presumably on the elided line.
5351 interval.end = start;
5352 segments.clear();
5353 assert(text || interval.end >= end);
// NOTE(review): when interval.end >= end this test is false, so the code falls
// through and advances past `end` without having seen a '/'. Confirm whether
// `interval.end >= end || text[interval.end] != '/'` was intended.
5354 if (interval.end < end && text[interval.end] != '/')
5355 goto error;
5356 interval.end++;
5357 s.match(text, interval.end, end, flags);
5358 segments.push_back(s);
// NOTE(review): the listing jumps from line 5358 to 5360 here; one source line appears to be elided.
5360 for (;;) {
5361 if (interval.end < end && text[interval.end]) {
5362 if (text[interval.end] == '/') {
5363 interval.end++;
5364 s.match(text, interval.end, end, flags);
5365 segments.push_back(s);
// NOTE(review): the listing jumps from line 5365 to 5367 here; one source line appears to be elided.
5367 }
5368 else
5369 break;
5370 }
5371 else
5372 break;
5373 }
5374 interval.start = start;
5375 return true;
5376
5377 error:
5378 segments.clear();
5379 interval.start = (interval.end = start) + 1;
5380 return false;
5381 }
5382
/// Drops the collected segments and invalidates the base parser.
5383 virtual void invalidate()
5384 {
5385 segments.clear();
5386 parser::invalidate();
5387 }
5388
5389 public:
// Path segments collected by match(), in order of appearance.
5390 std::vector<http_url_path_segment> segments;
5391 };
5392
5397 {
5398 public:
5399 virtual bool match(
5400 _In_reads_or_z_(end) const char* text,
5401 _In_ size_t start = 0,
5402 _In_ size_t end = (size_t)-1,
5403 _In_ int flags = match_default)
5404 {
5405 assert(text || start >= end);
5406 interval.end = start;
5407 name.start = interval.end;
5408 for (;;) {
5409 if (interval.end < end && text[interval.end]) {
5410 if ((unsigned int)text[interval.end] < 0x20 ||
5411 (unsigned int)text[interval.end] == 0x7f ||
5412 text[interval.end] == '&' ||
5413 text[interval.end] == '=' ||
5414 isspace(text[interval.end]))
5415 break;
5416 else
5417 interval.end++;
5418 }
5419 else
5420 break;
5421 }
5422 if (start < interval.end)
5423 name.end = interval.end;
5424 else
5425 goto error;
5426 if (text[interval.end] == '=') {
5427 interval.end++;
5428 value.start = interval.end;
5429 for (;;) {
5430 if (interval.end < end && text[interval.end]) {
5431 if ((unsigned int)text[interval.end] < 0x20 ||
5432 (unsigned int)text[interval.end] == 0x7f ||
5433 text[interval.end] == '&' ||
5434 isspace(text[interval.end]))
5435 break;
5436 else
5437 interval.end++;
5438 }
5439 else
5440 break;
5441 }
5442 value.end = interval.end;
5443 }
5444 else {
5445 value.start = 1;
5446 value.end = 0;
5447 }
5448 interval.start = start;
5449 return true;
5450
5451 error:
5452 name.start = 1;
5453 name.end = 0;
5454 value.start = 1;
5455 value.end = 0;
5456 interval.start = (interval.end = start) + 1;
5457 return false;
5458 }
5459
/// Marks `name` and `value` as invalid (start > end) and invalidates the base parser.
5460 virtual void invalidate()
5461 {
5462 name.start = 1;
5463 name.end = 0;
5464 value.start = 1;
5465 value.end = 0;
5466 parser::invalidate();
5467 }
5468
5469 public:
5472 };
5473
5477 class http_url : public parser
5478 {
5479 public:
5480 http_url(_In_ const std::locale& locale = std::locale()) :
5481 parser(locale),
5482 port(locale)
5483 {}
5484
5485 virtual bool match(
5486 _In_reads_or_z_(end) const char* text,
5487 _In_ size_t start = 0,
5488 _In_ size_t end = (size_t)-1,
5489 _In_ int flags = match_default)
5490 {
5491 assert(text || start >= end);
5492 interval.end = start;
5493
5494 if (interval.end + 7 <= end && stdex::strnicmp(text + interval.end, 7, "http://", (size_t)-1, m_locale) == 0) {
5495 interval.end += 7;
5496 if (server.match(text, interval.end, end, flags))
5497 interval.end = server.interval.end;
5498 else
5499 goto error;
5500 if (interval.end < end && text[interval.end] == ':') {
5501 interval.end++;
5502 if (port.match(text, interval.end, end, flags))
5503 interval.end = port.interval.end;
5504 }
5505 else {
5506 port.invalidate();
5507 port.value = 80;
5508 }
5509 }
5510 else {
5511 server.invalidate();
5512 port.invalidate();
5513 port.value = 80;
5514 }
5515
5516 if (path.match(text, interval.end, end, flags))
5517 interval.end = path.interval.end;
5518 else
5519 goto error;
5520
5521 params.clear();
5522
5523 if (interval.end < end && text[interval.end] == '?') {
5524 interval.end++;
5525 for (;;) {
5526 if (interval.end < end && text[interval.end]) {
5527 if ((unsigned int)text[interval.end] < 0x20 ||
5528 (unsigned int)text[interval.end] == 0x7f ||
5529 isspace(text[interval.end]))
5530 break;
5531 else if (text[interval.end] == '&')
5532 interval.end++;
5533 else {
5534 http_url_parameter param;
5535 if (param.match(text, interval.end, end, flags)) {
5536 interval.end = param.interval.end;
5537 params.push_back(std::move(param));
5538 }
5539 else
5540 break;
5541 }
5542 }
5543 else
5544 break;
5545 }
5546 }
5547
5548 interval.start = start;
5549 return true;
5550
5551 error:
5552 server.invalidate();
5553 port.invalidate();
5554 path.invalidate();
5555 params.clear();
5556 interval.start = (interval.end = start) + 1;
5557 return false;
5558 }
5559
5560 virtual void invalidate()
5561 {
5562 server.invalidate();
5563 port.invalidate();
5564 path.invalidate();
5565 params.clear();
5566 parser::invalidate();
5567 }
5568
5569 public:
5570 http_url_server server;
5571 http_url_port port;
5572 http_url_path path;
5573 std::list<http_url_parameter> params;
5574 };
5575
/// Matches a language tag: one or more alphabetic components separated by '-'.
/// The interval of every component is appended to `components`.
5579 class http_language : public parser
5580 {
5581 public:
/// \returns true when at least one component was matched; the reported
///          interval ends at the last component, so a trailing '-' is not included
5582 virtual bool match(
5583 _In_reads_or_z_(end) const char* text,
5584 _In_ size_t start = 0,
5585 _In_ size_t end = (size_t)-1,
5586 _In_ int flags = match_default)
5587 {
5588 assert(text || start >= end);
5589 interval.end = start;
5590 components.clear();
5591 for (;;) {
5592 if (interval.end < end && text[interval.end]) {
// NOTE(review): the listing jumps from line 5592 to 5594 here; the declaration of the local `k` used below is presumably on the elided line.
5594 k.end = interval.end;
// Scan one run of alphabetic characters.
5595 for (;;) {
5596 if (k.end < end && text[k.end]) {
// NOTE(review): isalpha() receives a plain char here; for negative values that is undefined behaviour.
5597 if (isalpha(text[k.end]))
5598 k.end++;
5599 else
5600 break;
5601 }
5602 else
5603 break;
5604 }
5605 if (interval.end < k.end) {
5606 k.start = interval.end;
5607 interval.end = k.end;
5608 components.push_back(k);
5609 }
5610 else
5611 break;
5612 if (interval.end < end && text[interval.end] == '-')
5613 interval.end++;
5614 else
5615 break;
5616 }
5617 else
5618 break;
5619 }
5620 if (!components.empty()) {
5621 interval.start = start;
5622 interval.end = components.back().end;
5623 return true;
5624 }
// start > end marks the interval as invalid.
5625 interval.start = (interval.end = start) + 1;
5626 return false;
5627 }
5628
/// Drops the collected components and invalidates the base parser.
5629 virtual void invalidate()
5630 {
5631 components.clear();
5632 parser::invalidate();
5633 }
5634
5635 public:
// Intervals of the matched components, in order of appearance.
5636 std::vector<stdex::interval<size_t>> components;
5637 };
5638
5642 class http_weight : public parser
5643 {
5644 public:
5645 http_weight(_In_ const std::locale& locale = std::locale()) :
5646 parser(locale),
5647 value(1.0f)
5648 {}
5649
5650 virtual bool match(
5651 _In_reads_or_z_(end) const char* text,
5652 _In_ size_t start = 0,
5653 _In_ size_t end = (size_t)-1,
5654 _In_ int flags = match_default)
5655 {
5656 assert(text || start >= end);
5657 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
5658 interval.end = start;
5659 for (;;) {
5660 if (interval.end < end && text[interval.end]) {
5661 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5662 celi_del = celi_del * 10 + text[interval.end] - '0';
5663 interval.end++;
5664 }
5665 else if (text[interval.end] == '.') {
5666 interval.end++;
5667 for (;;) {
5668 if (interval.end < end && text[interval.end]) {
5669 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5670 decimalni_del = decimalni_del * 10 + text[interval.end] - '0';
5671 decimalni_del_n *= 10;
5672 interval.end++;
5673 }
5674 else
5675 break;
5676 }
5677 else
5678 break;
5679 }
5680 break;
5681 }
5682 else
5683 break;
5684 }
5685 else
5686 break;
5687 }
5688 if (start < interval.end) {
5689 value = (float)((double)celi_del + (double)decimalni_del / decimalni_del_n);
5690 interval.start = start;
5691 return true;
5692 }
5693 value = 1.0f;
5694 interval.start = (interval.end = start) + 1;
5695 return false;
5696 }
5697
5698 virtual void invalidate()
5699 {
5700 value = 1.0f;
5701 parser::invalidate();
5702 }
5703
5704 public:
5705 float value;
5706 };
5707
5711 class http_asterisk : public parser
5712 {
5713 public:
5714 virtual bool match(
5715 _In_reads_or_z_(end) const char* text,
5716 _In_ size_t start = 0,
5717 _In_ size_t end = (size_t)-1,
5718 _In_ int flags = match_default)
5719 {
5720 assert(text || end <= start);
5721 if (start < end && text[start] == '*') {
5722 interval.end = (interval.start = start) + 1;
5723 return true;
5724 }
5725 interval.start = (interval.end = start) + 1;
5726 return false;
5727 }
5728 };
5729
5733 template <class T, class T_asterisk = http_asterisk>
5735 {
5736 public:
5737 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
5738 parser(locale),
5739 factor(locale)
5740 {}
5741
5742 virtual bool match(
5743 _In_reads_or_z_(end) const char* text,
5744 _In_ size_t start = 0,
5745 _In_ size_t end = (size_t)-1,
5746 _In_ int flags = match_default)
5747 {
5748 assert(text || start >= end);
5749 size_t konec_vrednosti;
5750 interval.end = start;
5751 if (asterisk.match(text, interval.end, end, flags)) {
5752 interval.end = konec_vrednosti = asterisk.interval.end;
5753 value.invalidate();
5754 }
5755 else if (value.match(text, interval.end, end, flags)) {
5756 interval.end = konec_vrednosti = value.interval.end;
5757 asterisk.invalidate();
5758 }
5759 else {
5760 asterisk.invalidate();
5761 value.invalidate();
5762 interval.start = (interval.end = start) + 1;
5763 return false;
5764 }
5765
5766 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5767 if (interval.end < end && text[interval.end] == ';') {
5768 interval.end++;
5769 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5770 if (interval.end < end && (text[interval.end] == 'q' || text[interval.end] == 'Q')) {
5771 interval.end++;
5772 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5773 if (interval.end < end && text[interval.end] == '=') {
5774 interval.end++;
5775 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5776 if (factor.match(text, interval.end, end, flags))
5777 interval.end = factor.interval.end;
5778 }
5779 }
5780 }
5781 if (!factor.interval) {
5782 factor.invalidate();
5783 interval.end = konec_vrednosti;
5784 }
5785 interval.start = start;
5786 return true;
5787 }
5788
/// Invalidates the asterisk, the value, the weight and the base parser.
5789 virtual void invalidate()
5790 {
5791 asterisk.invalidate();
5792 value.invalidate();
5793 factor.invalidate();
5794 parser::invalidate();
5795 }
5796
5797 public:
// match() tries `asterisk` first and `value` second; `factor` holds the optional `;q=` weight.
5798 T_asterisk asterisk;
5799 T value;
5800 http_weight factor;
5801 };
5802
5807 {
5808 public:
5809 virtual bool match(
5810 _In_reads_or_z_(end) const char* text,
5811 _In_ size_t start = 0,
5812 _In_ size_t end = (size_t)-1,
5813 _In_ int flags = match_default)
5814 {
5815 assert(text || start >= end);
5816 interval.end = start;
5817 if (interval.end < end && text[interval.end] == '$')
5818 interval.end++;
5819 else
5820 goto error;
5821 if (name.match(text, interval.end, end, flags))
5822 interval.end = name.interval.end;
5823 else
5824 goto error;
5825 while (m_space.match(text, interval.end, end, flags))
5826 interval.end = m_space.interval.end;
5827 if (interval.end < end && text[interval.end] == '=')
5828 interval.end++;
5829 else
5830 goto error;
5831 while (m_space.match(text, interval.end, end, flags))
5832 interval.end = m_space.interval.end;
5833 if (value.match(text, interval.end, end, flags))
5834 interval.end = value.interval.end;
5835 else
5836 goto error;
5837 interval.start = start;
5838 return true;
5839
5840 error:
5841 name.invalidate();
5842 value.invalidate();
5843 interval.start = (interval.end = start) + 1;
5844 return false;
5845 }
5846
/// Invalidates the name, the value and the base parser.
5847 virtual void invalidate()
5848 {
5849 name.invalidate();
5850 value.invalidate();
5851 parser::invalidate();
5852 }
5853
5854 public:
// Parameter name (matched after the leading '$') and its value.
5855 http_token name;
5856 http_value value;
5857
5858 protected:
// Whitespace parser used by match() around the '='.
5859 http_space m_space;
5860 };
5861
/// Matches a cookie: `name = value` followed by zero or more `;`-separated parameters.
5865 class http_cookie : public parser
5866 {
5867 public:
/// \returns true when the name, the '=' and the value were found. Parameter
///          matching stops at the first ';' that is not followed by a valid
///          parameter; the reported interval ends at the last matched element.
///          On failure the name, the value and the parameter list are reset.
5868 virtual bool match(
5869 _In_reads_or_z_(end) const char* text,
5870 _In_ size_t start = 0,
5871 _In_ size_t end = (size_t)-1,
5872 _In_ int flags = match_default)
5873 {
5874 assert(text || start >= end);
5875 interval.end = start;
5876 if (name.match(text, interval.end, end, flags))
// NOTE(review): the listing jumps from line 5876 to 5878 here; one source line appears to be elided.
5878 else
5879 goto error;
5880 while (m_space.match(text, interval.end, end, flags))
5881 interval.end = m_space.interval.end;
5882 if (interval.end < end && text[interval.end] == '=')
5883 interval.end++;
5884 else
5885 goto error;
5886 while (m_space.match(text, interval.end, end, flags))
5887 interval.end = m_space.interval.end;
5888 if (value.match(text, interval.end, end, flags))
// NOTE(review): the listing jumps from line 5888 to 5890 here; one source line appears to be elided.
5890 else
5891 goto error;
5892 params.clear();
5893 for (;;) {
5894 if (interval.end < end && text[interval.end]) {
5895 if (m_space.match(text, interval.end, end, flags))
5896 interval.end = m_space.interval.end;
5897 else if (text[interval.end] == ';') {
5898 interval.end++;
5899 while (m_space.match(text, interval.end, end, flags))
5900 interval.end = m_space.interval.end;
// NOTE(review): the listing jumps from line 5900 to 5902 here; the declaration of the local `param` used below is presumably on the elided line.
5902 if (param.match(text, interval.end, end, flags)) {
5903 interval.end = param.interval.end;
5904 params.push_back(std::move(param));
5905 }
5906 else
5907 break;
5908 }
5909 else
5910 break;
5911 }
5912 else
5913 break;
5914 }
5915 interval.start = start;
// Trailing whitespace and a dangling ';' are not part of the match.
5916 interval.end = params.empty() ? value.interval.end : params.back().interval.end;
5917 return true;
5918
5919 error:
5920 name.invalidate();
5921 value.invalidate();
5922 params.clear();
// start > end marks the interval as invalid.
5923 interval.start = (interval.end = start) + 1;
5924 return false;
5925 }
5926
/// Invalidates the name and the value, drops the parameters and invalidates the base parser.
5927 virtual void invalidate()
5928 {
5929 name.invalidate();
5930 value.invalidate();
5931 params.clear();
5932 parser::invalidate();
5933 }
5934
5935 public:
// NOTE(review): the listing jumps from line 5935 to 5938 here; the `name` and `value` member declarations are presumably on the elided lines.
// Cookie parameters collected by match(), in order of appearance.
5938 std::list<http_cookie_parameter> params;
5939
5940 protected:
// Whitespace parser used by match() between the cookie's parts.
5941 http_space m_space;
5942 };
5943
5947 class http_agent : public parser
5948 {
5949 public:
5950 virtual bool match(
5951 _In_reads_or_z_(end) const char* text,
5952 _In_ size_t start = 0,
5953 _In_ size_t end = (size_t)-1,
5954 _In_ int flags = match_default)
5955 {
5956 assert(text || start >= end);
5957 interval.end = start;
5958 type.start = interval.end;
5959 for (;;) {
5960 if (interval.end < end && text[interval.end]) {
5961 if (text[interval.end] == '/') {
5962 type.end = interval.end;
5963 interval.end++;
5964 version.start = interval.end;
5965 for (;;) {
5966 if (interval.end < end && text[interval.end]) {
5967 if (isspace(text[interval.end])) {
5968 version.end = interval.end;
5969 break;
5970 }
5971 else
5972 interval.end++;
5973 }
5974 else {
5975 version.end = interval.end;
5976 break;
5977 }
5978 }
5979 break;
5980 }
5981 else if (isspace(text[interval.end])) {
5982 type.end = interval.end;
5983 break;
5984 }
5985 else
5986 interval.end++;
5987 }
5988 else {
5989 type.end = interval.end;
5990 break;
5991 }
5992 }
5993 if (start < interval.end) {
5994 interval.start = start;
5995 return true;
5996 }
5997 type.start = 1;
5998 type.end = 0;
5999 version.start = 1;
6000 version.end = 0;
6001 interval.start = 1;
6002 interval.end = 0;
6003 return false;
6004 }
6005
6006 virtual void invalidate()
6007 {
6008 type.start = 1;
6009 type.end = 0;
6010 version.start = 1;
6011 version.end = 0;
6012 parser::invalidate();
6013 }
6014
6015 public:
6018 };
6019
6023 class http_protocol : public parser
6024 {
6025 public:
6026 http_protocol(_In_ const std::locale& locale = std::locale()) :
6027 parser(locale),
6028 version(0x009)
6029 {}
6030
6031 virtual bool match(
6032 _In_reads_or_z_(end) const char* text,
6033 _In_ size_t start = 0,
6034 _In_ size_t end = (size_t)-1,
6035 _In_ int flags = match_default)
6036 {
6037 assert(text || start >= end);
6038 interval.end = start;
6039 type.start = interval.end;
6040 for (;;) {
6041 if (interval.end < end && text[interval.end]) {
6042 if (text[interval.end] == '/') {
6043 type.end = interval.end;
6044 interval.end++;
6045 break;
6046 }
6047 else if (isspace(text[interval.end]))
6048 goto error;
6049 else
6050 interval.end++;
6051 }
6052 else {
6053 type.end = interval.end;
6054 goto error;
6055 }
6056 }
6057 version_maj.start = interval.end;
6058 for (;;) {
6059 if (interval.end < end && text[interval.end]) {
6060 if (text[interval.end] == '.') {
6061 version_maj.end = interval.end;
6062 interval.end++;
6063 version_min.start = interval.end;
6064 for (;;) {
6065 if (interval.end < end && text[interval.end]) {
6066 if (isspace(text[interval.end])) {
6067 version_min.end = interval.end;
6068 version =
6069 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6070 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6071 break;
6072 }
6073 else
6074 interval.end++;
6075 }
6076 else
6077 goto error;
6078 }
6079 break;
6080 }
6081 else if (isspace(text[interval.end])) {
6082 version_maj.end = interval.end;
6083 version_min.start = 1;
6084 version_min.end = 0;
6085 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6086 break;
6087 }
6088 else
6089 interval.end++;
6090 }
6091 else
6092 goto error;
6093 }
6094 interval.start = start;
6095 return true;
6096
6097 error:
6098 type.start = 1;
6099 type.end = 0;
6100 version_maj.start = 1;
6101 version_maj.end = 0;
6102 version_min.start = 1;
6103 version_min.end = 0;
6104 version = 0x009;
6105 interval.start = 1;
6106 interval.end = 0;
6107 return false;
6108 }
6109
6110 virtual void invalidate()
6111 {
6112 type.start = 1;
6113 type.end = 0;
6114 version_maj.start = 1;
6115 version_maj.end = 0;
6116 version_min.start = 1;
6117 version_min.end = 0;
6118 version = 0x009;
6119 parser::invalidate();
6120 }
6121
6122 public:
6124 stdex::interval<size_t> version_maj;
6125 stdex::interval<size_t> version_min;
6126 uint16_t version;
6127 };
6128
6132 class http_request : public parser
6133 {
6134 public:
6135 http_request(_In_ const std::locale& locale = std::locale()) :
6136 parser(locale),
6137 url(locale),
6138 protocol(locale)
6139 {}
6140
6141 virtual bool match(
6142 _In_reads_or_z_(end) const char* text,
6143 _In_ size_t start = 0,
6144 _In_ size_t end = (size_t)-1,
6145 _In_ int flags = match_default)
6146 {
6147 assert(text || start >= end);
6148 interval.end = start;
6149
6150 for (;;) {
6151 if (m_line_break.match(text, interval.end, end, flags))
6152 goto error;
6153 else if (interval.end < end && text[interval.end]) {
6154 if (isspace(text[interval.end]))
6155 interval.end++;
6156 else
6157 break;
6158 }
6159 else
6160 goto error;
6161 }
6162 verb.start = interval.end;
6163 for (;;) {
6164 if (m_line_break.match(text, interval.end, end, flags))
6165 goto error;
6166 else if (interval.end < end && text[interval.end]) {
6167 if (isspace(text[interval.end])) {
6168 verb.end = interval.end;
6169 interval.end++;
6170 break;
6171 }
6172 else
6173 interval.end++;
6174 }
6175 else
6176 goto error;
6177 }
6178
6179 for (;;) {
6180 if (m_line_break.match(text, interval.end, end, flags))
6181 goto error;
6182 else if (interval.end < end && text[interval.end]) {
6183 if (isspace(text[interval.end]))
6184 interval.end++;
6185 else
6186 break;
6187 }
6188 else
6189 goto error;
6190 }
6191 if (url.match(text, interval.end, end, flags))
6193 else
6194 goto error;
6195
6196 protocol.invalidate();
6197 for (;;) {
6198 if (m_line_break.match(text, interval.end, end, flags)) {
6199 interval.end = m_line_break.interval.end;
6200 goto end;
6201 }
6202 else if (interval.end < end && text[interval.end]) {
6203 if (isspace(text[interval.end]))
6204 interval.end++;
6205 else
6206 break;
6207 }
6208 else
6209 goto end;
6210 }
6211 for (;;) {
6212 if (m_line_break.match(text, interval.end, end, flags)) {
6213 interval.end = m_line_break.interval.end;
6214 goto end;
6215 }
6216 else if (protocol.match(text, interval.end, end, flags)) {
6217 interval.end = protocol.interval.end;
6218 break;
6219 }
6220 else
6221 goto end;
6222 }
6223
6224 for (;;) {
6225 if (m_line_break.match(text, interval.end, end, flags)) {
6226 interval.end = m_line_break.interval.end;
6227 break;
6228 }
6229 else if (interval.end < end && text[interval.end])
6230 interval.end++;
6231 else
6232 goto end;
6233 }
6234
6235 end:
6236 interval.start = start;
6237 return true;
6238
6239 error:
6240 verb.start = 1;
6241 verb.end = 0;
6242 url.invalidate();
6243 protocol.invalidate();
6244 interval.start = 1;
6245 interval.end = 0;
6246 return false;
6247 }
6248
6249 virtual void invalidate()
6250 {
6251 verb.start = 1;
6252 verb.end = 0;
6253 url.invalidate();
6254 protocol.invalidate();
6255 parser::invalidate();
6256 }
6257
6258 public:
6260 http_url url;
6261 http_protocol protocol;
6262
6263 protected:
6264 http_line_break m_line_break;
6265 };
6266
6270 class http_header : public parser
6271 {
6272 public:
6273 virtual bool match(
6274 _In_reads_or_z_(end) const char* text,
6275 _In_ size_t start = 0,
6276 _In_ size_t end = (size_t)-1,
6277 _In_ int flags = match_default)
6278 {
6279 assert(text || start >= end);
6280 interval.end = start;
6281
6282 if (m_line_break.match(text, interval.end, end, flags) ||
6283 interval.end < end && text[interval.end] && isspace(text[interval.end]))
6284 goto error;
6285 name.start = interval.end;
6286 for (;;) {
6287 if (m_line_break.match(text, interval.end, end, flags))
6288 goto error;
6289 else if (interval.end < end && text[interval.end]) {
6290 if (isspace(text[interval.end])) {
6291 name.end = interval.end;
6292 interval.end++;
6293 for (;;) {
6294 if (m_line_break.match(text, interval.end, end, flags))
6295 goto error;
6296 else if (interval.end < end && text[interval.end]) {
6297 if (isspace(text[interval.end]))
6298 interval.end++;
6299 else
6300 break;
6301 }
6302 else
6303 goto error;
6304 }
6305 if (interval.end < end && text[interval.end] == ':') {
6306 interval.end++;
6307 break;
6308 }
6309 else
6310 goto error;
6311 break;
6312 }
6313 else if (text[interval.end] == ':') {
6314 name.end = interval.end;
6315 interval.end++;
6316 break;
6317 }
6318 else
6319 interval.end++;
6320 }
6321 else
6322 goto error;
6323 }
6324 value.start = (size_t)-1;
6325 value.end = 0;
6326 for (;;) {
6327 if (m_line_break.match(text, interval.end, end, flags)) {
6328 interval.end = m_line_break.interval.end;
6329 if (!m_line_break.match(text, interval.end, end, flags) &&
6330 interval.end < end && text[interval.end] && isspace(text[interval.end]))
6331 interval.end++;
6332 else
6333 break;
6334 }
6335 else if (interval.end < end && text[interval.end]) {
6336 if (isspace(text[interval.end]))
6337 interval.end++;
6338 else {
6339 if (value.start == (size_t)-1) value.start = interval.end;
6340 value.end = ++interval.end;
6341 }
6342 }
6343 else
6344 break;
6345 }
6346 interval.start = start;
6347 return true;
6348
6349 error:
6350 name.start = 1;
6351 name.end = 0;
6352 value.start = 1;
6353 value.end = 0;
6354 interval.start = 1;
6355 interval.end = 0;
6356 return false;
6357 }
6358
6359 virtual void invalidate()
6360 {
6361 name.start = 1;
6362 name.end = 0;
6363 value.start = 1;
6364 value.end = 0;
6365 parser::invalidate();
6366 }
6367
6368 public:
6371
6372 protected:
6373 http_line_break m_line_break;
6374 };
6375
6379 template <class T>
6380 class http_value_collection : public T
6381 {
6382 public:
6383 void insert(
6384 _In_reads_or_z_(end) const char* text,
6385 _In_ size_t start = 0,
6386 _In_ size_t end = (size_t)-1,
6387 _In_ int flags = match_default)
6388 {
6389 while (start < end) {
6390 while (start < end && text[start] && isspace(text[start])) start++;
6391 if (start < end && text[start] == ',') {
6392 start++;
6393 while (start < end&& text[start] && isspace(text[start])) start++;
6394 }
6395 T::key_type el;
6396 if (el.match(text, start, end, flags)) {
6397 start = el.interval.end;
6398 T::insert(std::move(el));
6399 }
6400 else
6401 break;
6402 }
6403 }
6404 };
6405
// NOTE(review): the declaration line that opens this comparator type (listing line 6407) is
// missing from this excerpt; only its call operator is visible below.
6406 template <class T>
/// Returns true when `a.factor.value` is greater than `b.factor.value`.
/// Presumably used as an ordering predicate so that elements with a larger factor sort first - confirm at the use site.
6408 constexpr bool operator()(const T& a, const T& b) const noexcept
6409 {
6410 return a.factor.value > b.factor.value;
6411 }
6412 };
6413
6417 template <class T, class _Alloc = std::allocator<T>>
6419
6423 template <class T>
6425 {
6426 public:
// NOTE(review): the line carrying the constructor's name (listing line 6427) is missing from this excerpt.
// Constructor parameters - each sub-parser is stored in the like-named m_ member and used by match():
//   quote  - string delimiter, matched both to open and to close the string
//   chr    - ordinary (unescaped) string content
//   escape - escape introducer; matched a second time after itself to decode an escaped escape as '\\'
//   sol, bs, ff, lf, cr, htab - escape selectors decoded to '/', '\b', '\f', '\n', '\r' and '\t' respectively
//   uni    - Unicode escape selector; must be followed by exactly four hexadecimal digits
//   hex    - parser for those hexadecimal digits
//   locale - forwarded to the basic_parser<T> base
6428 _In_ const std::shared_ptr<basic_parser<T>>& quote,
6429 _In_ const std::shared_ptr<basic_parser<T>>& chr,
6430 _In_ const std::shared_ptr<basic_parser<T>>& escape,
6431 _In_ const std::shared_ptr<basic_parser<T>>& sol,
6432 _In_ const std::shared_ptr<basic_parser<T>>& bs,
6433 _In_ const std::shared_ptr<basic_parser<T>>& ff,
6434 _In_ const std::shared_ptr<basic_parser<T>>& lf,
6435 _In_ const std::shared_ptr<basic_parser<T>>& cr,
6436 _In_ const std::shared_ptr<basic_parser<T>>& htab,
6437 _In_ const std::shared_ptr<basic_parser<T>>& uni,
6438 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
6439 _In_ const std::locale& locale = std::locale()) :
6440 basic_parser<T>(locale),
6441 m_quote(quote),
6442 m_chr(chr),
6443 m_escape(escape),
6444 m_sol(sol),
6445 m_bs(bs),
6446 m_ff(ff),
6447 m_lf(lf),
6448 m_cr(cr),
6449 m_htab(htab),
6450 m_uni(uni),
6451 m_hex(hex)
6452 {}
6453
6454 virtual bool match(
6455 _In_reads_or_z_(end) const T* text,
6456 _In_ size_t start = 0,
6457 _In_ size_t end = (size_t)-1,
6458 _In_ int flags = match_default)
6459 {
6460 assert(text || start >= end);
6461 interval.end = start;
6462 if (m_quote->match(text, interval.end, end, flags)) {
6463 interval.end = m_quote->interval.end;
6464 value.clear();
6465 for (;;) {
6466 if (m_quote->match(text, interval.end, end, flags)) {
6467 interval.start = start;
6468 interval.end = m_quote->interval.end;
6469 return true;
6470 }
6471 if (m_escape->match(text, interval.end, end, flags)) {
6472 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
6473 value += '"'; interval.end = m_quote->interval.end;
6474 continue;
6475 }
6476 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
6477 value += '/'; interval.end = m_sol->interval.end;
6478 continue;
6479 }
6480 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
6481 value += '\b'; interval.end = m_bs->interval.end;
6482 continue;
6483 }
6484 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
6485 value += '\f'; interval.end = m_ff->interval.end;
6486 continue;
6487 }
6488 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
6489 value += '\n'; interval.end = m_lf->interval.end;
6490 continue;
6491 }
6492 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
6493 value += '\r'; interval.end = m_cr->interval.end;
6494 continue;
6495 }
6496 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
6497 value += '\t'; interval.end = m_htab->interval.end;
6498 continue;
6499 }
6500 if (
6501 m_uni->match(text, m_escape->interval.end, end, flags) &&
6502 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
6503 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
6504 {
6505 assert(m_hex->value <= 0xffff);
6506 if (sizeof(T) == 1) {
6507 if (m_hex->value > 0x7ff) {
6508 value += (T)(0xe0 | (m_hex->value >> 12) & 0x0f);
6509 value += (T)(0x80 | (m_hex->value >> 6) & 0x3f);
6510 value += (T)(0x80 | m_hex->value & 0x3f);
6511 }
6512 else if (m_hex->value > 0x7f) {
6513 value += (T)(0xc0 | (m_hex->value >> 6) & 0x1f);
6514 value += (T)(0x80 | m_hex->value & 0x3f);
6515 }
6516 else
6517 value += (T)(m_hex->value & 0x7f);
6518 }
6519 else
6520 value += (T)m_hex->value;
6521 interval.end = m_hex->interval.end;
6522 continue;
6523 }
6524 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
6525 value += '\\'; interval.end = m_escape->interval.end;
6526 continue;
6527 }
6528 }
6529 if (m_chr->match(text, interval.end, end, flags)) {
6530 value.Prilepi(text + m_chr->interval.start, m_chr->interval.size());
6531 interval.end = m_chr->interval.end;
6532 continue;
6533 }
6534 break;
6535 }
6536 }
6537 value.clear();
6538 interval.start = (interval.end = start) + 1;
6539 return false;
6540 }
6541
6542 virtual void invalidate()
6543 {
6544 value.clear();
6546 }
6547
6548 public:
6549 std::basic_string<T> value;
6550
6551 protected:
6552 std::shared_ptr<basic_parser<T>> m_quote;
6553 std::shared_ptr<basic_parser<T>> m_chr;
6554 std::shared_ptr<basic_parser<T>> m_escape;
6555 std::shared_ptr<basic_parser<T>> m_sol;
6556 std::shared_ptr<basic_parser<T>> m_bs;
6557 std::shared_ptr<basic_parser<T>> m_ff;
6558 std::shared_ptr<basic_parser<T>> m_lf;
6559 std::shared_ptr<basic_parser<T>> m_cr;
6560 std::shared_ptr<basic_parser<T>> m_htab;
6561 std::shared_ptr<basic_parser<T>> m_uni;
6562 std::shared_ptr<basic_integer16<T>> m_hex;
6563 };
6564
// tjson_string follows the build's character set: it aliases wjson_string when _UNICODE is defined and json_string otherwise.
6567#ifdef _UNICODE
6568 using tjson_string = wjson_string;
6569#else
6570 using tjson_string = json_string;
6571#endif
6572 }
6573}
6574
6575#undef ENUM_FLAG_OPERATOR
6576#undef ENUM_FLAGS
6577
6578#ifdef _MSC_VER
6579#pragma warning(pop)
6580#endif
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4389
Test for any code unit.
Definition parser.hpp:216
Test for beginning of line.
Definition parser.hpp:610
Test for any.
Definition parser.hpp:1052
Test for chemical formula.
Definition parser.hpp:4663
Test for any code unit from a given string of code units.
Definition parser.hpp:715
Test for specific code unit.
Definition parser.hpp:286
Test for date.
Definition parser.hpp:4019
Test for valid DNS domain character.
Definition parser.hpp:2800
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2838
Test for DNS domain/hostname.
Definition parser.hpp:2900
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2964
Test for e-mail address.
Definition parser.hpp:3788
Test for emoticon.
Definition parser.hpp:3896
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3985
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3986
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3988
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3987
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3984
Test for end of line.
Definition parser.hpp:648
Test for fraction.
Definition parser.hpp:1681
Test for decimal integer.
Definition parser.hpp:1290
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1375
bool has_separators
Did integer have any separators?
Definition parser.hpp:1435
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1434
Test for hexadecimal integer.
Definition parser.hpp:1456
Base class for integer testing.
Definition parser.hpp:1268
size_t value
Calculated value of the numeral.
Definition parser.hpp:1282
Test for IPv4 address.
Definition parser.hpp:2340
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2455
struct in_addr value
IPv4 address value.
Definition parser.hpp:2456
Test for IPv6 address.
Definition parser.hpp:2559
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2763
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2761
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2762
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2487
Test for repeating.
Definition parser.hpp:905
bool m_greedy
Try to match as long a sequence as possible
Definition parser.hpp:944
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:941
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:942
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:943
Test for JSON string.
Definition parser.hpp:6425
Test for mixed numeral.
Definition parser.hpp:1916
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:2022
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2020
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2019
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2018
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2021
Test for monetary numeral.
Definition parser.hpp:2211
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2317
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2322
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2320
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2323
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2321
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2318
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2319
"No-op" match
Definition parser.hpp:184
Base template for all parsers.
Definition parser.hpp:65
interval< size_t > interval
Region of the last match.
Definition parser.hpp:164
Test for permutation.
Definition parser.hpp:1192
Test for phone number.
Definition parser.hpp:4512
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4638
Test for any punctuation code unit.
Definition parser.hpp:458
Test for Roman numeral.
Definition parser.hpp:1565
Test for scientific numeral.
Definition parser.hpp:2042
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2186
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2190
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2184
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2185
double value
Calculated value of the numeral.
Definition parser.hpp:2194
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2192
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2189
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2191
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2193
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2188
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2187
Test for match score.
Definition parser.hpp:1744
Test for sequence.
Definition parser.hpp:1001
Definition parser.hpp:683
Test for signed numeral.
Definition parser.hpp:1830
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1898
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1897
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1896
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1899
Test for any space code unit.
Definition parser.hpp:379
Test for any space or punctuation code unit.
Definition parser.hpp:532
Test for any string.
Definition parser.hpp:1120
Test for given string.
Definition parser.hpp:810
Test for time.
Definition parser.hpp:4286
Test for valid URL password character.
Definition parser.hpp:3082
Test for valid URL path character.
Definition parser.hpp:3182
Test for URL path.
Definition parser.hpp:3290
Test for valid URL username character.
Definition parser.hpp:2983
Test for URL.
Definition parser.hpp:3431
Test for HTTP agent.
Definition parser.hpp:5948
Test for HTTP any type.
Definition parser.hpp:5070
Test for HTTP asterisk.
Definition parser.hpp:5712
Test for HTTP header.
Definition parser.hpp:6271
Test for HTTP language (RFC1766)
Definition parser.hpp:5580
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:4744
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5102
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5157
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5015
http_token name
Parameter name.
Definition parser.hpp:5059
http_value value
Parameter value.
Definition parser.hpp:5060
Test for HTTP protocol.
Definition parser.hpp:6024
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6126
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:4905
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:4961
Test for HTTP request.
Definition parser.hpp:6133
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:4780
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:4817
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:4851
Test for HTTP URL parameter.
Definition parser.hpp:5397
Test for HTTP URL path segment.
Definition parser.hpp:5308
Test for HTTP URL path.
Definition parser.hpp:5341
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:5390
Test for HTTP URL port.
Definition parser.hpp:5252
Test for HTTP URL server.
Definition parser.hpp:5215
Test for HTTP URL.
Definition parser.hpp:5478
Collection of HTTP values.
Definition parser.hpp:6381
Test for HTTP value (RFC2616: value)
Definition parser.hpp:4971
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5007
http_token token
Value when matched as token.
Definition parser.hpp:5008
Test for HTTP weight factor.
Definition parser.hpp:5643
float value
Calculated value of the weight factor.
Definition parser.hpp:5705
Test for HTTP weighted value.
Definition parser.hpp:5735
Base template for collection-holding parsers.
Definition parser.hpp:961
Test for any SGML code point.
Definition parser.hpp:248
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:767
Test for specific SGML code point.
Definition parser.hpp:335
Test for valid DNS domain SGML character.
Definition parser.hpp:2856
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2525
Test for any SGML punctuation code point.
Definition parser.hpp:499
Test for any SGML space code point.
Definition parser.hpp:422
Test for any SGML space or punctuation code point.
Definition parser.hpp:575
Test for SGML given string.
Definition parser.hpp:857
Test for valid URL password SGML character.
Definition parser.hpp:3134
Test for valid URL path SGML character.
Definition parser.hpp:3238
Test for valid URL username SGML character.
Definition parser.hpp:3034
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
T start
interval start
Definition interval.hpp:19
Definition parser.hpp:6407