stdex
Additional custom algorithms not covered by Standard C++
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "interval.hpp"
9#include "memory.hpp"
10#include "sal.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include <assert.h>
14#include <stdarg.h>
15#include <stdint.h>
16#include <limits>
17#include <list>
18#include <memory>
19#include <set>
20#include <string>
21#ifdef _WIN32
22#include <winsock2.h>
23#else
24#include <inaddr.h>
25#include <in6addr.h>
26#endif
27
28#ifdef _MSC_VER
29#pragma warning(push)
30#pragma warning(disable: 4100)
31#endif
32
/// Defines binary operator X and its compound-assignment form X= for scoped enum T.
///
/// Overloads are generated for enum/enum, enum/underlying-integer and
/// underlying-integer/enum operands. Every result is converted back to T.
/// The operators are constexpr so flag combinations may be used in constant expressions.
#define ENUM_FLAG_OPERATOR(T,X) \
inline constexpr T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline constexpr T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
inline constexpr T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline constexpr T& operator X= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
inline constexpr T& operator X= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }

/// Declares `enum class T : type` together with the ~, |, ^ and & flag operators.
///
/// The macro forward-declares the enum, defines the operators, and ends with the
/// opening of the enum definition so the enumerator list can follow the macro
/// invocation: `ENUM_FLAGS(my_flags, int) { none = 0, a = 1, b = 2 };`
#define ENUM_FLAGS(T, type) \
enum class T : type; \
inline constexpr T operator ~ (T t) { return static_cast<T>(~static_cast<std::underlying_type_t<T>>(t)); } \
ENUM_FLAG_OPERATOR(T,|) \
ENUM_FLAG_OPERATOR(T,^) \
ENUM_FLAG_OPERATOR(T,&) \
enum class T : type
46
47namespace stdex
48{
49 namespace parser
50 {
/// No matching flags set.
constexpr int match_default = 0;
/// Flag bit: character and string parsers compare case-insensitively.
constexpr int match_case_insensitive = 1 << 0;
/// Flag bit: when set, the space parsers do not reject line-break characters.
constexpr int match_multiline = 1 << 1;
57
61 template <class T>
63 {
64 public:
65 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
66 virtual ~basic_parser() {}
67
68 bool search(
69 _In_reads_or_z_(end) const T* text,
70 _In_ size_t start = 0,
71 _In_ size_t end = (size_t)-1,
72 _In_ int flags = match_default)
73 {
74 for (size_t i = start; i < end && text[i]; i++)
75 if (match(text, i, end, flags))
76 return true;
77 return false;
78 }
79
80 virtual bool match(
81 _In_reads_or_z_(end) const T* text,
82 _In_ size_t start = 0,
83 _In_ size_t end = (size_t)-1,
84 _In_ int flags = match_default) = 0;
85
86 template<class _Traits, class _Ax>
87 inline bool match(
88 const std::basic_string<T, _Traits, _Ax>& text,
89 _In_ size_t start = 0,
90 _In_ size_t end = (size_t)-1,
91 _In_ int flags = match_default)
92 {
93 return match(text.c_str(), start, std::min<size_t>(end, text.size()), flags);
94 }
95
96 virtual void invalidate()
97 {
98 interval.start = 1;
99 interval.end = 0;
100 }
101
102 protected:
104 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
105 {
106 if (text[start] == '&') {
107 // Potential entity start
108 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
109 for (chr_end = start + 1;; chr_end++) {
110 if (chr_end >= end || text[chr_end] == 0) {
111 // Unterminated entity
112 break;
113 }
114 if (text[chr_end] == ';') {
115 // Entity end
116 size_t n = chr_end - start - 1;
117 if (n >= 2 && text[start + 1] == '#') {
118 // Numerical entity
119 char32_t unicode;
120 if (text[start + 2] == 'x' || text[start + 2] == 'X')
121 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
122 else
123 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
124#ifdef _WIN32
125 if (unicode < 0x10000) {
126 buf[0] = (wchar_t)unicode;
127 buf[1] = 0;
128 }
129 else {
130 ucs4_to_surrogate_pair(buf, unicode);
131 buf[2] = 0;
132 }
133#else
134 buf[0] = (wchar_t)unicode;
135 buf[1] = 0;
136#endif
137 chr_end++;
138 return buf;
139 }
140 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
141 if (entity_w) {
142 chr_end++;
143 return entity_w;
144 }
145 // Unknown entity.
146 break;
147 }
148 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
149 // This char cannot possibly be a part of entity.
150 break;
151 }
152 }
153 }
154 buf[0] = text[start];
155 buf[1] = 0;
156 chr_end = start + 1;
157 return buf;
158 }
160
161 public:
163
164 protected:
165 std::locale m_locale;
166 };
167
170#ifdef _UNICODE
171 using tparser = wparser;
172#else
173 using tparser = parser;
174#endif
176
180 template <class T>
181 class basic_noop : public basic_parser<T>
182 {
183 public:
184 virtual bool match(
185 _In_reads_or_z_(end) const T* text,
186 _In_ size_t start = 0,
187 _In_ size_t end = (size_t)-1,
188 _In_ int flags = match_default)
189 {
190 assert(text || start >= end);
191 if (start < end && text[start]) {
192 interval.start = interval.end = start;
193 return true;
194 }
195 interval.start = (interval.end = start) + 1;
196 return false;
197 }
198 };
199
200 using noop = basic_noop<char>;
202#ifdef _UNICODE
203 using tnoop = wnoop;
204#else
205 using tnoop = noop;
206#endif
208
212 template <class T>
213 class basic_any_cu : public basic_parser<T>
214 {
215 public:
216 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
217
218 virtual bool match(
219 _In_reads_or_z_(end) const T* text,
220 _In_ size_t start = 0,
221 _In_ size_t end = (size_t)-1,
222 _In_ int flags = match_default)
223 {
224 assert(text || start >= end);
225 if (start < end && text[start]) {
226 interval.end = (interval.start = start) + 1;
227 return true;
228 }
229 interval.start = (interval.end = start) + 1;
230 return false;
231 }
232 };
233
236#ifdef _UNICODE
237 using tany_cu = wany_cu;
238#else
239 using tany_cu = any_cu;
240#endif
241
245 class sgml_any_cp : public basic_any_cu<char>
246 {
247 public:
248 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
249
250 virtual bool match(
251 _In_reads_or_z_(end) const char* text,
252 _In_ size_t start = 0,
253 _In_ size_t end = (size_t)-1,
254 _In_ int flags = match_default)
255 {
256 assert(text || start >= end);
257 if (start < end && text[start]) {
258 if (text[start] == '&') {
259 // SGML entity
260 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
261 for (interval.end = start + 1; interval.end < end && text[interval.end]; interval.end++)
262 if (text[interval.end] == ';') {
263 interval.end++;
264 interval.start = start;
265 return true;
266 }
267 else if (text[interval.end] == '&' || ctype.is(ctype.space, text[interval.end]))
268 break;
269 // Unterminated entity
270 }
271 interval.end = (interval.start = start) + 1;
272 return true;
273 }
274 interval.start = (interval.end = start) + 1;
275 return false;
276 }
277 };
278
282 template <class T>
283 class basic_cu : public basic_parser<T>
284 {
285 public:
286 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
287 basic_parser<T>(locale),
288 m_chr(chr),
289 m_invert(invert)
290 {}
291
292 virtual bool match(
293 _In_reads_or_z_(end) const T* text,
294 _In_ size_t start = 0,
295 _In_ size_t end = (size_t)-1,
296 _In_ int flags = match_default)
297 {
298 assert(text || start >= end);
299 if (start < end && text[start]) {
300 bool r;
301 if (flags & match_case_insensitive) {
302 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
303 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
304 }
305 else
306 r = text[start] == m_chr;
307 if (r && !m_invert || !r && m_invert) {
308 interval.end = (interval.start = start) + 1;
309 return true;
310 }
311 }
312 interval.start = (interval.end = start) + 1;
313 return false;
314 }
315
316 protected:
317 T m_chr;
318 bool m_invert;
319 };
320
321 using cu = basic_cu<char>;
322 using wcu = basic_cu<wchar_t>;
323#ifdef _UNICODE
324 using tcu = wcu;
325#else
326 using tcu = cu;
327#endif
328
332 class sgml_cp : public sgml_parser
333 {
334 public:
335 sgml_cp(const char* chr, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
336 sgml_parser(locale),
337 m_invert(invert)
338 {
339 assert(chr || !count);
340 wchar_t buf[3];
341 size_t chr_end;
342 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
343 }
344
345 virtual bool match(
346 _In_reads_or_z_(end) const char* text,
347 _In_ size_t start = 0,
348 _In_ size_t end = (size_t)-1,
349 _In_ int flags = match_default)
350 {
351 assert(text || start >= end);
352 if (start < end && text[start]) {
353 wchar_t buf[3];
354 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
355 bool r = ((flags & match_case_insensitive) ?
356 stdex::strnicmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size(), m_locale) :
357 stdex::strncmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size())) == 0;
358 if (r && !m_invert || !r && m_invert) {
359 interval.start = start;
360 return true;
361 }
362 }
363 interval.start = (interval.end = start) + 1;
364 return false;
365 }
366
367 protected:
368 std::wstring m_chr;
369 bool m_invert;
370 };
371
375 template <class T>
376 class basic_space_cu : public basic_parser<T>
377 {
378 public:
379 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
380 basic_parser<T>(locale),
381 m_invert(invert)
382 {}
383
384 virtual bool match(
385 _In_reads_or_z_(end) const T* text,
386 _In_ size_t start = 0,
387 _In_ size_t end = (size_t)-1,
388 _In_ int flags = match_default)
389 {
390 assert(text || start >= end);
391 if (start < end && text[start]) {
392 bool r =
393 ((flags & match_multiline) || !islbreak(text[start])) &&
394 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::space, text[start]);
395 if (r && !m_invert || !r && m_invert) {
396 interval.end = (interval.start = start) + 1;
397 return true;
398 }
399 }
400 interval.start = (interval.end = start) + 1;
401 return false;
402 }
403
404 protected:
405 bool m_invert;
406 };
407
410#ifdef _UNICODE
411 using tspace_cu = wspace_cu;
412#else
413 using tspace_cu = space_cu;
414#endif
415
419 class sgml_space_cp : public basic_space_cu<char>
420 {
421 public:
422 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
423 basic_space_cu<char>(invert, locale)
424 {}
425
426 virtual bool match(
427 _In_reads_or_z_(end) const char* text,
428 _In_ size_t start = 0,
429 _In_ size_t end = (size_t)-1,
430 _In_ int flags = match_default)
431 {
432 assert(text || start >= end);
433 if (start < end && text[start]) {
434 wchar_t buf[3];
435 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
436 const wchar_t* chr_end = chr + stdex::strlen(chr);
437 bool r =
438 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
439 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
440 if (r && !m_invert || !r && m_invert) {
441 interval.start = start;
442 return true;
443 }
444 }
445
446 interval.start = (interval.end = start) + 1;
447 return false;
448 }
449 };
450
454 template <class T>
455 class basic_punct_cu : public basic_parser<T>
456 {
457 public:
458 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
459 basic_parser<T>(locale),
460 m_invert(invert)
461 {}
462
463 virtual bool match(
464 _In_reads_or_z_(end) const T* text,
465 _In_ size_t start = 0,
466 _In_ size_t end = (size_t)-1,
467 _In_ int flags = match_default)
468 {
469 assert(text || start >= end);
470 if (start < end && text[start]) {
471 bool r = std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::punct, text[start]);
472 if (r && !m_invert || !r && m_invert) {
473 interval.end = (interval.start = start) + 1;
474 return true;
475 }
476 }
477 interval.start = (interval.end = start) + 1;
478 return false;
479 }
480
481 protected:
482 bool m_invert;
483 };
484
487#ifdef _UNICODE
488 using tpunct_cu = wpunct_cu;
489#else
490 using tpunct_cu = punct_cu;
491#endif
492
496 class sgml_punct_cp : public basic_punct_cu<char>
497 {
498 public:
499 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
500 basic_punct_cu<char>(invert, locale)
501 {}
502
503 virtual bool match(
504 _In_reads_or_z_(end) const char* text,
505 _In_ size_t start = 0,
506 _In_ size_t end = (size_t)-1,
507 _In_ int flags = match_default)
508 {
509 assert(text || start >= end);
510 if (start < end && text[start]) {
511 wchar_t buf[3];
512 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
513 const wchar_t* chr_end = chr + stdex::strlen(chr);
514 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
515 if (r && !m_invert || !r && m_invert) {
516 interval.start = start;
517 return true;
518 }
519 }
520 interval.start = (interval.end = start) + 1;
521 return false;
522 }
523 };
524
528 template <class T>
530 {
531 public:
532 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
533 basic_parser<T>(locale),
534 m_invert(invert)
535 {}
536
537 virtual bool match(
538 _In_reads_or_z_(end) const T* text,
539 _In_ size_t start = 0,
540 _In_ size_t end = (size_t)-1,
541 _In_ int flags = match_default)
542 {
543 assert(text || start >= end);
544 if (start < end && text[start]) {
545 bool r =
546 ((flags & match_multiline) || !islbreak(text[start])) &&
547 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
548 if (r && !m_invert || !r && m_invert) {
549 interval.end = (interval.start = start) + 1;
550 return true;
551 }
552 }
553 interval.start = (interval.end = start) + 1;
554 return false;
555 }
556
557 protected:
558 bool m_invert;
559 };
560
563#ifdef _UNICODE
565#else
567#endif
568
573 {
574 public:
575 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
576 basic_space_or_punct_cu<char>(invert, locale)
577 {}
578
579 virtual bool match(
580 _In_reads_or_z_(end) const char* text,
581 _In_ size_t start = 0,
582 _In_ size_t end = (size_t)-1,
583 _In_ int flags = match_default)
584 {
585 assert(text || start >= end);
586 if (start < end && text[start]) {
587 wchar_t buf[3];
588 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
589 const wchar_t* chr_end = chr + stdex::strlen(chr);
590 bool r =
591 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
592 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
593 if (r && !m_invert || !r && m_invert) {
594 interval.start = start;
595 return true;
596 }
597 }
598 interval.start = (interval.end = start) + 1;
599 return false;
600 }
601 };
602
606 template <class T>
607 class basic_bol : public basic_parser<T>
608 {
609 public:
610 basic_bol(bool invert = false) : m_invert(invert) {}
611
612 virtual bool match(
613 _In_reads_or_z_(end) const T* text,
614 _In_ size_t start = 0,
615 _In_ size_t end = (size_t)-1,
616 _In_ int flags = match_default)
617 {
618 assert(text || start >= end);
619 bool r = start == 0 || start <= end && islbreak(text[start - 1]);
620 if (r && !m_invert || !r && m_invert) {
621 interval.end = interval.start = start;
622 return true;
623 }
624 interval.start = (interval.end = start) + 1;
625 return false;
626 }
627
628 protected:
629 bool m_invert;
630 };
631
632 using bol = basic_bol<char>;
633 using wbol = basic_bol<wchar_t>;
634#ifdef _UNICODE
635 using tbol = wbol;
636#else
637 using tbol = bol;
638#endif
640
644 template <class T>
645 class basic_eol : public basic_parser<T>
646 {
647 public:
648 basic_eol(bool invert = false) : m_invert(invert) {}
649
650 virtual bool match(
651 _In_reads_or_z_(end) const T* text,
652 _In_ size_t start = 0,
653 _In_ size_t end = (size_t)-1,
654 _In_ int flags = match_default)
655 {
656 assert(text || start >= end);
657 bool r = islbreak(text[start]);
658 if (r && !m_invert || !r && m_invert) {
659 interval.end = interval.start = start;
660 return true;
661 }
662 interval.start = (interval.end = start) + 1;
663 return false;
664 }
665
666 protected:
667 bool m_invert;
668 };
669
670 using eol = basic_eol<char>;
671 using weol = basic_eol<wchar_t>;
672#ifdef _UNICODE
673 using teol = weol;
674#else
675 using teol = eol;
676#endif
678
679 template <class T>
680 class basic_set : public basic_parser<T>
681 {
682 public:
683 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
684 basic_parser<T>(locale),
685 hit_offset((size_t)-1),
686 m_invert(invert)
687 {}
688
689 virtual bool match(
690 _In_reads_or_z_(end) const T* text,
691 _In_ size_t start = 0,
692 _In_ size_t end = (size_t)-1,
693 _In_ int flags = match_default) = 0;
694
695 virtual void invalidate()
696 {
697 hit_offset = (size_t)-1;
699 }
700
701 public:
702 size_t hit_offset;
703
704 protected:
705 bool m_invert;
706 };
707
711 template <class T>
712 class basic_cu_set : public basic_set<T>
713 {
714 public:
716 _In_reads_or_z_(count) const T* set,
717 _In_ size_t count = (size_t)-1,
718 _In_ bool invert = false,
719 _In_ const std::locale& locale = std::locale()) :
720 basic_set<T>(invert, locale)
721 {
722 if (set)
723 m_set.assign(set, set + stdex::strnlen(set, count));
724 }
725
726 virtual bool match(
727 _In_reads_or_z_(end) const T* text,
728 _In_ size_t start = 0,
729 _In_ size_t end = (size_t)-1,
730 _In_ int flags = match_default)
731 {
732 assert(text || start >= end);
733 if (start < end && text[start]) {
734 const T* set = m_set.c_str();
735 size_t r = (flags & match_case_insensitive) ?
736 stdex::strnichr(set, m_set.size(), text[start], m_locale) :
737 stdex::strnchr(set, m_set.size(), text[start]);
738 if (r != stdex::npos && !m_invert || r == stdex::npos && m_invert) {
739 hit_offset = r;
740 interval.end = (interval.start = start) + 1;
741 return true;
742 }
743 }
744 hit_offset = (size_t)-1;
745 interval.start = (interval.end = start) + 1;
746 return false;
747 }
748
749 protected:
750 std::basic_string<T> m_set;
751 };
752
755#ifdef _UNICODE
756 using tcu_set = wcu_set;
757#else
758 using tcu_set = cu_set;
759#endif
760
764 class sgml_cp_set : public basic_set<char>
765 {
766 public:
767 sgml_cp_set(const char* set, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
768 basic_set<char>(invert, locale)
769 {
770 if (set)
771 m_set = sgml2wstr(set, count);
772 }
773
774 virtual bool match(
775 _In_reads_or_z_(end) const char* text,
776 _In_ size_t start = 0,
777 _In_ size_t end = (size_t)-1,
778 _In_ int flags = match_default)
779 {
780 assert(text || start >= end);
781 if (start < end && text[start]) {
782 wchar_t buf[3];
783 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
784 const wchar_t* set = m_set.c_str();
785 size_t r = (flags & match_case_insensitive) ?
786 stdex::strnistr(set, m_set.size(), chr, m_locale) :
787 stdex::strnstr(set, m_set.size(), chr);
788 if (r != stdex::npos && !m_invert || r == stdex::npos && m_invert) {
789 hit_offset = r;
790 interval.start = start;
791 return true;
792 }
793 }
794 hit_offset = (size_t)-1;
795 interval.start = (interval.end = start) + 1;
796 return false;
797 }
798
799 protected:
800 std::wstring m_set;
801 };
802
806 template <class T>
807 class basic_string : public basic_parser<T>
808 {
809 public:
811 _In_reads_or_z_(count) const T* str,
812 _In_ size_t count = (size_t)-1,
813 _In_ const std::locale& locale = std::locale()) :
814 basic_parser<T>(locale),
815 m_str(str, str + stdex::strnlen(str, count))
816 {}
817
818 virtual bool match(
819 _In_reads_or_z_(end) const T* text,
820 _In_ size_t start = 0,
821 _In_ size_t end = (size_t)-1,
822 _In_ int flags = match_default)
823 {
824 assert(text || start >= end);
825 size_t
826 m = m_str.size(),
827 n = std::min<size_t>(end - start, m);
828 bool r = ((flags & match_case_insensitive) ?
829 stdex::strnicmp(text + start, n, m_str.c_str(), m, m_locale) :
830 stdex::strncmp(text + start, n, m_str.c_str(), m)) == 0;
831 if (r) {
832 interval.end = (interval.start = start) + n;
833 return true;
834 }
835 interval.start = (interval.end = start) + 1;
836 return false;
837 }
838
839 protected:
840 std::basic_string<T> m_str;
841 };
842
845#ifdef _UNICODE
846 using tstring = wstring;
847#else
848 using tstring = string;
849#endif
850
855 {
856 public:
857 sgml_string(const char* str, size_t count = (size_t)-1, _In_ const std::locale& locale = std::locale()) :
858 sgml_parser(locale),
859 m_str(sgml2wstr(str, count))
860 {}
861
862 virtual bool match(
863 _In_reads_or_z_(end) const char* text,
864 _In_ size_t start = 0,
865 _In_ size_t end = (size_t)-1,
866 _In_ int flags = match_default)
867 {
868 assert(text || start >= end);
869 const wchar_t* str = m_str.c_str();
870 const bool case_insensitive = flags & match_case_insensitive ? true : false;
871 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
872 for (interval.end = start;;) {
873 if (!*str) {
874 interval.start = start;
875 return true;
876 }
877 if (interval.end >= end || !text[interval.end]) {
878 interval.start = (interval.end = start) + 1;
879 return false;
880 }
881 wchar_t buf[3];
882 const wchar_t* chr = next_sgml_cp(text, interval.end, end, interval.end, buf);
883 for (; *chr; ++str, ++chr) {
884 if (!*str ||
885 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
886 {
887 interval.start = (interval.end = start) + 1;
888 return false;
889 }
890 }
891 }
892 }
893
894 protected:
895 std::wstring m_str;
896 };
897
901 template <class T>
903 {
904 public:
905 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = (size_t)-1, bool greedy = true) :
906 m_el(el),
907 m_min_iterations(min_iterations),
908 m_max_iterations(max_iterations),
909 m_greedy(greedy)
910 {}
911
912 virtual bool match(
913 _In_reads_or_z_(end) const T* text,
914 _In_ size_t start = 0,
915 _In_ size_t end = (size_t)-1,
916 _In_ int flags = match_default)
917 {
918 assert(text || start >= end);
919 interval.start = interval.end = start;
920 for (size_t i = 0; ; i++) {
921 if (!m_greedy && i >= m_min_iterations || i >= m_max_iterations)
922 return true;
923 if (!m_el->match(text, interval.end, end, flags)) {
924 if (i >= m_min_iterations)
925 return true;
926 break;
927 }
928 if (m_el->interval.end == interval.end) {
929 // Element did match, but the matching interval was empty. Quit instead of spinning.
930 return true;
931 }
932 interval.end = m_el->interval.end;
933 }
934 interval.start = (interval.end = start) + 1;
935 return false;
936 }
937
938 protected:
939 std::shared_ptr<basic_parser<T>> m_el;
942 bool m_greedy;
943 };
944
947#ifdef _UNICODE
948 using titerations = witerations;
949#else
950 using titerations = iterations;
951#endif
953
957 template <class T>
959 {
960 protected:
961 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
962
963 public:
965 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
966 _In_ size_t count,
967 _In_ const std::locale& locale = std::locale()) :
968 basic_parser<T>(locale)
969 {
970 assert(el || !count);
971 m_collection.reserve(count);
972 for (size_t i = 0; i < count; i++)
973 m_collection.push_back(el[i]);
974 }
975
977 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
978 _In_ const std::locale& locale = std::locale()) :
979 basic_parser<T>(locale),
980 m_collection(std::move(collection))
981 {}
982
983 virtual void invalidate()
984 {
985 for (auto& el: m_collection)
986 el->invalidate();
988 }
989
990 protected:
991 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
992 };
993
997 template <class T>
999 {
1000 public:
1002 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1003 _In_ size_t count = 0,
1004 _In_ const std::locale& locale = std::locale()) :
1005 parser_collection<T>(el, count, locale)
1006 {}
1007
1009 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1010 _In_ const std::locale& locale = std::locale()) :
1011 parser_collection<T>(std::move(collection), locale)
1012 {}
1013
1014 virtual bool match(
1015 _In_reads_or_z_(end) const T* text,
1016 _In_ size_t start = 0,
1017 _In_ size_t end = (size_t)-1,
1018 _In_ int flags = match_default)
1019 {
1020 assert(text || start >= end);
1021 interval.end = start;
1022 for (auto i = m_collection.begin(); i != m_collection.end(); ++i) {
1023 if (!(*i)->match(text, interval.end, end, flags)) {
1024 for (++i; i != m_collection.end(); ++i)
1025 (*i)->invalidate();
1026 interval.start = (interval.end = start) + 1;
1027 return false;
1028 }
1029 interval.end = (*i)->interval.end;
1030 }
1031 interval.start = start;
1032 return true;
1033 }
1034 };
1035
1038#ifdef _UNICODE
1039 using tsequence = wsequence;
1040#else
1041 using tsequence = sequence;
1042#endif
1044
1048 template <class T>
1050 {
1051 protected:
1052 basic_branch(_In_ const std::locale& locale) :
1053 parser_collection<T>(locale),
1054 hit_offset((size_t)-1)
1055 {}
1056
1057 public:
1059 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1060 _In_ size_t count = 0,
1061 _In_ const std::locale& locale = std::locale()) :
1062 parser_collection<T>(el, count, locale),
1063 hit_offset((size_t)-1)
1064 {}
1065
1067 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1068 _In_ const std::locale& locale = std::locale()) :
1069 parser_collection<T>(std::move(collection), locale),
1070 hit_offset((size_t)-1)
1071 {}
1072
1073 virtual bool match(
1074 _In_reads_or_z_(end) const T* text,
1075 _In_ size_t start = 0,
1076 _In_ size_t end = (size_t)-1,
1077 _In_ int flags = match_default)
1078 {
1079 assert(text || start >= end);
1080 hit_offset = 0;
1081 for (auto i = m_collection.begin(); i != m_collection.end(); ++i, ++hit_offset) {
1082 if ((*i)->match(text, start, end, flags)) {
1083 interval = (*i)->interval;
1084 for (++i; i != m_collection.end(); ++i)
1085 (*i)->invalidate();
1086 return true;
1087 }
1088 }
1089 hit_offset = (size_t)-1;
1090 interval.start = (interval.end = start) + 1;
1091 return false;
1092 }
1093
1094 virtual void invalidate()
1095 {
1096 hit_offset = (size_t)-1;
1098 }
1099
1100 public:
1101 size_t hit_offset;
1102 };
1103
1104 using branch = basic_branch<char>;
1106#ifdef _UNICODE
1107 using tbranch = wbranch;
1108#else
1109 using tbranch = branch;
1110#endif
1112
1116 template <class T, class T_parser = basic_string<T>>
1118 {
1119 public:
1120 inline basic_string_branch(
1121 _In_reads_(count) const T* str_z = nullptr,
1122 _In_ size_t count = 0,
1123 _In_ const std::locale& locale = std::locale()) :
1124 basic_branch<T>(locale)
1125 {
1126 build(str_z, count);
1127 }
1128
1129 inline basic_string_branch(_In_z_ const T* str, ...) :
1130 basic_branch<T>(std::locale())
1131 {
1132 va_list params;
1133 va_start(params, str);
1134 build(str, params);
1135 va_end(params);
1136 }
1137
1138 inline basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1139 basic_branch<T>(locale)
1140 {
1141 va_list params;
1142 va_start(params, str);
1143 build(str, params);
1144 va_end(params);
1145 }
1146
1147 protected:
1148 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1149 {
1150 assert(str_z || !count);
1151 if (count) {
1152 size_t offset, n;
1153 for (
1154 offset = n = 0;
1155 offset < count && str_z[offset];
1156 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1157 m_collection.reserve(n);
1158 for (
1159 offset = 0;
1160 offset < count && str_z[offset];
1161 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1162 m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, m_locale)));
1163 }
1164 }
1165
1166 void build(_In_z_ const T* str, _In_ va_list params)
1167 {
1168 const T* p;
1169 for (
1170 m_collection.push_back(std::move(std::make_shared<T_parser>(str, (size_t)-1, m_locale)));
1171 (p = va_arg(params, const T*)) != nullptr;
1172 m_collection.push_back(std::move(std::make_shared<T_parser>(p, (size_t)-1, m_locale))));
1173 }
1174 };
1175
1178#ifdef _UNICODE
1180#else
1182#endif
1184
1188 template <class T>
1190 {
1191 public:
1193 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1194 _In_ size_t count = 0,
1195 _In_ const std::locale& locale = std::locale()) :
1196 parser_collection<T>(el, count, locale)
1197 {}
1198
1200 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1201 _In_ const std::locale& locale = std::locale()) :
1202 parser_collection<T>(std::move(collection), locale)
1203 {}
1204
1205 virtual bool match(
1206 _In_reads_or_z_(end) const T* text,
1207 _In_ size_t start = 0,
1208 _In_ size_t end = (size_t)-1,
1209 _In_ int flags = match_default)
1210 {
1211 assert(text || start >= end);
1212 for (auto& el: m_collection)
1213 el->invalidate();
1214 if (match_recursively(text, start, end, flags)) {
1215 interval.start = start;
1216 return true;
1217 }
1218 interval.start = (interval.end = start) + 1;
1219 return false;
1220 }
1221
1222 protected:
1223 bool match_recursively(
1224 _In_reads_or_z_(end) const T* text,
1225 _In_ size_t start = 0,
1226 _In_ size_t end = (size_t)-1,
1227 _In_ int flags = match_default)
1228 {
1229 bool all_matched = true;
1230 for (auto& el: m_collection) {
1231 if (!el->interval) {
1232 // Element was not matched in permutatuion yet.
1233 all_matched = false;
1234 if (el->match(text, start, end, flags)) {
1235 // Element matched for the first time.
1236 if (match_recursively(text, el->interval.end, end, flags)) {
1237 // Rest of the elements matched too.
1238 return true;
1239 }
1240 el->invalidate();
1241 }
1242 }
1243 }
1244 if (all_matched) {
1245 interval.end = start;
1246 return true;
1247 }
1248 return false;
1249 }
1250 };
1251
1254#ifdef _UNICODE
1255 using tpermutation = wpermutation;
1256#else
1257 using tpermutation = permutation;
1258#endif
1260
1264 template <class T>
1265 class basic_integer : public basic_parser<T>
1266 {
1267 public:
1268 basic_integer(_In_ const std::locale& locale = std::locale()) :
1269 basic_parser<T>(locale),
1270 value(0)
1271 {}
1272
1273 virtual void invalidate()
1274 {
1275 value = 0;
1277 }
1278
1279 public:
1280 size_t value;
1281 };
1282
1286 template <class T>
1288 {
1289 public:
1291 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1292 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1293 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1294 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1295 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1296 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1297 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1298 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1299 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1300 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1301 _In_ const std::locale& locale = std::locale()) :
1302 basic_integer<T>(locale),
1303 m_digit_0(digit_0),
1304 m_digit_1(digit_1),
1305 m_digit_2(digit_2),
1306 m_digit_3(digit_3),
1307 m_digit_4(digit_4),
1308 m_digit_5(digit_5),
1309 m_digit_6(digit_6),
1310 m_digit_7(digit_7),
1311 m_digit_8(digit_8),
1312 m_digit_9(digit_9)
1313 {}
1314
1315 virtual bool match(
1316 _In_reads_or_z_(end) const T* text,
1317 _In_ size_t start = 0,
1318 _In_ size_t end = (size_t)-1,
1319 _In_ int flags = match_default)
1320 {
1321 assert(text || start >= end);
1322 for (interval.end = start, value = 0; interval.end < end && text[interval.end];) {
1323 size_t dig;
1324 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; interval.end = m_digit_0->interval.end; }
1325 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; interval.end = m_digit_1->interval.end; }
1326 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; interval.end = m_digit_2->interval.end; }
1327 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; interval.end = m_digit_3->interval.end; }
1328 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; interval.end = m_digit_4->interval.end; }
1329 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; interval.end = m_digit_5->interval.end; }
1330 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; interval.end = m_digit_6->interval.end; }
1331 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; interval.end = m_digit_7->interval.end; }
1332 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; interval.end = m_digit_8->interval.end; }
1333 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; interval.end = m_digit_9->interval.end; }
1334 else break;
1335 value = value * 10 + dig;
1336 }
1337 if (start < interval.end) {
1338 interval.start = start;
1339 return true;
1340 }
1341 interval.start = (interval.end = start) + 1;
1342 return false;
1343 }
1344
1345 protected:
1346 std::shared_ptr<basic_parser<T>>
1347 m_digit_0,
1348 m_digit_1,
1349 m_digit_2,
1350 m_digit_3,
1351 m_digit_4,
1352 m_digit_5,
1353 m_digit_6,
1354 m_digit_7,
1355 m_digit_8,
1356 m_digit_9;
1357 };
1358
1361#ifdef _UNICODE
1362 using tinteger10 = winteger10;
1363#else
1364 using tinteger10 = integer10;
1365#endif
1367
1371 template <class T>
1373 {
1374 public:
1376 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1377 _In_ const std::shared_ptr<basic_set<T>>& separator,
1378 _In_ const std::locale& locale = std::locale()) :
1379 basic_integer<T>(locale),
1380 digit_count(0),
1381 has_separators(false),
1382 m_digits(digits),
1383 m_separator(separator)
1384 {}
1385
1386 virtual bool match(
1387 _In_reads_or_z_(end) const T* text,
1388 _In_ size_t start = 0,
1389 _In_ size_t end = (size_t)-1,
1390 _In_ int flags = match_default)
1391 {
1392 assert(text || start >= end);
1393 if (m_digits->match(text, start, end, flags)) {
1394 // Leading part match.
1395 value = m_digits->value;
1396 digit_count = m_digits->interval.size();
1397 has_separators = false;
1398 interval.start = start;
1399 interval.end = m_digits->interval.end;
1400 if (m_digits->interval.size() <= 3) {
1401 // Maybe separated with thousand separators?
1402 size_t hit_offset = (size_t)-1;
1403 while (m_separator->match(text, interval.end, end, flags) &&
1404 (hit_offset == (size_t)-1 || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1405 m_digits->match(text, m_separator->interval.end, end, flags) &&
1406 m_digits->interval.size() == 3)
1407 {
1408 // Thousand separator and three-digit integer followed.
1409 value = value * 1000 + m_digits->value;
1410 digit_count += 3;
1411 has_separators = true;
1412 interval.end = m_digits->interval.end;
1413 hit_offset = m_separator->hit_offset;
1414 }
1415 }
1416
1417 return true;
1418 }
1419 value = 0;
1420 interval.start = (interval.end = start) + 1;
1421 return false;
1422 }
1423
1424 virtual void invalidate()
1425 {
1426 digit_count = 0;
1427 has_separators = false;
1429 }
1430
1431 public:
1434
1435 protected:
1436 std::shared_ptr<basic_integer10<T>> m_digits;
1437 std::shared_ptr<basic_set<T>> m_separator;
1438 };
1439
1442#ifdef _UNICODE
1443 using tinteger10ts = winteger10ts;
1444#else
1445 using tinteger10ts = integer10ts;
1446#endif
1448
1452 template <class T>
1454 {
1455 public:
1457 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1458 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1459 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1460 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1461 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1462 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1463 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1464 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1465 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1466 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1467 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1468 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1469 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1470 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1471 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1472 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1473 _In_ const std::locale& locale = std::locale()) :
1474 basic_integer<T>(locale),
1475 m_digit_0(digit_0),
1476 m_digit_1(digit_1),
1477 m_digit_2(digit_2),
1478 m_digit_3(digit_3),
1479 m_digit_4(digit_4),
1480 m_digit_5(digit_5),
1481 m_digit_6(digit_6),
1482 m_digit_7(digit_7),
1483 m_digit_8(digit_8),
1484 m_digit_9(digit_9),
1485 m_digit_10(digit_10),
1486 m_digit_11(digit_11),
1487 m_digit_12(digit_12),
1488 m_digit_13(digit_13),
1489 m_digit_14(digit_14),
1490 m_digit_15(digit_15)
1491 {}
1492
1493 virtual bool match(
1494 _In_reads_or_z_(end) const T* text,
1495 _In_ size_t start = 0,
1496 _In_ size_t end = (size_t)-1,
1497 _In_ int flags = match_default)
1498 {
1499 assert(text || start >= end);
1500 for (interval.end = start, value = 0; interval.end < end && text[interval.end];) {
1501 size_t dig;
1502 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; interval.end = m_digit_0->interval.end; }
1503 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; interval.end = m_digit_1->interval.end; }
1504 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; interval.end = m_digit_2->interval.end; }
1505 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; interval.end = m_digit_3->interval.end; }
1506 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; interval.end = m_digit_4->interval.end; }
1507 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; interval.end = m_digit_5->interval.end; }
1508 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; interval.end = m_digit_6->interval.end; }
1509 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; interval.end = m_digit_7->interval.end; }
1510 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; interval.end = m_digit_8->interval.end; }
1511 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; interval.end = m_digit_9->interval.end; }
1512 else if (m_digit_10->match(text, interval.end, end, flags)) { dig = 10; interval.end = m_digit_10->interval.end; }
1513 else if (m_digit_11->match(text, interval.end, end, flags)) { dig = 11; interval.end = m_digit_11->interval.end; }
1514 else if (m_digit_12->match(text, interval.end, end, flags)) { dig = 12; interval.end = m_digit_12->interval.end; }
1515 else if (m_digit_13->match(text, interval.end, end, flags)) { dig = 13; interval.end = m_digit_13->interval.end; }
1516 else if (m_digit_14->match(text, interval.end, end, flags)) { dig = 14; interval.end = m_digit_14->interval.end; }
1517 else if (m_digit_15->match(text, interval.end, end, flags)) { dig = 15; interval.end = m_digit_15->interval.end; }
1518 else break;
1519 value = value * 16 + dig;
1520 }
1521 if (start < interval.end) {
1522 interval.start = start;
1523 return true;
1524 }
1525 interval.start = (interval.end = start) + 1;
1526 return false;
1527 }
1528
1529 protected:
1530 std::shared_ptr<basic_parser<T>>
1531 m_digit_0,
1532 m_digit_1,
1533 m_digit_2,
1534 m_digit_3,
1535 m_digit_4,
1536 m_digit_5,
1537 m_digit_6,
1538 m_digit_7,
1539 m_digit_8,
1540 m_digit_9,
1541 m_digit_10,
1542 m_digit_11,
1543 m_digit_12,
1544 m_digit_13,
1545 m_digit_14,
1546 m_digit_15;
1547 };
1548
1551#ifdef _UNICODE
1552 using tinteger16 = winteger16;
1553#else
1554 using tinteger16 = integer16;
1555#endif
1557
1561 template <class T>
1563 {
1564 public:
1566 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1567 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1568 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1569 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1570 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1571 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1572 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1573 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1574 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1575 _In_ const std::locale& locale = std::locale()) :
1576 basic_integer<T>(locale),
1577 m_digit_1(digit_1),
1578 m_digit_5(digit_5),
1579 m_digit_10(digit_10),
1580 m_digit_50(digit_50),
1581 m_digit_100(digit_100),
1582 m_digit_500(digit_500),
1583 m_digit_1000(digit_1000),
1584 m_digit_5000(digit_5000),
1585 m_digit_10000(digit_10000)
1586 {}
1587
1588 virtual bool match(
1589 _In_reads_or_z_(end) const T* text,
1590 _In_ size_t start = 0,
1591 _In_ size_t end = (size_t)-1,
1592 _In_ int flags = match_default)
1593 {
1594 assert(text || start >= end);
1595 size_t
1596 dig[5] = { (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1 },
1597 end2;
1598
1599 for (interval.end = start, value = 0; interval.end < end && text[interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], interval.end = end2) {
1600 if (m_digit_1 && m_digit_1->match(text, interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1601 else if (m_digit_5 && m_digit_5->match(text, interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1602 else if (m_digit_10 && m_digit_10->match(text, interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1603 else if (m_digit_50 && m_digit_50->match(text, interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1604 else if (m_digit_100 && m_digit_100->match(text, interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1605 else if (m_digit_500 && m_digit_500->match(text, interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1606 else if (m_digit_1000 && m_digit_1000->match(text, interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1607 else if (m_digit_5000 && m_digit_5000->match(text, interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1608 else if (m_digit_10000 && m_digit_10000->match(text, interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1609 else break;
1610
1611 // Store first digit.
1612 if (dig[4] == (size_t)-1) dig[4] = dig[0];
1613
1614 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1615 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1616 break;
1617 }
1618 if (dig[0] <= dig[1]) {
1619 // Digit is less or equal previous one: add.
1620 value += dig[0];
1621 }
1622 else if (
1623 dig[1] == 1 && (dig[0] == 5 || dig[0] == 10) ||
1624 dig[1] == 10 && (dig[0] == 50 || dig[0] == 100) ||
1625 dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000) ||
1626 dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000))
1627 {
1628 // Digit is up to two orders bigger than previous one: subtract. But...
1629 if (dig[2] < dig[0]) {
1630 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1631 break;
1632 }
1633 value -= dig[1]; // Cancel addition in the previous step.
1634 dig[0] -= dig[1]; // Combine last two digits.
1635 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1636 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1637 value += dig[0]; // Add combined value.
1638 }
1639 else {
1640 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1641 break;
1642 }
1643 }
1644 if (value) {
1645 interval.start = start;
1646 return true;
1647 }
1648 interval.start = (interval.end = start) + 1;
1649 return false;
1650 }
1651
1652 protected:
1653 std::shared_ptr<basic_parser<T>>
1654 m_digit_1,
1655 m_digit_5,
1656 m_digit_10,
1657 m_digit_50,
1658 m_digit_100,
1659 m_digit_500,
1660 m_digit_1000,
1661 m_digit_5000,
1662 m_digit_10000;
1663 };
1664
1667#ifdef _UNICODE
1669#else
1671#endif
1673
1677 template <class T>
1679 {
1680 public:
1682 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1683 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1684 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1685 _In_ const std::locale& locale = std::locale()) :
1686 basic_parser<T>(locale),
1687 numerator(_numerator),
1688 fraction_line(_fraction_line),
1689 denominator(_denominator)
1690 {}
1691
1692 virtual bool match(
1693 _In_reads_or_z_(end) const T* text,
1694 _In_ size_t start = 0,
1695 _In_ size_t end = (size_t)-1,
1696 _In_ int flags = match_default)
1697 {
1698 assert(text || start >= end);
1699 if (numerator->match(text, start, end, flags) &&
1700 fraction_line->match(text, numerator->interval.end, end, flags) &&
1701 denominator->match(text, fraction_line->interval.end, end, flags))
1702 {
1703 interval.start = start;
1704 interval.end = denominator->interval.end;
1705 return true;
1706 }
1707 numerator->invalidate();
1708 fraction_line->invalidate();
1709 denominator->invalidate();
1710 interval.start = (interval.end = start) + 1;
1711 return false;
1712 }
1713
1714 virtual void invalidate()
1715 {
1716 numerator->invalidate();
1717 fraction_line->invalidate();
1718 denominator->invalidate();
1720 }
1721
1722 public:
1723 std::shared_ptr<basic_parser<T>> numerator;
1724 std::shared_ptr<basic_parser<T>> fraction_line;
1725 std::shared_ptr<basic_parser<T>> denominator;
1726 };
1727
1730#ifdef _UNICODE
1731 using tfraction = wfraction;
1732#else
1733 using tfraction = fraction;
1734#endif
1736
1740 template <class T>
1741 class basic_score : public basic_parser<T>
1742 {
1743 public:
1745 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1746 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1747 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1748 _In_ const std::shared_ptr<basic_parser<T>>& space,
1749 _In_ const std::locale& locale = std::locale()) :
1750 basic_parser<T>(locale),
1751 home(_home),
1752 separator(_separator),
1753 guest(_guest),
1754 m_space(space)
1755 {}
1756
1757 virtual bool match(
1758 _In_reads_or_z_(end) const T* text,
1759 _In_ size_t start = 0,
1760 _In_ size_t end = (size_t)-1,
1761 _In_ int flags = match_default)
1762 {
1763 assert(text || start >= end);
1764 interval.end = start;
1765
1766 if (home->match(text, interval.end, end, flags))
1767 interval.end = home->interval.end;
1768 else
1769 goto end;
1770
1771 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1772 for (; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1773
1774 if (separator->match(text, interval.end, end, flags))
1775 interval.end = separator->interval.end;
1776 else
1777 goto end;
1778
1779 for (; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1780
1781 if (guest->match(text, interval.end, end, flags))
1782 interval.end = guest->interval.end;
1783 else
1784 goto end;
1785
1786 interval.start = start;
1787 return true;
1788
1789 end:
1790 home->invalidate();
1791 separator->invalidate();
1792 guest->invalidate();
1793 interval.start = (interval.end = start) + 1;
1794 return false;
1795 }
1796
1797 virtual void invalidate()
1798 {
1799 home->invalidate();
1800 separator->invalidate();
1801 guest->invalidate();
1803 }
1804
1805 public:
1806 std::shared_ptr<basic_parser<T>> home;
1807 std::shared_ptr<basic_parser<T>> separator;
1808 std::shared_ptr<basic_parser<T>> guest;
1809
1810 protected:
1811 std::shared_ptr<basic_parser<T>> m_space;
1812 };
1813
1814 using score = basic_score<char>;
1816#ifdef _UNICODE
1817 using tscore = wscore;
1818#else
1819 using tscore = score;
1820#endif
1822
1826 template <class T>
1828 {
1829 public:
1831 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1832 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1833 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1834 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1835 _In_ const std::locale& locale = std::locale()) :
1836 basic_parser<T>(locale),
1837 positive_sign(_positive_sign),
1838 negative_sign(_negative_sign),
1839 special_sign(_special_sign),
1840 number(_number)
1841 {}
1842
1843 virtual bool match(
1844 _In_reads_or_z_(end) const T* text,
1845 _In_ size_t start = 0,
1846 _In_ size_t end = (size_t)-1,
1847 _In_ int flags = match_default)
1848 {
1849 assert(text || start >= end);
1850 interval.end = start;
1851 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
1852 interval.end = positive_sign->interval.end;
1853 if (negative_sign) negative_sign->invalidate();
1854 if (special_sign) special_sign->invalidate();
1855 }
1856 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
1857 interval.end = negative_sign->interval.end;
1858 if (positive_sign) positive_sign->invalidate();
1859 if (special_sign) special_sign->invalidate();
1860 }
1861 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
1862 interval.end = special_sign->interval.end;
1863 if (positive_sign) positive_sign->invalidate();
1864 if (negative_sign) negative_sign->invalidate();
1865 }
1866 else {
1867 if (positive_sign) positive_sign->invalidate();
1868 if (negative_sign) negative_sign->invalidate();
1869 if (special_sign) special_sign->invalidate();
1870 }
1871 if (number->match(text, interval.end, end, flags)) {
1872 interval.start = start;
1873 interval.end = number->interval.end;
1874 return true;
1875 }
1876 if (positive_sign) positive_sign->invalidate();
1877 if (negative_sign) negative_sign->invalidate();
1878 if (special_sign) special_sign->invalidate();
1879 number->invalidate();
1880 interval.start = (interval.end = start) + 1;
1881 return false;
1882 }
1883
1884 virtual void invalidate()
1885 {
1886 if (positive_sign) positive_sign->invalidate();
1887 if (negative_sign) negative_sign->invalidate();
1888 if (special_sign) special_sign->invalidate();
1889 number->invalidate();
1891 }
1892
1893 public:
1894 std::shared_ptr<basic_parser<T>> positive_sign;
1895 std::shared_ptr<basic_parser<T>> negative_sign;
1896 std::shared_ptr<basic_parser<T>> special_sign;
1897 std::shared_ptr<basic_parser<T>> number;
1898 };
1899
1902#ifdef _UNICODE
1904#else
1906#endif
1908
1912 template <class T>
1914 {
1915 public:
1917 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1918 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1919 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1920 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1921 _In_ const std::shared_ptr<basic_parser<T>>& space,
1922 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1923 _In_ const std::locale& locale = std::locale()) :
1924 basic_parser<T>(locale),
1925 positive_sign(_positive_sign),
1926 negative_sign(_negative_sign),
1927 special_sign(_special_sign),
1928 integer(_integer),
1929 fraction(_fraction),
1930 m_space(space)
1931 {}
1932
1933 virtual bool match(
1934 _In_reads_or_z_(end) const T* text,
1935 _In_ size_t start = 0,
1936 _In_ size_t end = (size_t)-1,
1937 _In_ int flags = match_default)
1938 {
1939 assert(text || start >= end);
1940 interval.end = start;
1941
1942 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
1943 interval.end = positive_sign->interval.end;
1944 if (negative_sign) negative_sign->invalidate();
1945 if (special_sign) special_sign->invalidate();
1946 }
1947 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
1948 interval.end = negative_sign->interval.end;
1949 if (positive_sign) positive_sign->invalidate();
1950 if (special_sign) special_sign->invalidate();
1951 }
1952 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
1953 interval.end = special_sign->interval.end;
1954 if (positive_sign) positive_sign->invalidate();
1955 if (negative_sign) negative_sign->invalidate();
1956 }
1957 else {
1958 if (positive_sign) positive_sign->invalidate();
1959 if (negative_sign) negative_sign->invalidate();
1960 if (special_sign) special_sign->invalidate();
1961 }
1962
1963 // Check for <integer> <fraction>
1964 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1965 if (integer->match(text, interval.end, end, flags) &&
1966 m_space->match(text, integer->interval.end, end, space_match_flags))
1967 {
1968 for (interval.end = m_space->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1969 if (fraction->match(text, interval.end, end, flags)) {
1970 interval.start = start;
1972 return true;
1973 }
1974 fraction->invalidate();
1975 interval.start = start;
1976 interval.end = integer->interval.end;
1977 return true;
1978 }
1979
1980 // Check for <fraction>
1981 if (fraction->match(text, interval.end, end, flags)) {
1982 integer->invalidate();
1983 interval.start = start;
1985 return true;
1986 }
1987
1988 // Check for <integer>
1989 if (integer->match(text, interval.end, end, flags)) {
1990 fraction->invalidate();
1991 interval.start = start;
1992 interval.end = integer->interval.end;
1993 return true;
1994 }
1995
1996 if (positive_sign) positive_sign->invalidate();
1997 if (negative_sign) negative_sign->invalidate();
1998 if (special_sign) special_sign->invalidate();
1999 integer->invalidate();
2000 fraction->invalidate();
2001 interval.start = (interval.end = start) + 1;
2002 return false;
2003 }
2004
2005 virtual void invalidate()
2006 {
2007 if (positive_sign) positive_sign->invalidate();
2008 if (negative_sign) negative_sign->invalidate();
2009 if (special_sign) special_sign->invalidate();
2010 integer->invalidate();
2011 fraction->invalidate();
2013 }
2014
2015 public:
2016 std::shared_ptr<basic_parser<T>> positive_sign;
2017 std::shared_ptr<basic_parser<T>> negative_sign;
2018 std::shared_ptr<basic_parser<T>> special_sign;
2019 std::shared_ptr<basic_parser<T>> integer;
2020 std::shared_ptr<basic_parser<T>> fraction;
2021
2022 protected:
2023 std::shared_ptr<basic_parser<T>> m_space;
2024 };
2025
2028#ifdef _UNICODE
2030#else
2032#endif
2034
2038 template <class T>
2040 {
2041 public:
2043 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2044 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2045 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2046 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2047 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2048 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2049 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2050 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2051 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2052 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2053 _In_ const std::locale& locale = std::locale()) :
2054 basic_parser<T>(locale),
2055 positive_sign(_positive_sign),
2056 negative_sign(_negative_sign),
2057 special_sign(_special_sign),
2058 integer(_integer),
2059 decimal_separator(_decimal_separator),
2060 decimal(_decimal),
2061 exponent_symbol(_exponent_symbol),
2062 positive_exp_sign(_positive_exp_sign),
2063 negative_exp_sign(_negative_exp_sign),
2064 exponent(_exponent),
2065 value(std::numeric_limits<double>::quiet_NaN())
2066 {}
2067
2068 virtual bool match(
2069 _In_reads_or_z_(end) const T* text,
2070 _In_ size_t start = 0,
2071 _In_ size_t end = (size_t)-1,
2072 _In_ int flags = match_default)
2073 {
2074 assert(text || start >= end);
2075 interval.end = start;
2076
2077 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
2078 interval.end = positive_sign->interval.end;
2079 if (negative_sign) negative_sign->invalidate();
2080 if (special_sign) special_sign->invalidate();
2081 }
2082 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
2083 interval.end = negative_sign->interval.end;
2084 if (positive_sign) positive_sign->invalidate();
2085 if (special_sign) special_sign->invalidate();
2086 }
2087 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
2088 interval.end = special_sign->interval.end;
2089 if (positive_sign) positive_sign->invalidate();
2090 if (negative_sign) negative_sign->invalidate();
2091 }
2092 else {
2093 if (positive_sign) positive_sign->invalidate();
2094 if (negative_sign) negative_sign->invalidate();
2095 if (special_sign) special_sign->invalidate();
2096 }
2097
2098 if (integer->match(text, interval.end, end, flags))
2099 interval.end = integer->interval.end;
2100
2101 if (decimal_separator->match(text, interval.end, end, flags) &&
2102 decimal->match(text, decimal_separator->interval.end, end, flags))
2103 interval.end = decimal->interval.end;
2104 else {
2105 decimal_separator->invalidate();
2106 decimal->invalidate();
2107 }
2108
2109 if (!integer->interval.empty() &&
2110 decimal->interval.empty())
2111 {
2112 // No integer part, no decimal part.
2113 if (positive_sign) positive_sign->invalidate();
2114 if (negative_sign) negative_sign->invalidate();
2115 if (special_sign) special_sign->invalidate();
2116 integer->invalidate();
2117 decimal_separator->invalidate();
2118 decimal->invalidate();
2119 if (exponent_symbol) exponent_symbol->invalidate();
2120 if (positive_exp_sign) positive_exp_sign->invalidate();
2121 if (negative_exp_sign) negative_exp_sign->invalidate();
2122 if (exponent) exponent->invalidate();
2123 interval.start = (interval.end = start) + 1;
2124 return false;
2125 }
2126
2127 if (exponent_symbol && exponent_symbol->match(text, interval.end, end, flags) &&
2128 (positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2129 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags) ||
2130 exponent && exponent->match(text, exponent_symbol->interval.end, end, flags)))
2131 {
2132 interval.end = exponent->interval.end;
2133 if (negative_exp_sign) negative_exp_sign->invalidate();
2134 }
2135 else if (exponent_symbol && exponent_symbol->match(text, interval.end, end, flags) &&
2136 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2137 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2138 {
2139 interval.end = exponent->interval.end;
2140 if (positive_exp_sign) positive_exp_sign->invalidate();
2141 }
2142 else {
2143 if (exponent_symbol) exponent_symbol->invalidate();
2144 if (positive_exp_sign) positive_exp_sign->invalidate();
2145 if (negative_exp_sign) negative_exp_sign->invalidate();
2146 if (exponent) exponent->invalidate();
2147 }
2148
2149 value = (double)integer->value;
2150 if (decimal->interval)
2151 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2152 if (negative_sign && negative_sign->interval)
2153 value = -value;
2154 if (exponent && exponent->interval) {
2155 double e = (double)exponent->value;
2156 if (negative_exp_sign && negative_exp_sign->interval)
2157 e = -e;
2158 value *= pow(10.0, e);
2159 }
2160
2161 interval.start = start;
2162 return true;
2163 }
2164
2165 virtual void invalidate()
2166 {
2167 if (positive_sign) positive_sign->invalidate();
2168 if (negative_sign) negative_sign->invalidate();
2169 if (special_sign) special_sign->invalidate();
2170 integer->invalidate();
2171 decimal_separator->invalidate();
2172 decimal->invalidate();
2173 if (exponent_symbol) exponent_symbol->invalidate();
2174 if (positive_exp_sign) positive_exp_sign->invalidate();
2175 if (negative_exp_sign) negative_exp_sign->invalidate();
2176 if (exponent) exponent->invalidate();
2177 value = std::numeric_limits<double>::quiet_NaN();
2179 }
2180
2181 public:
2182 std::shared_ptr<basic_parser<T>> positive_sign;
2183 std::shared_ptr<basic_parser<T>> negative_sign;
2184 std::shared_ptr<basic_parser<T>> special_sign;
2185 std::shared_ptr<basic_integer<T>> integer;
2186 std::shared_ptr<basic_parser<T>> decimal_separator;
2187 std::shared_ptr<basic_integer<T>> decimal;
2188 std::shared_ptr<basic_parser<T>> exponent_symbol;
2189 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2190 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2191 std::shared_ptr<basic_integer<T>> exponent;
2192 double value;
2193 };
2194
2197#ifdef _UNICODE
2199#else
2201#endif
2203
2207 template <class T>
2209 {
2210 public:
2212 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2213 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2214 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2215 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2216 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2217 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2218 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2219 _In_ const std::locale& locale = std::locale()) :
2220 basic_parser<T>(locale),
2221 positive_sign(_positive_sign),
2222 negative_sign(_negative_sign),
2223 special_sign(_special_sign),
2224 currency(_currency),
2225 integer(_integer),
2226 decimal_separator(_decimal_separator),
2227 decimal(_decimal)
2228 {}
2229
2230 virtual bool match(
2231 _In_reads_or_z_(end) const T* text,
2232 _In_ size_t start = 0,
2233 _In_ size_t end = (size_t)-1,
2234 _In_ int flags = match_default)
2235 {
2236 assert(text || start >= end);
2237 interval.end = start;
2238
2239 if (positive_sign->match(text, interval.end, end, flags)) {
2240 interval.end = positive_sign->interval.end;
2241 if (negative_sign) negative_sign->invalidate();
2242 if (special_sign) special_sign->invalidate();
2243 }
2244 else if (negative_sign->match(text, interval.end, end, flags)) {
2245 interval.end = negative_sign->interval.end;
2246 if (positive_sign) positive_sign->invalidate();
2247 if (special_sign) special_sign->invalidate();
2248 }
2249 else if (special_sign->match(text, interval.end, end, flags)) {
2250 interval.end = special_sign->interval.end;
2251 if (positive_sign) positive_sign->invalidate();
2252 if (negative_sign) negative_sign->invalidate();
2253 }
2254 else {
2255 if (positive_sign) positive_sign->invalidate();
2256 if (negative_sign) negative_sign->invalidate();
2257 if (special_sign) special_sign->invalidate();
2258 }
2259
2260 if (currency->match(text, interval.end, end, flags))
2261 interval.end = currency->interval.end;
2262 else {
2263 if (positive_sign) positive_sign->invalidate();
2264 if (negative_sign) negative_sign->invalidate();
2265 if (special_sign) special_sign->invalidate();
2266 integer->invalidate();
2267 decimal_separator->invalidate();
2268 decimal->invalidate();
2269 interval.start = (interval.end = start) + 1;
2270 return false;
2271 }
2272
2273 if (integer->match(text, interval.end, end, flags))
2274 interval.end = integer->interval.end;
2275 if (decimal_separator->match(text, interval.end, end, flags) &&
2276 decimal->match(text, decimal_separator->interval.end, end, flags))
2277 interval.end = decimal->interval.end;
2278 else {
2279 decimal_separator->invalidate();
2280 decimal->invalidate();
2281 }
2282
2283 if (integer->interval.empty() &&
2284 decimal->interval.empty())
2285 {
2286 // No integer part, no decimal part.
2287 if (positive_sign) positive_sign->invalidate();
2288 if (negative_sign) negative_sign->invalidate();
2289 if (special_sign) special_sign->invalidate();
2290 currency->invalidate();
2291 integer->invalidate();
2292 decimal_separator->invalidate();
2293 decimal->invalidate();
2294 interval.start = (interval.end = start) + 1;
2295 return false;
2296 }
2297
2298 interval.start = start;
2299 return true;
2300 }
2301
// Invalidates every component sub-parser held by this parser.
// positive_sign, negative_sign and special_sign are null-checked before use;
// currency, integer, decimal_separator and decimal are dereferenced unconditionally.
2302 virtual void invalidate()
2303 {
2304 if (positive_sign) positive_sign->invalidate();
2305 if (negative_sign) negative_sign->invalidate();
2306 if (special_sign) special_sign->invalidate();
2307 currency->invalidate();
2308 integer->invalidate();
2309 decimal_separator->invalidate();
2310 decimal->invalidate();
// NOTE(review): the embedded numbering jumps from 2310 to 2312, so one listing line
// is absent here - presumably the base-class invalidate() call; confirm against the header.
2312 }
2313
2314 public:
// Component sub-parsers, publicly accessible.
// The three sign parsers are null-checked wherever this class uses them, so they may be unset.
2315 std::shared_ptr<basic_parser<T>> positive_sign;
2316 std::shared_ptr<basic_parser<T>> negative_sign;
2317 std::shared_ptr<basic_parser<T>> special_sign;
// The remaining parsers are dereferenced without a null check by match()/invalidate().
2318 std::shared_ptr<basic_parser<T>> currency;
2319 std::shared_ptr<basic_parser<T>> integer;
2320 std::shared_ptr<basic_parser<T>> decimal_separator;
2321 std::shared_ptr<basic_parser<T>> decimal;
2322 };
2323
2326#ifdef _UNICODE
2328#else
2330#endif
2332
2336 template <class T>
2338 {
2339 public:
// Constructor parameters: one sub-parser per decimal digit (0..9), the sub-parser for the
// field separator, and the locale forwarded to the basic_parser<T> base.
2341 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2342 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2343 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2344 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2345 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2346 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2347 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2348 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2349 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2350 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2351 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2352 _In_ const std::locale& locale = std::locale()) :
2353 basic_parser<T>(locale),
// Each shared_ptr argument is copied into the matching m_* member.
2354 m_digit_0(digit_0),
2355 m_digit_1(digit_1),
2356 m_digit_2(digit_2),
2357 m_digit_3(digit_3),
2358 m_digit_4(digit_4),
2359 m_digit_5(digit_5),
2360 m_digit_6(digit_6),
2361 m_digit_7(digit_7),
2362 m_digit_8(digit_8),
2363 m_digit_9(digit_9),
2364 m_separator(separator)
2365 {
// Start with an all-zero address value.
2366 value.s_addr = 0;
2367 }
2368
// Matches four numeric fields separated by m_separator, starting at text[start].
// Each field is assembled digit by digit from the m_digit_0..m_digit_9 sub-parsers and
// must stay within 0..255. On success, interval covers [start, end of the fourth field)
// and true is returned. On failure, components[0..3], value and interval are reset and
// false is returned.
// NOTE(review): the embedded numbering skips 2388 and 2415; presumably those listing lines
// record components[i].start / components[i].end - confirm against the original header.
2369 virtual bool match(
2370 _In_reads_or_z_(end) const T* text,
2371 _In_ size_t start = 0,
2372 _In_ size_t end = (size_t)-1,
2373 _In_ int flags = match_default)
2374 {
2375 assert(text || start >= end);
2376 interval.end = start;
2377 value.s_addr = 0;
2378
2379 size_t i;
2380 for (i = 0; i < 4; i++) {
// Every field after the first must be preceded by a separator.
2381 if (i) {
2382 if (m_separator->match(text, interval.end, end, flags))
2383 interval.end = m_separator->interval.end;
2384 else
2385 goto error;
2386 }
2387
// Accumulate digits while the running value still fits in 0..255. A digit that would
// push the value above 255 is left unconsumed and ends the field.
2389 bool is_empty = true;
2390 size_t x;
2391 for (x = 0; interval.end < end && text[interval.end];) {
2392 size_t dig, digit_end;
2393 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2394 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2395 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2396 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2397 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2398 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2399 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2400 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2401 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2402 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2403 else break;
2404 size_t x_n = x * 10 + dig;
2405 if (x_n <= 255) {
2406 x = x_n;
2407 interval.end = digit_end;
2408 is_empty = false;
2409 }
2410 else
2411 break;
2412 }
// A field without a single digit makes the whole match fail.
2413 if (is_empty)
2414 goto error;
// Shift the fields parsed so far up by one byte and put this field in the low byte, so the
// first field ends up in the most significant byte of the integer value.
// NOTE(review): in_addr::s_addr is conventionally in network byte order; confirm that
// consumers of `value` expect this packing.
2416 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2417 }
// NOTE(review): the loop above only finishes normally with i == 4 (every failure inside it
// jumps to error), so this check appears unreachable.
2418 if (i < 4)
2419 goto error;
2420
2421 interval.start = start;
2422 return true;
2423
2424 error:
// Reset every component to the inverted range (start 1, end 0), zero the value and
// leave interval inverted as well (start == end + 1).
2425 components[0].start = 1;
2426 components[0].end = 0;
2427 components[1].start = 1;
2428 components[1].end = 0;
2429 components[2].start = 1;
2430 components[2].end = 0;
2431 components[3].start = 1;
2432 components[3].end = 0;
2433 value.s_addr = 0;
2434 interval.start = (interval.end = start) + 1;
2435 return false;
2436 }
2437
// Resets the four component ranges to the inverted range (start 1, end 0) and zeroes value.
2438 virtual void invalidate()
2439 {
2440 components[0].start = 1;
2441 components[0].end = 0;
2442 components[1].start = 1;
2443 components[1].end = 0;
2444 components[2].start = 1;
2445 components[2].end = 0;
2446 components[3].start = 1;
2447 components[3].end = 0;
2448 value.s_addr = 0;
// NOTE(review): the embedded numbering skips 2449; presumably the base-class
// invalidate() call sits on that listing line - confirm against the header.
2450 }
2451
2452 public:
// Address value assembled by match(); zeroed by the constructor, by invalidate() and on a failed match.
// NOTE(review): the embedded numbering skips 2453; the `components` array used by
// match()/invalidate() is presumably declared on that listing line - confirm.
2454 struct in_addr value;
2455
2456 protected:
// Sub-parsers recognising the decimal digits 0..9, one per digit value.
2457 std::shared_ptr<basic_parser<T>>
2458 m_digit_0,
2459 m_digit_1,
2460 m_digit_2,
2461 m_digit_3,
2462 m_digit_4,
2463 m_digit_5,
2464 m_digit_6,
2465 m_digit_7,
2466 m_digit_8,
2467 m_digit_9;
// Sub-parser recognising the separator required between consecutive fields.
2468 std::shared_ptr<basic_parser<T>> m_separator;
2469 };
2470
2473#ifdef _UNICODE
2475#else
2477#endif
2479
2483 template <class T>
2485 {
2486 public:
2487 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2488
2489 virtual bool match(
2490 _In_reads_or_z_(end) const T* text,
2491 _In_ size_t start = 0,
2492 _In_ size_t end = (size_t)-1,
2493 _In_ int flags = match_default)
2494 {
2495 assert(text || start >= end);
2496 if (start < end && text[start]) {
2497 if (text[start] == '-' ||
2498 text[start] == '_' ||
2499 text[start] == ':' ||
2500 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
2501 {
2502 interval.end = (interval.start = start) + 1;
2503 return true;
2504 }
2505 }
2506 interval.start = (interval.end = start) + 1;
2507 return false;
2508 }
2509 };
2510
2513#ifdef _UNICODE
2515#else
2517#endif
2518
2523 {
2524 public:
2525 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2526
2527 virtual bool match(
2528 _In_reads_or_z_(end) const char* text,
2529 _In_ size_t start = 0,
2530 _In_ size_t end = (size_t)-1,
2531 _In_ int flags = match_default)
2532 {
2533 assert(text || start >= end);
2534 if (start < end && text[start]) {
2535 wchar_t buf[3];
2536 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
2537 const wchar_t* chr_end = chr + stdex::strlen(chr);
2538 if ((chr[0] == L'-' ||
2539 chr[0] == L'_' ||
2540 chr[0] == L':') && chr[1] == 0 ||
2541 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2542 {
2543 interval.start = start;
2544 return true;
2545 }
2546 }
2547 interval.start = (interval.end = start) + 1;
2548 return false;
2549 }
2550 };
2551
2555 template <class T>
2557 {
2558 public:
// Constructor parameters: one sub-parser per hexadecimal digit value (0..15), the sub-parser
// for the group separator, optional sub-parsers for the scope-id separator and the scope id
// (both default to nullptr), and the locale forwarded to the basic_parser<T> base.
2560 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2561 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2562 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2563 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2564 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2565 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2566 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2567 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2568 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2569 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2570 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2571 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2572 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2573 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2574 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2575 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2576 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2577 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2578 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2579 _In_ const std::locale& locale = std::locale()) :
2580 basic_parser<T>(locale),
// Each shared_ptr argument is copied into the matching member.
2581 m_digit_0(digit_0),
2582 m_digit_1(digit_1),
2583 m_digit_2(digit_2),
2584 m_digit_3(digit_3),
2585 m_digit_4(digit_4),
2586 m_digit_5(digit_5),
2587 m_digit_6(digit_6),
2588 m_digit_7(digit_7),
2589 m_digit_8(digit_8),
2590 m_digit_9(digit_9),
2591 m_digit_10(digit_10),
2592 m_digit_11(digit_11),
2593 m_digit_12(digit_12),
2594 m_digit_13(digit_13),
2595 m_digit_14(digit_14),
2596 m_digit_15(digit_15),
2597 m_separator(separator),
2598 m_scope_id_separator(scope_id_separator),
2599 scope_id(_scope_id)
2600 {
// Start with an all-zero address value.
2601 memset(&value, 0, sizeof(value));
2602 }
2603
// Matches up to eight hexadecimal groups (each 0..0xffff) separated by m_separator,
// starting at text[start]. One doubled separator ("zero compaction") may stand in for a run
// of zero groups; the groups parsed after it are then moved to the end of `value` and the
// gap is zero-filled. An optional scope id (scope-id separator followed by scope_id) may
// follow. On success interval.start is set to start and true is returned; on failure all
// components, value, scope_id and interval are reset and false is returned.
// NOTE(review): the embedded numbering skips 2646 and 2683; presumably those listing lines
// record components[i].start / components[i].end - confirm against the original header.
2604 virtual bool match(
2605 _In_reads_or_z_(end) const T* text,
2606 _In_ size_t start = 0,
2607 _In_ size_t end = (size_t)-1,
2608 _In_ int flags = match_default)
2609 {
2610 assert(text || start >= end);
2611 interval.end = start;
2612 memset(&value, 0, sizeof(value));
2613
// compaction_i == (size_t)-1 means no doubled separator has been seen yet.
2614 size_t i, compaction_i = (size_t)-1, compaction_start = start;
2615 for (i = 0; i < 8; i++) {
2616 bool is_empty = true;
2617
2618 if (m_separator->match(text, interval.end, end, flags)) {
// The second match() call reuses m_separator, so its interval now describes the second separator.
2619 if (m_separator->match(text, m_separator->interval.end, end, flags)) {
2620 // :: found
2621 if (compaction_i == (size_t)-1) {
2622 // Zero compaction start
2623 compaction_i = i;
2624 compaction_start = m_separator->interval.start;
2625 interval.end = m_separator->interval.end;
2626 }
2627 else {
2628 // More than one zero compaction
2629 break;
2630 }
2631 }
2632 else if (i) {
2633 // Inner : found
// NOTE(review): interval.end advances past the separator before any digit is seen. With zero
// compaction active and no digits following, the loop breaks below and the matched interval
// keeps this dangling separator - confirm whether that is intended.
2634 interval.end = m_separator->interval.end;
2635 }
2636 else {
2637 // Leading : found
2638 goto error;
2639 }
2640 }
2641 else if (i) {
2642 // : missing
2643 break;
2644 }
2645
// Accumulate hexadecimal digits while the running value still fits in 0..0xffff.
2647 size_t x;
2648 for (x = 0; interval.end < end && text[interval.end];) {
2649 size_t dig, digit_end;
2650 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2651 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2652 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2653 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2654 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2655 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2656 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2657 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2658 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2659 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2660 else if (m_digit_10->match(text, interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2661 else if (m_digit_11->match(text, interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2662 else if (m_digit_12->match(text, interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2663 else if (m_digit_13->match(text, interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2664 else if (m_digit_14->match(text, interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2665 else if (m_digit_15->match(text, interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2666 else break;
2667 size_t x_n = x * 16 + dig;
2668 if (x_n <= 0xffff) {
2669 x = x_n;
2670 interval.end = digit_end;
2671 is_empty = false;
2672 }
2673 else
2674 break;
2675 }
2676 if (is_empty) {
2677 if (compaction_i != (size_t)-1) {
2678 // Zero compaction active: no sweat.
2679 break;
2680 }
2681 goto error;
2682 }
// NOTE(review): the group is stored as a plain uint16_t through the s6_words member;
// confirm the byte order consumers expect and that s6_words exists on every target platform.
2684 value.s6_words[i] = (uint16_t)x;
2685 }
2686
2687 if (compaction_i != (size_t)-1) {
2688 // Align components right due to zero compaction.
// First move the groups parsed at or after the compaction point to the tail (indices 7 downwards)...
2689 size_t j, k;
2690 for (j = 8, k = i; k > compaction_i;) {
2691 value.s6_words[--j] = value.s6_words[--k];
2692 components[j] = components[k];
2693 }
// ...then zero the gap and give those components an empty range at compaction_start.
2694 for (; j > compaction_i;) {
2695 value.s6_words[--j] = 0;
2696 components[j].start =
2697 components[j].end = compaction_start;
2698 }
2699 }
// Without zero compaction all eight groups are mandatory.
2700 else if (i < 8)
2701 goto error;
2702
// The scope id is optional: it is taken only when both the separator and the id match.
2703 if (m_scope_id_separator && m_scope_id_separator->match(text, interval.end, end, flags) &&
2704 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2705 interval.end = scope_id->interval.end;
2706 else if (scope_id)
2707 scope_id->invalidate();
2708
2709 interval.start = start;
2710 return true;
2711
2712 error:
// Reset every component to the inverted range (start 1, end 0), zero the value, invalidate
// the scope id if present, and leave interval inverted (start == end + 1).
2713 components[0].start = 1;
2714 components[0].end = 0;
2715 components[1].start = 1;
2716 components[1].end = 0;
2717 components[2].start = 1;
2718 components[2].end = 0;
2719 components[3].start = 1;
2720 components[3].end = 0;
2721 components[4].start = 1;
2722 components[4].end = 0;
2723 components[5].start = 1;
2724 components[5].end = 0;
2725 components[6].start = 1;
2726 components[6].end = 0;
2727 components[7].start = 1;
2728 components[7].end = 0;
2729 memset(&value, 0, sizeof(value));
2730 if (scope_id) scope_id->invalidate();
2731 interval.start = (interval.end = start) + 1;
2732 return false;
2733 }
2734
// Resets the eight component ranges to the inverted range (start 1, end 0), zeroes value
// and invalidates the scope-id sub-parser when one is set.
2735 virtual void invalidate()
2736 {
2737 components[0].start = 1;
2738 components[0].end = 0;
2739 components[1].start = 1;
2740 components[1].end = 0;
2741 components[2].start = 1;
2742 components[2].end = 0;
2743 components[3].start = 1;
2744 components[3].end = 0;
2745 components[4].start = 1;
2746 components[4].end = 0;
2747 components[5].start = 1;
2748 components[5].end = 0;
2749 components[6].start = 1;
2750 components[6].end = 0;
2751 components[7].start = 1;
2752 components[7].end = 0;
2753 memset(&value, 0, sizeof(value));
2754 if (scope_id) scope_id->invalidate();
// NOTE(review): the embedded numbering skips 2755; presumably the base-class
// invalidate() call sits on that listing line - confirm against the header.
2756 }
2757
2758 public:
// Address value assembled by match(); zeroed by the constructor, by invalidate() and on a failed match.
// NOTE(review): the embedded numbering skips 2759; the `components` array used by
// match()/invalidate() is presumably declared on that listing line - confirm.
2760 struct in6_addr value;
// Optional scope-id sub-parser (may be null; every use in this class is null-checked).
2761 std::shared_ptr<basic_parser<T>> scope_id;
2762
2763 protected:
// Sub-parsers recognising the hexadecimal digit values 0..15, one per value.
2764 std::shared_ptr<basic_parser<T>>
2765 m_digit_0,
2766 m_digit_1,
2767 m_digit_2,
2768 m_digit_3,
2769 m_digit_4,
2770 m_digit_5,
2771 m_digit_6,
2772 m_digit_7,
2773 m_digit_8,
2774 m_digit_9,
2775 m_digit_10,
2776 m_digit_11,
2777 m_digit_12,
2778 m_digit_13,
2779 m_digit_14,
2780 m_digit_15;
// Group separator (dereferenced unconditionally) and optional scope-id separator (null-checked).
2781 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2782 };
2783
2786#ifdef _UNICODE
2788#else
2790#endif
2792
2796 template <class T>
2798 {
2799 public:
// Constructor parameters: allow_idn enables the locale-based alphanumeric test in match();
// locale is forwarded to the basic_parser<T> base.
2801 _In_ bool allow_idn,
2802 _In_ const std::locale& locale = std::locale()) :
2803 basic_parser<T>(locale),
2804 m_allow_idn(allow_idn),
// allow_on_edge starts out true; match() overwrites it on every successful match.
2805 allow_on_edge(true)
2806 {}
2807
2808 virtual bool match(
2809 _In_reads_or_z_(end) const T* text,
2810 _In_ size_t start = 0,
2811 _In_ size_t end = (size_t)-1,
2812 _In_ int flags = match_default)
2813 {
2814 assert(text || start >= end);
2815 if (start < end && text[start]) {
2816 if (('A' <= text[start] && text[start] <= 'Z') ||
2817 ('a' <= text[start] && text[start] <= 'z') ||
2818 ('0' <= text[start] && text[start] <= '9'))
2819 allow_on_edge = true;
2820 else if (text[start] == '-')
2821 allow_on_edge = false;
2822 else if (m_allow_idn && std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
2823 allow_on_edge = true;
2824 else {
2825 interval.start = (interval.end = start) + 1;
2826 return false;
2827 }
2828 interval.end = (interval.start = start) + 1;
2829 return true;
2830 }
2831 interval.start = (interval.end = start) + 1;
2832 return false;
2833 }
2834
2835 public:
// NOTE(review): the embedded numbering skips 2836; the allow_on_edge flag written by the
// constructor and by match() is presumably declared on that listing line - confirm.
2837
2838 protected:
// When true, match() additionally accepts characters the locale classifies as alphanumeric.
2839 bool m_allow_idn;
2840 };
2841
2844#ifdef _UNICODE
2846#else
2848#endif
2849
2854 {
2855 public:
// Constructor parameters are forwarded unchanged to basic_dns_domain_char<char>.
2857 _In_ bool allow_idn,
2858 _In_ const std::locale& locale = std::locale()) :
2859 basic_dns_domain_char<char>(allow_idn, locale)
2860 {}
2861
2862 virtual bool match(
2863 _In_reads_or_z_(end) const char* text,
2864 _In_ size_t start = 0,
2865 _In_ size_t end = (size_t)-1,
2866 _In_ int flags = match_default)
2867 {
2868 assert(text || start >= end);
2869 if (start < end && text[start]) {
2870 wchar_t buf[3];
2871 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
2872 const wchar_t* chr_end = chr + stdex::strlen(chr);
2873 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2874 ('a' <= chr[0] && chr[0] <= 'z') ||
2875 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2876 allow_on_edge = true;
2877 else if (chr[0] == '-' && chr[1] == 0)
2878 allow_on_edge = false;
2879 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2880 allow_on_edge = true;
2881 else {
2882 interval.start = (interval.end = start) + 1;
2883 return false;
2884 }
2885 interval.start = start;
2886 return true;
2887 }
2888 interval.start = (interval.end = start) + 1;
2889 return false;
2890 }
2891 };
2892
2896 template <class T>
2898 {
2899 public:
// Constructor parameters: allow_absolute controls whether a separator that ends a label is
// included in the match (see match()); domain_char recognises one label character;
// separator recognises the label separator; locale is forwarded to the basic_parser<T> base.
2901 _In_ bool allow_absolute,
2902 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2903 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2904 _In_ const std::locale& locale = std::locale()) :
2905 basic_parser<T>(locale),
2906 m_allow_absolute(allow_absolute),
2907 m_domain_char(domain_char),
2908 m_separator(separator)
2909 {}
2910
// Matches a run of labels built from m_domain_char characters and separated by m_separator,
// starting at text[start]. `i` is the scan position; interval.end trails it and is only
// moved to positions that are valid places for the name to end. Returns true when at least
// one label was started, otherwise leaves interval inverted and returns false.
2911 virtual bool match(
2912 _In_reads_or_z_(end) const T* text,
2913 _In_ size_t start = 0,
2914 _In_ size_t end = (size_t)-1,
2915 _In_ int flags = match_default)
2916 {
2917 assert(text || start >= end);
// count is the number of labels started so far; the loop stops after 127 of them.
2918 size_t i = start, count;
2919 for (count = 0; i < end && text[i] && count < 127; count++) {
// A label may only begin with a character whose allow_on_edge flag is set.
2920 if (m_domain_char->match(text, i, end, flags) &&
2921 m_domain_char->allow_on_edge)
2922 {
2923 // Domain start
2924 interval.end = i = m_domain_char->interval.end;
2925 while (i < end && text[i]) {
// A separator only ends the label when the most recently matched character may sit on an edge.
2926 if (m_domain_char->allow_on_edge &&
2927 m_separator->match(text, i, end, flags))
2928 {
2929 // Domain end
// With m_allow_absolute the separator itself becomes part of the match; otherwise the match
// ends before it, while scanning still resumes after it.
2930 if (m_allow_absolute)
2931 interval.end = i = m_separator->interval.end;
2932 else {
2933 interval.end = i;
2934 i = m_separator->interval.end;
2935 }
2936 break;
2937 }
2938 if (m_domain_char->match(text, i, end, flags)) {
// Characters not allowed on an edge advance the scan position only; the match end catches up
// once an edge-capable character follows.
2939 if (m_domain_char->allow_on_edge)
2940 interval.end = i = m_domain_char->interval.end;
2941 else
2942 i = m_domain_char->interval.end;
2943 }
2944 else {
// Neither a separator nor a label character: accept what has been matched so far.
2945 interval.start = start;
2946 return true;
2947 }
2948 }
2949 }
2950 else
2951 break;
2952 }
2953 if (count) {
2954 interval.start = start;
2955 return true;
2956 }
2957 interval.start = (interval.end = start) + 1;
2958 return false;
2959 }
2960
2961 protected:
// NOTE(review): the embedded numbering skips 2962; the m_allow_absolute flag initialised by
// the constructor is presumably declared on that listing line - confirm.
// Sub-parser for one label character; match() also reads its allow_on_edge flag.
2963 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
// Sub-parser for the separator between labels.
2964 std::shared_ptr<basic_parser<T>> m_separator;
2965 };
2966
// tdns_name aliases wdns_name when _UNICODE is defined and dns_name otherwise.
2969 #ifdef _UNICODE
2970 using tdns_name = wdns_name;
2971 #else
2972 using tdns_name = dns_name;
2973 #endif
2975
2979 template <class T>
2981 {
2982 public:
2983 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2984
2985 virtual bool match(
2986 _In_reads_or_z_(end) const T* text,
2987 _In_ size_t start = 0,
2988 _In_ size_t end = (size_t)-1,
2989 _In_ int flags = match_default)
2990 {
2991 assert(text || start >= end);
2992 if (start < end && text[start]) {
2993 if (text[start] == '-' ||
2994 text[start] == '.' ||
2995 text[start] == '_' ||
2996 text[start] == '~' ||
2997 text[start] == '%' ||
2998 text[start] == '!' ||
2999 text[start] == '$' ||
3000 text[start] == '&' ||
3001 text[start] == '\'' ||
3002 //text[start] == '(' ||
3003 //text[start] == ')' ||
3004 text[start] == '*' ||
3005 text[start] == '+' ||
3006 text[start] == ',' ||
3007 text[start] == ';' ||
3008 text[start] == '=' ||
3009 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3010 {
3011 interval.end = (interval.start = start) + 1;
3012 return true;
3013 }
3014 }
3015 interval.start = (interval.end = start) + 1;
3016 return false;
3017 }
3018 };
3019
3022#ifdef _UNICODE
3024#else
3026#endif
3027
3032 {
3033 public:
3034 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3035
3036 virtual bool match(
3037 _In_reads_or_z_(end) const char* text,
3038 _In_ size_t start = 0,
3039 _In_ size_t end = (size_t)-1,
3040 _In_ int flags = match_default)
3041 {
3042 assert(text || start >= end);
3043 if (start < end && text[start]) {
3044 wchar_t buf[3];
3045 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3046 const wchar_t* chr_end = chr + stdex::strlen(chr);
3047 if ((chr[0] == L'-' ||
3048 chr[0] == L'.' ||
3049 chr[0] == L'_' ||
3050 chr[0] == L'~' ||
3051 chr[0] == L'%' ||
3052 chr[0] == L'!' ||
3053 chr[0] == L'$' ||
3054 chr[0] == L'&' ||
3055 chr[0] == L'\'' ||
3056 //chr[0] == L'(' ||
3057 //chr[0] == L')' ||
3058 chr[0] == L'*' ||
3059 chr[0] == L'+' ||
3060 chr[0] == L',' ||
3061 chr[0] == L';' ||
3062 chr[0] == L'=') && chr[1] == 0 ||
3063 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3064 {
3065 interval.start = start;
3066 return true;
3067 }
3068 }
3069
3070 interval.start = (interval.end = start) + 1;
3071 return false;
3072 }
3073 };
3074
3078 template <class T>
3080 {
3081 public:
3082 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3083
3084 virtual bool match(
3085 _In_reads_or_z_(end) const T* text,
3086 _In_ size_t start = 0,
3087 _In_ size_t end = (size_t)-1,
3088 _In_ int flags = match_default)
3089 {
3090 assert(text || start >= end);
3091 if (start < end && text[start]) {
3092 if (text[start] == '-' ||
3093 text[start] == '.' ||
3094 text[start] == '_' ||
3095 text[start] == '~' ||
3096 text[start] == '%' ||
3097 text[start] == '!' ||
3098 text[start] == '$' ||
3099 text[start] == '&' ||
3100 text[start] == '\'' ||
3101 text[start] == '(' ||
3102 text[start] == ')' ||
3103 text[start] == '*' ||
3104 text[start] == '+' ||
3105 text[start] == ',' ||
3106 text[start] == ';' ||
3107 text[start] == '=' ||
3108 text[start] == ':' ||
3109 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3110 {
3111 interval.end = (interval.start = start) + 1;
3112 return true;
3113 }
3114 }
3115 interval.start = (interval.end = start) + 1;
3116 return false;
3117 }
3118 };
3119
3122#ifdef _UNICODE
3124#else
3126#endif
3127
3132 {
3133 public:
3134 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3135
3136 virtual bool match(
3137 _In_reads_or_z_(end) const char* text,
3138 _In_ size_t start = 0,
3139 _In_ size_t end = (size_t)-1,
3140 _In_ int flags = match_default)
3141 {
3142 assert(text || start >= end);
3143 if (start < end && text[start]) {
3144 wchar_t buf[3];
3145 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3146 const wchar_t* chr_end = chr + stdex::strlen(chr);
3147 if ((chr[0] == L'-' ||
3148 chr[0] == L'.' ||
3149 chr[0] == L'_' ||
3150 chr[0] == L'~' ||
3151 chr[0] == L'%' ||
3152 chr[0] == L'!' ||
3153 chr[0] == L'$' ||
3154 chr[0] == L'&' ||
3155 chr[0] == L'\'' ||
3156 chr[0] == L'(' ||
3157 chr[0] == L')' ||
3158 chr[0] == L'*' ||
3159 chr[0] == L'+' ||
3160 chr[0] == L',' ||
3161 chr[0] == L';' ||
3162 chr[0] == L'=' ||
3163 chr[0] == L':') && chr[1] == 0 ||
3164 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3165 {
3166 interval.start = start;
3167 return true;
3168 }
3169 }
3170 interval.start = (interval.end = start) + 1;
3171 return false;
3172 }
3173 };
3174
3178 template <class T>
3180 {
3181 public:
3182 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3183
3184 virtual bool match(
3185 _In_reads_or_z_(end) const T* text,
3186 _In_ size_t start = 0,
3187 _In_ size_t end = (size_t)-1,
3188 _In_ int flags = match_default)
3189 {
3190 assert(text || start >= end);
3191 if (start < end && text[start]) {
3192 if (text[start] == '/' ||
3193 text[start] == '-' ||
3194 text[start] == '.' ||
3195 text[start] == '_' ||
3196 text[start] == '~' ||
3197 text[start] == '%' ||
3198 text[start] == '!' ||
3199 text[start] == '$' ||
3200 text[start] == '&' ||
3201 text[start] == '\'' ||
3202 text[start] == '(' ||
3203 text[start] == ')' ||
3204 text[start] == '*' ||
3205 text[start] == '+' ||
3206 text[start] == ',' ||
3207 text[start] == ';' ||
3208 text[start] == '=' ||
3209 text[start] == ':' ||
3210 text[start] == '@' ||
3211 text[start] == '?' ||
3212 text[start] == '#' ||
3213 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3214 {
3215 interval.end = (interval.start = start) + 1;
3216 return true;
3217 }
3218 }
3219 interval.start = (interval.end = start) + 1;
3220 return false;
3221 }
3222 };
3223
3226#ifdef _UNICODE
3228#else
3230#endif
3231
3236 {
3237 public:
3238 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3239
3240 virtual bool match(
3241 _In_reads_or_z_(end) const char* text,
3242 _In_ size_t start = 0,
3243 _In_ size_t end = (size_t)-1,
3244 _In_ int flags = match_default)
3245 {
3246 assert(text || start >= end);
3247 if (start < end && text[start]) {
3248 wchar_t buf[3];
3249 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3250 const wchar_t* chr_end = chr + stdex::strlen(chr);
3251 if ((chr[0] == L'/' ||
3252 chr[0] == L'-' ||
3253 chr[0] == L'.' ||
3254 chr[0] == L'_' ||
3255 chr[0] == L'~' ||
3256 chr[0] == L'%' ||
3257 chr[0] == L'!' ||
3258 chr[0] == L'$' ||
3259 chr[0] == L'&' ||
3260 chr[0] == L'\'' ||
3261 chr[0] == L'(' ||
3262 chr[0] == L')' ||
3263 chr[0] == L'*' ||
3264 chr[0] == L'+' ||
3265 chr[0] == L',' ||
3266 chr[0] == L';' ||
3267 chr[0] == L'=' ||
3268 chr[0] == L':' ||
3269 chr[0] == L'@' ||
3270 chr[0] == L'?' ||
3271 chr[0] == L'#') && chr[1] == 0 ||
3272 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3273 {
3274 interval.start = start;
3275 return true;
3276 }
3277 }
3278 interval.start = (interval.end = start) + 1;
3279 return false;
3280 }
3281 };
3282
3286 template <class T>
3288 {
3289 public:
// Constructor parameters: path_char recognises one path/query/bookmark character,
// query_start and bookmark_start recognise the markers that open the query and bookmark
// parts; locale is forwarded to the basic_parser<T> base.
3291 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3292 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3293 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3294 _In_ const std::locale& locale = std::locale()) :
3295 basic_parser<T>(locale),
3296 m_path_char(path_char),
3297 m_query_start(query_start),
3298 m_bookmark_start(bookmark_start)
3299 {}
3300
// Splits the text at text[start] into path, query and bookmark ranges.
// Characters are consumed with m_path_char; m_query_start switches from path to query and
// m_bookmark_start switches from path or query to bookmark. The marker tests run before the
// m_path_char test at every position. Returns true as soon as a query or bookmark marker was
// seen, or when at least one path character was consumed; otherwise resets the ranges,
// leaves interval inverted and returns false.
3301 virtual bool match(
3302 _In_reads_or_z_(end) const T* text,
3303 _In_ size_t start = 0,
3304 _In_ size_t end = (size_t)-1,
3305 _In_ int flags = match_default)
3306 {
3307 assert(text || start >= end);
3308
// The path is assumed to begin at start; query and bookmark begin as inverted (start 1, end 0) ranges.
3309 interval.end = start;
3310 path.start = start;
3311 query.start = 1;
3312 query.end = 0;
3313 bookmark.start = 1;
3314 bookmark.end = 0;
3315
3316 for (;;) {
3317 if (interval.end >= end || !text[interval.end])
3318 break;
3319 if (m_query_start->match(text, interval.end, end, flags)) {
// Query marker: the path ends before the marker and the query begins right after it.
3320 path.end = interval.end;
3321 query.start = interval.end = m_query_start->interval.end;
3322 for (;;) {
3323 if (interval.end >= end || !text[interval.end]) {
3324 query.end = interval.end;
3325 break;
3326 }
3327 if (m_bookmark_start->match(text, interval.end, end, flags)) {
// Bookmark marker inside the query: close the query and consume the bookmark to its end.
3328 query.end = interval.end;
3329 bookmark.start = interval.end = m_bookmark_start->interval.end;
3330 for (;;) {
3331 if (interval.end >= end || !text[interval.end]) {
3332 bookmark.end = interval.end;
3333 break;
3334 }
3335 if (m_path_char->match(text, interval.end, end, flags))
3336 interval.end = m_path_char->interval.end;
3337 else {
3338 bookmark.end = interval.end;
3339 break;
3340 }
3341 }
3342 interval.start = start;
3343 return true;
3344 }
3345 if (m_path_char->match(text, interval.end, end, flags))
3346 interval.end = m_path_char->interval.end;
3347 else {
3348 query.end = interval.end;
3349 break;
3350 }
3351 }
3352 interval.start = start;
3353 return true;
3354 }
3355 if (m_bookmark_start->match(text, interval.end, end, flags)) {
// Bookmark marker directly after the path (no query): same bookmark scan as above.
3356 path.end = interval.end;
3357 bookmark.start = interval.end = m_bookmark_start->interval.end;
3358 for (;;) {
3359 if (interval.end >= end || !text[interval.end]) {
3360 bookmark.end = interval.end;
3361 break;
3362 }
3363 if (m_path_char->match(text, interval.end, end, flags))
3364 interval.end = m_path_char->interval.end;
3365 else {
3366 bookmark.end = interval.end;
3367 break;
3368 }
3369 }
3370 interval.start = start;
3371 return true;
3372 }
3373 if (m_path_char->match(text, interval.end, end, flags))
3374 interval.end = m_path_char->interval.end;
3375 else
3376 break;
3377 }
3378
// No marker was seen: succeed only if at least one path character was consumed.
3379 if (start < interval.end) {
3380 path.end = interval.end;
3381 interval.start = start;
3382 return true;
3383 }
3384
// Failure. query is not touched here; it still holds the inverted range assigned at the top,
// because it is only modified on branches that return true.
3385 path.start = 1;
3386 path.end = 0;
3387 bookmark.start = 1;
3388 bookmark.end = 0;
3389 interval.start = (interval.end = start) + 1;
3390 return false;
3391 }
3392
// Resets path, query and bookmark to the inverted range (start 1, end 0).
3393 virtual void invalidate()
3394 {
3395 path.start = 1;
3396 path.end = 0;
3397 query.start = 1;
3398 query.end = 0;
3399 bookmark.start = 1;
3400 bookmark.end = 0;
// NOTE(review): the embedded numbering skips 3401; presumably the base-class
// invalidate() call sits on that listing line - confirm against the header.
3402 }
3403
3404 public:
// NOTE(review): the embedded numbering skips 3405 and 3406; the `path` and `query` ranges
// used by match()/invalidate() are presumably declared on those listing lines - confirm.
// Range of the bookmark part found by match(); inverted (start 1, end 0) when absent.
3407 stdex::interval<size_t> bookmark;
3408
3409 protected:
// Sub-parsers: one path/query/bookmark character, the query marker and the bookmark marker.
3410 std::shared_ptr<basic_parser<T>> m_path_char;
3411 std::shared_ptr<basic_parser<T>> m_query_start;
3412 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3413 };
3414
// turl_path aliases wurl_path when _UNICODE is defined and url_path otherwise.
3417 #ifdef _UNICODE
3418 using turl_path = wurl_path;
3419 #else
3420 using turl_path = url_path;
3421 #endif
3423
3427 template <class T>
3428 class basic_url : public basic_parser<T>
3429 {
3430 public:
// Builds a URL parser out of sub-parsers for each URL part. Every shared_ptr argument is
// copied into the member of the corresponding name; locale is forwarded to the
// basic_parser<T> base. Arguments with a leading underscore initialise members without the
// m_ prefix (http_scheme, username, ipv4_host, ...); the others initialise m_* members.
3431 basic_url(
// Scheme sub-parsers.
3432 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3433 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3434 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3435 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
// Punctuation sub-parsers used after the scheme.
3436 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3437 _In_ const std::shared_ptr<basic_parser<T>>& slash,
// Credentials sub-parsers and the sub-parser for the marker that follows them.
3438 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3439 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3440 _In_ const std::shared_ptr<basic_parser<T>>& at,
// Host sub-parsers; the bracket sub-parsers enclose the IPv6 form.
3441 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3442 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3443 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3444 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3445 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
// Port and path sub-parsers.
3446 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3447 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3448 _In_ const std::locale& locale = std::locale()) :
3449 basic_parser<T>(locale),
3450 http_scheme(_http_scheme),
3451 ftp_scheme(_ftp_scheme),
3452 mailto_scheme(_mailto_scheme),
3453 file_scheme(_file_scheme),
3454 m_colon(colon),
3455 m_slash(slash),
3456 username(_username),
3457 password(_password),
3458 m_at(at),
3459 m_ip_lbracket(ip_lbracket),
3460 m_ip_rbracket(ip_rbracket),
3461 ipv4_host(_ipv4_host),
3462 ipv6_host(_ipv6_host),
3463 dns_host(_dns_host),
3464 port(_port),
3465 path(_path)
3466 {}
3467
3468 virtual bool match(
3469 _In_reads_or_z_(end) const T* text,
3470 _In_ size_t start = 0,
3471 _In_ size_t end = (size_t)-1,
3472 _In_ int flags = match_default)
3473 {
3474 assert(text || start >= end);
3475
3476 interval.end = start;
3477
3478 if (http_scheme->match(text, interval.end, end, flags) &&
3479 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3480 m_slash->match(text, m_colon->interval.end, end, flags) &&
3481 m_slash->match(text, m_slash->interval.end, end, flags))
3482 {
3483 // http://
3484 interval.end = m_slash->interval.end;
3485 ftp_scheme->invalidate();
3486 mailto_scheme->invalidate();
3487 file_scheme->invalidate();
3488 }
3489 else if (ftp_scheme->match(text, interval.end, end, flags) &&
3490 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3491 m_slash->match(text, m_colon->interval.end, end, flags) &&
3492 m_slash->match(text, m_slash->interval.end, end, flags))
3493 {
3494 // ftp://
3495 interval.end = m_slash->interval.end;
3496 http_scheme->invalidate();
3497 mailto_scheme->invalidate();
3498 file_scheme->invalidate();
3499 }
3500 else if (mailto_scheme->match(text, interval.end, end, flags) &&
3501 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3502 {
3503 // mailto:
3504 interval.end = m_colon->interval.end;
3505 http_scheme->invalidate();
3506 ftp_scheme->invalidate();
3507 file_scheme->invalidate();
3508 }
3509 else if (file_scheme->match(text, interval.end, end, flags) &&
3510 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3511 m_slash->match(text, m_colon->interval.end, end, flags) &&
3512 m_slash->match(text, m_slash->interval.end, end, flags))
3513 {
3514 // file://
3515 interval.end = m_slash->interval.end;
3516 http_scheme->invalidate();
3517 ftp_scheme->invalidate();
3518 mailto_scheme->invalidate();
3519 }
3520 else {
3521 // Default to http:
3522 http_scheme->invalidate();
3523 ftp_scheme->invalidate();
3524 mailto_scheme->invalidate();
3525 file_scheme->invalidate();
3526 }
3527
3528 if (ftp_scheme->interval) {
3529 if (username->match(text, interval.end, end, flags)) {
3530 if (m_colon->match(text, username->interval.end, end, flags) &&
3531 password->match(text, m_colon->interval.end, end, flags) &&
3532 m_at->match(text, password->interval.end, end, flags))
3533 {
3534 // Username and password
3535 interval.end = m_at->interval.end;
3536 }
3537 else if (m_at->match(text, interval.end, end, flags)) {
3538 // Username only
3539 interval.end = m_at->interval.end;
3540 password->invalidate();
3541 }
3542 else {
3543 username->invalidate();
3544 password->invalidate();
3545 }
3546 }
3547 else {
3548 username->invalidate();
3549 password->invalidate();
3550 }
3551
3552 if (ipv4_host->match(text, interval.end, end, flags)) {
3553 // Host is IPv4
3554 interval.end = ipv4_host->interval.end;
3555 ipv6_host->invalidate();
3556 dns_host->invalidate();
3557 }
3558 else if (
3559 m_ip_lbracket->match(text, interval.end, end, flags) &&
3560 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3561 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3562 {
3563 // Host is IPv6
3564 interval.end = m_ip_rbracket->interval.end;
3565 ipv4_host->invalidate();
3566 dns_host->invalidate();
3567 }
3568 else if (dns_host->match(text, interval.end, end, flags)) {
3569 // Host is hostname
3570 interval.end = dns_host->interval.end;
3571 ipv4_host->invalidate();
3572 ipv6_host->invalidate();
3573 }
3574 else {
3575 invalidate();
3576 return false;
3577 }
3578
3579 if (m_colon->match(text, interval.end, end, flags) &&
3580 port->match(text, m_colon->interval.end, end, flags))
3581 {
3582 // Port
3583 interval.end = port->interval.end;
3584 }
3585 else
3586 port->invalidate();
3587
3588 if (path->match(text, interval.end, end, flags)) {
3589 // Path
3590 interval.end = path->interval.end;
3591 }
3592
3593 interval.start = start;
3594 return true;
3595 }
3596
3597 if (mailto_scheme->interval) {
3598 if (username->match(text, interval.end, end, flags) &&
3599 m_at->match(text, username->interval.end, end, flags))
3600 {
3601 // Username
3602 interval.end = m_at->interval.end;
3603 }
3604 else {
3605 invalidate();
3606 return false;
3607 }
3608
3609 if (m_ip_lbracket->match(text, interval.end, end, flags) &&
3610 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3611 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3612 {
3613 // Host is IPv4
3614 interval.end = m_ip_rbracket->interval.end;
3615 ipv6_host->invalidate();
3616 dns_host->invalidate();
3617 }
3618 else if (
3619 m_ip_lbracket->match(text, interval.end, end, flags) &&
3620 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3621 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3622 {
3623 // Host is IPv6
3624 interval.end = m_ip_rbracket->interval.end;
3625 ipv4_host->invalidate();
3626 dns_host->invalidate();
3627 }
3628 else if (dns_host->match(text, interval.end, end, flags)) {
3629 // Host is hostname
3630 interval.end = dns_host->interval.end;
3631 ipv4_host->invalidate();
3632 ipv6_host->invalidate();
3633 }
3634 else {
3635 invalidate();
3636 return false;
3637 }
3638
3639 password->invalidate();
3640 port->invalidate();
3641 path->invalidate();
3642 interval.start = start;
3643 return true;
3644 }
3645
3646 if (file_scheme->interval) {
3647 if (path->match(text, interval.end, end, flags)) {
3648 // Path
3649 interval.end = path->interval.end;
3650 }
3651
3652 username->invalidate();
3653 password->invalidate();
3654 ipv4_host->invalidate();
3655 ipv6_host->invalidate();
3656 dns_host->invalidate();
3657 port->invalidate();
3658 interval.start = start;
3659 return true;
3660 }
3661
3662 // "http://" found or defaulted to
3663
3664 // If "http://" explicit, test for username&password.
3665 if (http_scheme->interval &&
3666 username->match(text, interval.end, end, flags))
3667 {
3668 if (m_colon->match(text, username->interval.end, end, flags) &&
3669 password->match(text, m_colon->interval.end, end, flags) &&
3670 m_at->match(text, password->interval.end, end, flags))
3671 {
3672 // Username and password
3673 interval.end = m_at->interval.end;
3674 }
3675 else if (m_at->match(text, username->interval.end, end, flags)) {
3676 // Username only
3677 interval.end = m_at->interval.end;
3678 password->invalidate();
3679 }
3680 else {
3681 username->invalidate();
3682 password->invalidate();
3683 }
3684 }
3685 else {
3686 username->invalidate();
3687 password->invalidate();
3688 }
3689
3690 if (ipv4_host->match(text, interval.end, end, flags)) {
3691 // Host is IPv4
3692 interval.end = ipv4_host->interval.end;
3693 ipv6_host->invalidate();
3694 dns_host->invalidate();
3695 }
3696 else if (
3697 m_ip_lbracket->match(text, interval.end, end, flags) &&
3698 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3699 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3700 {
3701 // Host is IPv6
3702 interval.end = m_ip_rbracket->interval.end;
3703 ipv4_host->invalidate();
3704 dns_host->invalidate();
3705 }
3706 else if (dns_host->match(text, interval.end, end, flags)) {
3707 // Host is hostname
3708 interval.end = dns_host->interval.end;
3709 ipv4_host->invalidate();
3710 ipv6_host->invalidate();
3711 }
3712 else {
3713 invalidate();
3714 return false;
3715 }
3716
3717 if (m_colon->match(text, interval.end, end, flags) &&
3718 port->match(text, m_colon->interval.end, end, flags))
3719 {
3720 // Port
3721 interval.end = port->interval.end;
3722 }
3723 else
3724 port->invalidate();
3725
3726 if (path->match(text, interval.end, end, flags)) {
3727 // Path
3728 interval.end = path->interval.end;
3729 }
3730
3731 interval.start = start;
3732 return true;
3733 }
3734
3735 virtual void invalidate()
3736 {
3737 http_scheme->invalidate();
3738 ftp_scheme->invalidate();
3739 mailto_scheme->invalidate();
3740 file_scheme->invalidate();
3741 username->invalidate();
3742 password->invalidate();
3743 ipv4_host->invalidate();
3744 ipv6_host->invalidate();
3745 dns_host->invalidate();
3746 port->invalidate();
3747 path->invalidate();
3749 }
3750
3751 public:
// Scheme name sub-parsers. match() leaves at most one of them with a valid interval.
3752 std::shared_ptr<basic_parser<T>> http_scheme;
3753 std::shared_ptr<basic_parser<T>> ftp_scheme;
3754 std::shared_ptr<basic_parser<T>> mailto_scheme;
3755 std::shared_ptr<basic_parser<T>> file_scheme;
// Credentials: the part before "@". The password follows the user name after ":".
3756 std::shared_ptr<basic_parser<T>> username;
3757 std::shared_ptr<basic_parser<T>> password;
// Host alternatives. On a successful match with a host exactly one is left valid.
3758 std::shared_ptr<basic_parser<T>> ipv4_host;
3759 std::shared_ptr<basic_parser<T>> ipv6_host;
3760 std::shared_ptr<basic_parser<T>> dns_host;
// Port (tried after ":" following the host) and path.
3761 std::shared_ptr<basic_parser<T>> port;
3762 std::shared_ptr<basic_parser<T>> path;
3763
3764 protected:
// Punctuation sub-parsers used between the components above.
3765 std::shared_ptr<basic_parser<T>> m_colon;
3766 std::shared_ptr<basic_parser<T>> m_slash;
3767 std::shared_ptr<basic_parser<T>> m_at;
// Brackets that enclose an IP address literal.
3768 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3769 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3770 };
3771
// URL parser instantiations: char, wchar_t, and turl which follows _UNICODE.
3772 using url = basic_url<char>;
3773 using wurl = basic_url<wchar_t>;
3774#ifdef _UNICODE
3775 using turl = wurl;
3776#else
3777 using turl = url;
3778#endif
// Same instantiation as url, under an SGML-specific name.
3779 using sgml_url = basic_url<char>;
3780
3784 template <class T>
3786 {
3787 public:
// Constructor parameters and initializers: every sub-parser argument is stored in
// the member of the corresponding name; `locale` is forwarded to basic_parser<T>.
// NOTE(review): the line carrying the constructor name is absent from this listing.
3789 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3790 _In_ const std::shared_ptr<basic_parser<T>>& at,
3791 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3792 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3793 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3794 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3795 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3796 _In_ const std::locale& locale = std::locale()) :
3797 basic_parser<T>(locale),
3798 username(_username),
3799 m_at(at),
3800 m_ip_lbracket(ip_lbracket),
3801 m_ip_rbracket(ip_rbracket),
3802 ipv4_host(_ipv4_host),
3803 ipv6_host(_ipv6_host),
3804 dns_host(_dns_host)
3805 {}
3806
3807 virtual bool match(
3808 _In_reads_or_z_(end) const T* text,
3809 _In_ size_t start = 0,
3810 _In_ size_t end = (size_t)-1,
3811 _In_ int flags = match_default)
3812 {
3813 assert(text || start >= end);
3814
3815 if (username->match(text, start, end, flags) &&
3816 m_at->match(text, username->interval.end, end, flags))
3817 {
3818 // Username@
3819 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3820 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3821 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3822 {
3823 // Host is IPv4
3824 interval.end = m_ip_rbracket->interval.end;
3825 ipv6_host->invalidate();
3826 dns_host->invalidate();
3827 }
3828 else if (
3829 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3830 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3831 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3832 {
3833 // Host is IPv6
3834 interval.end = m_ip_rbracket->interval.end;
3835 ipv4_host->invalidate();
3836 dns_host->invalidate();
3837 }
3838 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3839 // Host is hostname
3840 interval.end = dns_host->interval.end;
3841 ipv4_host->invalidate();
3842 ipv6_host->invalidate();
3843 }
3844 else
3845 goto error;
3846 interval.start = start;
3847 return true;
3848 }
3849
3850 error:
3851 username->invalidate();
3852 ipv4_host->invalidate();
3853 ipv6_host->invalidate();
3854 dns_host->invalidate();
3855 interval.start = (interval.end = start) + 1;
3856 return false;
3857 }
3858
3859 virtual void invalidate()
3860 {
3861 username->invalidate();
3862 ipv4_host->invalidate();
3863 ipv6_host->invalidate();
3864 dns_host->invalidate();
3866 }
3867
3868 public:
// Part before "@".
3869 std::shared_ptr<basic_parser<T>> username;
// Host alternatives; after a successful match() exactly one is left valid.
3870 std::shared_ptr<basic_parser<T>> ipv4_host;
3871 std::shared_ptr<basic_parser<T>> ipv6_host;
3872 std::shared_ptr<basic_parser<T>> dns_host;
3873
3874 protected:
// Separator between user name and host, and the brackets around an IP address literal.
3875 std::shared_ptr<basic_parser<T>> m_at;
3876 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3877 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3878 };
3879
3882#ifdef _UNICODE
3884#else
3886#endif
3888
3892 template <class T>
3894 {
3895 public:
// Constructor parameters and initializers: every sub-parser argument is stored in
// the member of the same name; `locale` is forwarded to basic_parser<T>.
// match() null-checks _emoticon, _apex and _nose before use; _eyes and _mouth are used unchecked.
// NOTE(review): the line carrying the constructor name is absent from this listing.
3897 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3898 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3899 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3900 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3901 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3902 _In_ const std::locale& locale = std::locale()) :
3903 basic_parser<T>(locale),
3904 emoticon(_emoticon),
3905 apex(_apex),
3906 eyes(_eyes),
3907 nose(_nose),
3908 mouth(_mouth)
3909 {}
3910
3911 virtual bool match(
3912 _In_reads_or_z_(end) const T* text,
3913 _In_ size_t start = 0,
3914 _In_ size_t end = (size_t)-1,
3915 _In_ int flags = match_default)
3916 {
3917 assert(text || start >= end);
3918
3919 if (emoticon && emoticon->match(text, start, end, flags)) {
3920 if (apex) apex->invalidate();
3921 eyes->invalidate();
3922 if (nose) nose->invalidate();
3923 mouth->invalidate();
3924 interval.start = start;
3926 return true;
3927 }
3928
3929 interval.end = start;
3930
3931 if (apex && apex->match(text, interval.end, end, flags))
3932 interval.end = apex->interval.end;
3933
3934 if (eyes->match(text, interval.end, end, flags)) {
3935 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3936 mouth->match(text, nose->interval.end, end, flags))
3937 {
3938 size_t
3939 start_mouth = mouth->interval.start,
3940 hit_offset = mouth->hit_offset;
3941 // Mouth may repeat :-)))))))
3942 for (interval.end = mouth->interval.end; mouth->match(text, interval.end, end, flags) && mouth->hit_offset == hit_offset; interval.end = mouth->interval.end);
3943 mouth->interval.start = start_mouth;
3944 mouth->interval.end = interval.end;
3945 interval.start = start;
3946 return true;
3947 }
3948 if (mouth->match(text, eyes->interval.end, end, flags)) {
3949 size_t
3950 start_mouth = mouth->interval.start,
3951 hit_offset = mouth->hit_offset;
3952 // Mouth may repeat :-)))))))
3953 for (interval.end = mouth->interval.end; mouth->match(text, interval.end, end, flags) && mouth->hit_offset == hit_offset; interval.end = mouth->interval.end);
3954 if (nose) nose->invalidate();
3955 mouth->interval.start = start_mouth;
3956 mouth->interval.end = interval.end;
3957 interval.start = start;
3958 return true;
3959 }
3960 }
3961
3962 if (emoticon) emoticon->invalidate();
3963 if (apex) apex->invalidate();
3964 eyes->invalidate();
3965 if (nose) nose->invalidate();
3966 mouth->invalidate();
3967 interval.start = (interval.end = start) + 1;
3968 return false;
3969 }
3970
3971 virtual void invalidate()
3972 {
3973 if (emoticon) emoticon->invalidate();
3974 if (apex) apex->invalidate();
3975 eyes->invalidate();
3976 if (nose) nose->invalidate();
3977 mouth->invalidate();
3979 }
3980
3981 public:
// Optional parser for ready-made emoticons; tried first, may be null.
3982 std::shared_ptr<basic_parser<T>> emoticon;
// Optional part in front of the eyes; may be null.
3983 std::shared_ptr<basic_parser<T>> apex;
// Eyes; required for the composed form.
3984 std::shared_ptr<basic_parser<T>> eyes;
// Optional nose between eyes and mouth; may be null.
3985 std::shared_ptr<basic_parser<T>> nose;
// Mouth; a set so that match() can require repeated mouths to be the same alternative.
3986 std::shared_ptr<basic_set<T>> mouth;
3987 };
3988
// temoticon follows _UNICODE: wemoticon when it is defined, emoticon otherwise.
// NOTE(review): the declarations of emoticon and wemoticon are absent from this listing.
3991#ifdef _UNICODE
3992 using temoticon = wemoticon;
3993#else
3994 using temoticon = emoticon;
3995#endif
3997
// Date layouts as bit flags (d = day, m = month, y = year; the letters give the
// order of the components). ENUM_FLAGS also defines ~, |, ^ and & for the type.
4001 ENUM_FLAGS(date_format_t, int) {
4002 none = 0, // no layout
4003 dmy = 0x1, // day, month, year
4004 mdy = 0x2, // month, day, year
4005 ymd = 0x4, // year, month, day
4006 ym = 0x8, // year, month
4007 my = 0x10, // month, year
4008 dm = 0x20, // day, month
4009 md = 0x40, // month, day
4010 };
4011
4015 template <class T>
4016 class basic_date : public basic_parser<T>
4017 {
4018 public:
4019 basic_date(
4020 _In_ int format_mask,
4021 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4022 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4023 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4024 _In_ const std::shared_ptr<basic_set<T>>& separator,
4025 _In_ const std::shared_ptr<basic_parser<T>>& space,
4026 _In_ const std::locale& locale = std::locale()) :
4027 basic_parser<T>(locale),
4028 format(date_format_t::none),
4029 m_format_mask(format_mask),
4030 day(_day),
4031 month(_month),
4032 year(_year),
4033 m_separator(separator),
4034 m_space(space)
4035 {}
4036
4037 virtual bool match(
4038 _In_reads_or_z_(end) const T* text,
4039 _In_ size_t start = 0,
4040 _In_ size_t end = (size_t)-1,
4041 _In_ int flags = match_default)
4042 {
4043 assert(text || start >= end);
4044
4045 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4046 if ((m_format_mask & date_format_t::dmy) == date_format_t::dmy) {
4047 if (day->match(text, start, end, flags)) {
4048 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4049 if (m_separator->match(text, interval.end, end, flags)) {
4050 size_t hit_offset = m_separator->hit_offset;
4051 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4052 if (month->match(text, interval.end, end, flags)) {
4053 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4054 if (m_separator->match(text, interval.end, end, flags) &&
4055 m_separator->hit_offset == hit_offset) // Both separators must match.
4056 {
4057 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4058 if (year->match(text, interval.end, end, flags) &&
4059 is_valid(day->value, month->value))
4060 {
4061 interval.start = start;
4062 interval.end = year->interval.end;
4063 format = date_format_t::dmy;
4064 return true;
4065 }
4066 }
4067 }
4068 }
4069 }
4070 }
4071
4072 if ((m_format_mask & date_format_t::mdy) == date_format_t::mdy) {
4073 if (month->match(text, start, end, flags)) {
4074 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4075 if (m_separator->match(text, interval.end, end, flags)) {
4076 size_t hit_offset = m_separator->hit_offset;
4077 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4078 if (day->match(text, interval.end, end, flags)) {
4079 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4080 if (m_separator->match(text, interval.end, end, flags) &&
4081 m_separator->hit_offset == hit_offset) // Both separators must match.
4082 {
4083 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4084 if (year->match(text, interval.end, end, flags) &&
4085 is_valid(day->value, month->value))
4086 {
4087 interval.start = start;
4088 interval.end = year->interval.end;
4089 format = date_format_t::mdy;
4090 return true;
4091 }
4092 }
4093 }
4094 }
4095 }
4096 }
4097
4098 if ((m_format_mask & date_format_t::ymd) == date_format_t::ymd) {
4099 if (year->match(text, start, end, flags)) {
4100 for (interval.end = year->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4101 if (m_separator->match(text, interval.end, end, flags)) {
4102 size_t hit_offset = m_separator->hit_offset;
4103 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4104 if (month->match(text, interval.end, end, flags)) {
4105 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4106 if (m_separator->match(text, interval.end, end, flags) &&
4107 m_separator->hit_offset == hit_offset) // Both separators must match.
4108 {
4109 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4110 if (day->match(text, interval.end, end, flags) &&
4111 is_valid(day->value, month->value))
4112 {
4113 interval.start = start;
4114 interval.end = day->interval.end;
4115 format = date_format_t::ymd;
4116 return true;
4117 }
4118 }
4119 }
4120 }
4121 }
4122 }
4123
4124 if ((m_format_mask & date_format_t::ym) == date_format_t::ym) {
4125 if (year->match(text, start, end, flags)) {
4126 for (interval.end = year->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4127 if (m_separator->match(text, interval.end, end, flags)) {
4128 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4129 if (month->match(text, interval.end, end, flags) &&
4130 is_valid((size_t)-1, month->value))
4131 {
4132 if (day) day->invalidate();
4133 interval.start = start;
4134 interval.end = month->interval.end;
4135 format = date_format_t::ym;
4136 return true;
4137 }
4138 }
4139 }
4140 }
4141
4142 if ((m_format_mask & date_format_t::my) == date_format_t::my) {
4143 if (month->match(text, start, end, flags)) {
4144 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4145 if (m_separator->match(text, interval.end, end, flags)) {
4146 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4147 if (year->match(text, interval.end, end, flags) &&
4148 is_valid((size_t)-1, month->value))
4149 {
4150 if (day) day->invalidate();
4151 interval.start = start;
4152 interval.end = year->interval.end;
4153 format = date_format_t::my;
4154 return true;
4155 }
4156 }
4157 }
4158 }
4159
4160 if ((m_format_mask & date_format_t::dm) == date_format_t::dm) {
4161 if (day->match(text, start, end, flags)) {
4162 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4163 if (m_separator->match(text, interval.end, end, flags)) {
4164 size_t hit_offset = m_separator->hit_offset;
4165 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4166 if (month->match(text, interval.end, end, flags) &&
4167 is_valid(day->value, month->value))
4168 {
4169 if (year) year->invalidate();
4170 interval.start = start;
4171 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4172 if (m_separator->match(text, interval.end, end, flags) &&
4173 m_separator->hit_offset == hit_offset) // Both separators must match.
4174 interval.end = m_separator->interval.end;
4175 else
4176 interval.end = month->interval.end;
4177 format = date_format_t::dm;
4178 return true;
4179 }
4180 }
4181 }
4182 }
4183
4184 if ((m_format_mask & date_format_t::md) == date_format_t::md) {
4185 if (month->match(text, start, end, flags)) {
4186 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4187 if (m_separator->match(text, interval.end, end, flags)) {
4188 size_t hit_offset = m_separator->hit_offset;
4189 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4190 if (day->match(text, interval.end, end, flags) &&
4191 is_valid(day->value, month->value))
4192 {
4193 if (year) year->invalidate();
4194 interval.start = start;
4195 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4196 if (m_separator->match(text, interval.end, end, flags) &&
4197 m_separator->hit_offset == hit_offset) // Both separators must match.
4198 interval.end = m_separator->interval.end;
4199 else
4200 interval.end = day->interval.end;
4201 format = date_format_t::md;
4202 return true;
4203 }
4204 }
4205 }
4206 }
4207
4208 if (day) day->invalidate();
4209 if (month) month->invalidate();
4210 if (year) year->invalidate();
4211 format = date_format_t::none;
4212 interval.start = (interval.end = start) + 1;
4213 return false;
4214 }
4215
4216 virtual void invalidate()
4217 {
4218 if (day) day->invalidate();
4219 if (month) month->invalidate();
4220 if (year) year->invalidate();
4221 format = date_format_t::none;
4223 }
4224
4225 protected:
4226 static inline bool is_valid(size_t day, size_t month)
4227 {
4228 if (month == (size_t)-1) {
4229 // Default to January. This allows validating day only, as January has all 31 days.
4230 month = 1;
4231 }
4232 if (day == (size_t)-1) {
4233 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4234 day = 1;
4235 }
4236
4237 switch (month) {
4238 case 1:
4239 case 3:
4240 case 5:
4241 case 7:
4242 case 8:
4243 case 10:
4244 case 12:
4245 return 1 <= day && day <= 31;
4246 case 2:
4247 return 1 <= day && day <= 29;
4248 case 4:
4249 case 6:
4250 case 9:
4251 case 11:
4252 return 1 <= day && day <= 30;
4253 default:
4254 return false;
4255 }
4256 }
4257
4258 public:
4259 date_format_t format;
4260 std::shared_ptr<basic_integer<T>> day;
4261 std::shared_ptr<basic_integer<T>> month;
4262 std::shared_ptr<basic_integer<T>> year;
4263
4264 protected:
4265 int m_format_mask;
4266 std::shared_ptr<basic_set<T>> m_separator;
4267 std::shared_ptr<basic_parser<T>> m_space;
4268 };
4269
// Date parser instantiations: char, wchar_t, and tdate which follows _UNICODE.
4270 using date = basic_date<char>;
4271 using wdate = basic_date<wchar_t>;
4272#ifdef _UNICODE
4273 using tdate = wdate;
4274#else
4275 using tdate = date;
4276#endif
4278
4282 template <class T>
4283 class basic_time : public basic_parser<T>
4284 {
4285 public:
4286 basic_time(
4287 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4288 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4289 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4290 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4291 _In_ const std::shared_ptr<basic_set<T>>& separator,
4292 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4293 _In_ const std::locale& locale = std::locale()) :
4294 basic_parser<T>(locale),
4295 hour(_hour),
4296 minute(_minute),
4297 second(_second),
4298 millisecond(_millisecond),
4299 m_separator(separator),
4300 m_millisecond_separator(millisecond_separator)
4301 {}
4302
4303 virtual bool match(
4304 _In_reads_or_z_(end) const T* text,
4305 _In_ size_t start = 0,
4306 _In_ size_t end = (size_t)-1,
4307 _In_ int flags = match_default)
4308 {
4309 assert(text || start >= end);
4310
4311 if (hour->match(text, start, end, flags) &&
4312 m_separator->match(text, hour->interval.end, end, flags) &&
4313 minute->match(text, m_separator->interval.end, end, flags) &&
4314 minute->value < 60)
4315 {
4316 // hh::mm
4317 size_t hit_offset = m_separator->hit_offset;
4318 if (m_separator->match(text, minute->interval.end, end, flags) &&
4319 m_separator->hit_offset == hit_offset && // Both separators must match.
4320 second && second->match(text, m_separator->interval.end, end, flags) &&
4321 second->value < 60)
4322 {
4323 // hh::mm:ss
4324 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4325 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4326 millisecond->value < 1000)
4327 {
4328 // hh::mm:ss.mmmm
4329 interval.end = millisecond->interval.end;
4330 }
4331 else {
4332 if (millisecond) millisecond->invalidate();
4333 interval.end = second->interval.end;
4334 }
4335 }
4336 else {
4337 if (second) second->invalidate();
4338 if (millisecond) millisecond->invalidate();
4339 interval.end = minute->interval.end;
4340 }
4341 interval.start = start;
4342 return true;
4343 }
4344
4345 hour->invalidate();
4346 minute->invalidate();
4347 if (second) second->invalidate();
4348 if (millisecond) millisecond->invalidate();
4349 interval.start = (interval.end = start) + 1;
4350 return false;
4351 }
4352
4353 virtual void invalidate()
4354 {
4355 hour->invalidate();
4356 minute->invalidate();
4357 if (second) second->invalidate();
4358 if (millisecond) millisecond->invalidate();
4360 }
4361
4362 public:
4363 std::shared_ptr<basic_integer10<T>> hour;
4364 std::shared_ptr<basic_integer10<T>> minute;
4365 std::shared_ptr<basic_integer10<T>> second;
4366 std::shared_ptr<basic_integer10<T>> millisecond;
4367
4368 protected:
4369 std::shared_ptr<basic_set<T>> m_separator;
4370 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4371 };
4372
// Time parser instantiations: char, wchar_t, and ttime which follows _UNICODE.
4373 using time = basic_time<char>;
4374 using wtime = basic_time<wchar_t>;
4375#ifdef _UNICODE
4376 using ttime = wtime;
4377#else
4378 using ttime = time;
4379#endif
4381
4385 template <class T>
4386 class basic_angle : public basic_parser<T>
4387 {
4388 public:
4390 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4391 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4392 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4393 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4394 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4395 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4396 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4397 _In_ const std::locale& locale = std::locale()) :
4398 basic_parser<T>(locale),
4399 degree(_degree),
4400 degree_separator(_degree_separator),
4401 minute(_minute),
4402 minute_separator(_minute_separator),
4403 second(_second),
4404 second_separator(_second_separator),
4405 decimal(_decimal)
4406 {}
4407
4408 virtual bool match(
4409 _In_reads_or_z_(end) const T* text,
4410 _In_ size_t start = 0,
4411 _In_ size_t end = (size_t)-1,
4412 _In_ int flags = match_default)
4413 {
4414 assert(text || start >= end);
4415
4416 interval.end = start;
4417
4418 if (degree->match(text, interval.end, end, flags) &&
4419 degree_separator->match(text, degree->interval.end, end, flags))
4420 {
4421 // Degrees
4422 interval.end = degree_separator->interval.end;
4423 }
4424 else {
4425 degree->invalidate();
4426 degree_separator->invalidate();
4427 }
4428
4429 if (minute->match(text, interval.end, end, flags) &&
4430 minute->value < 60 &&
4431 minute_separator->match(text, minute->interval.end, end, flags))
4432 {
4433 // Minutes
4434 interval.end = minute_separator->interval.end;
4435 }
4436 else {
4437 minute->invalidate();
4438 minute_separator->invalidate();
4439 }
4440
4441 if (second && second->match(text, interval.end, end, flags) &&
4442 second->value < 60)
4443 {
4444 // Seconds
4445 interval.end = second->interval.end;
4446 if (second_separator && second_separator->match(text, interval.end, end, flags))
4447 interval.end = second_separator->interval.end;
4448 else
4449 if (second_separator) second_separator->invalidate();
4450 }
4451 else {
4452 if (second) second->invalidate();
4453 if (second_separator) second_separator->invalidate();
4454 }
4455
4456 if (degree->interval.start < degree->interval.end ||
4457 minute->interval.start < minute->interval.end ||
4458 second && second->interval.start < second->interval.end)
4459 {
4460 if (decimal && decimal->match(text, interval.end, end, flags)) {
4461 // Decimals
4462 interval.end = decimal->interval.end;
4463 }
4464 else if (decimal)
4465 decimal->invalidate();
4466 interval.start = start;
4467 return true;
4468 }
4469 if (decimal) decimal->invalidate();
4470 interval.start = (interval.end = start) + 1;
4471 return false;
4472 }
4473
4474 virtual void invalidate()
4475 {
4476 degree->invalidate();
4477 degree_separator->invalidate();
4478 minute->invalidate();
4479 minute_separator->invalidate();
4480 if (second) second->invalidate();
4481 if (second_separator) second_separator->invalidate();
4482 if (decimal) decimal->invalidate();
4484 }
4485
4486 public:
4487 std::shared_ptr<basic_integer10<T>> degree;
4488 std::shared_ptr<basic_parser<T>> degree_separator;
4489 std::shared_ptr<basic_integer10<T>> minute;
4490 std::shared_ptr<basic_parser<T>> minute_separator;
4491 std::shared_ptr<basic_integer10<T>> second;
4492 std::shared_ptr<basic_parser<T>> second_separator;
4493 std::shared_ptr<basic_parser<T>> decimal;
4494 };
4495
4496 using angle = basic_angle<char>;
4498#ifdef _UNICODE
4499 using RRegElKot = wangle;
4500#else
4501 using RRegElKot = angle;
4502#endif
4504
4508 template <class T>
4510 {
4511 public:
4513 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4514 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4515 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4516 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4517 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4518 _In_ const std::shared_ptr<basic_parser<T>>& space,
4519 _In_ const std::locale& locale = std::locale()) :
4520 basic_parser<T>(locale),
4521 m_digit(digit),
4522 m_plus_sign(plus_sign),
4523 m_lparenthesis(lparenthesis),
4524 m_rparenthesis(rparenthesis),
4525 m_separator(separator),
4526 m_space(space)
4527 {}
4528
4529 virtual bool match(
4530 _In_reads_or_z_(end) const T* text,
4531 _In_ size_t start = 0,
4532 _In_ size_t end = (size_t)-1,
4533 _In_ int flags = match_default)
4534 {
4535 assert(text || start >= end);
4536
4537 size_t safe_digit_end = start, safe_value_size = 0;
4538 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4539 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4540
4541 interval.end = start;
4542 value.clear();
4543 m_lparenthesis->invalidate();
4544 m_rparenthesis->invalidate();
4545
4546 if (m_plus_sign && m_plus_sign->match(text, interval.end, end, flags)) {
4547 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4548 safe_value_size = value.size();
4549 interval.end = m_plus_sign->interval.end;
4550 }
4551
4552 for (;;) {
4553 assert(text || interval.end >= end);
4554 if (interval.end >= end || !text[interval.end])
4555 break;
4556 if (m_digit->match(text, interval.end, end, flags)) {
4557 // Digit
4558 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4559 interval.end = m_digit->interval.end;
4560 if (!in_parentheses) {
4561 safe_digit_end = interval.end;
4562 safe_value_size = value.size();
4563 has_digits = true;
4564 }
4565 after_digit = true;
4566 after_parentheses = false;
4567 }
4568 else if (
4569 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4570 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4571 m_lparenthesis->match(text, interval.end, end, flags))
4572 {
4573 // Left parenthesis
4574 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4575 interval.end = m_lparenthesis->interval.end;
4576 in_parentheses = true;
4577 after_digit = false;
4578 after_parentheses = false;
4579 }
4580 else if (
4581 in_parentheses && // After left parenthesis
4582 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4583 m_rparenthesis->match(text, interval.end, end, flags) &&
4584 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4585 {
4586 // Right parenthesis
4587 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4588 interval.end = m_rparenthesis->interval.end;
4589 safe_digit_end = interval.end;
4590 safe_value_size = value.size();
4591 in_parentheses = false;
4592 after_digit = false;
4593 after_parentheses = true;
4594 }
4595 else if (
4596 after_digit &&
4597 !in_parentheses && // No separators inside parentheses
4598 !after_parentheses && // No separators following right parenthesis
4599 m_separator && m_separator->match(text, interval.end, end, flags))
4600 {
4601 // Separator
4602 interval.end = m_separator->interval.end;
4603 after_digit = false;
4604 after_parentheses = false;
4605 }
4606 else if (
4607 (after_digit || after_parentheses) &&
4608 m_space && m_space->match(text, interval.end, end, space_match_flags))
4609 {
4610 // Space
4611 interval.end = m_space->interval.end;
4612 after_digit = false;
4613 after_parentheses = false;
4614 }
4615 else
4616 break;
4617 }
4618 if (has_digits) {
4619 value.erase(safe_value_size);
4620 interval.start = start;
4621 interval.end = safe_digit_end;
4622 return true;
4623 }
4624 value.clear();
4625 interval.start = (interval.end = start) + 1;
4626 return false;
4627 }
4628
4629 virtual void invalidate()
4630 {
4631 value.clear();
4633 }
4634
4635 public:
4636 std::basic_string<T> value;
4637
4638 protected:
4639 std::shared_ptr<basic_parser<T>> m_digit;
4640 std::shared_ptr<basic_parser<T>> m_plus_sign;
4641 std::shared_ptr<basic_set<T>> m_lparenthesis;
4642 std::shared_ptr<basic_set<T>> m_rparenthesis;
4643 std::shared_ptr<basic_parser<T>> m_separator;
4644 std::shared_ptr<basic_parser<T>> m_space;
4645 };
4646
4649#ifdef _UNICODE
4651#else
4653#endif
4655
4659 template <class T>
4661 {
4662 public:
4664 _In_ const std::shared_ptr<basic_parser<T>>& element,
4665 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4666 _In_ const std::shared_ptr<basic_parser<T>>& sign,
4667 _In_ const std::locale& locale = std::locale()) :
4668 basic_parser<T>(locale),
4669 m_element(element),
4670 m_digit(digit),
4671 m_sign(sign),
4672 has_digits(false),
4673 has_charge(false)
4674 {}
4675
4676 virtual bool match(
4677 _In_reads_or_z_(end) const T* text,
4678 _In_ size_t start = 0,
4679 _In_ size_t end = (size_t)-1,
4680 _In_ int flags = match_default)
4681 {
4682 assert(text || start >= end);
4683
4684 has_digits = false;
4685 has_charge = false;
4686 interval.end = start;
4687
4688 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
4689 for (;;) {
4690 if (m_element->match(text, interval.end, end, element_match_flags)) {
4691 interval.end = m_element->interval.end;
4692 while (m_digit->match(text, interval.end, end, flags)) {
4693 interval.end = m_digit->interval.end;
4694 has_digits = true;
4695 }
4696 }
4697 else if (start < interval.end) {
4698 if (m_sign->match(text, interval.end, end, flags)) {
4699 interval.end = m_sign->interval.end;
4700 has_charge = true;
4701 }
4702 interval.start = start;
4703 return true;
4704 }
4705 else {
4706 interval.start = (interval.end = start) + 1;
4707 return false;
4708 }
4709 }
4710 }
4711
4712 virtual void invalidate()
4713 {
4714 has_digits = false;
4715 has_charge = false;
4717 }
4718
4719 public:
4720 bool has_digits;
4721 bool has_charge;
4722
4723 protected:
4724 std::shared_ptr<basic_parser<T>> m_element;
4725 std::shared_ptr<basic_parser<T>> m_digit;
4726 std::shared_ptr<basic_parser<T>> m_sign;
4727 };
4728
4731#ifdef _UNICODE
4733#else
4735#endif
4737
4742 {
4743 public:
4744 virtual bool match(
4745 _In_reads_or_z_(end) const char* text,
4746 _In_ size_t start = 0,
4747 _In_ size_t end = (size_t)-1,
4748 _In_ int flags = match_default)
4749 {
4750 assert(text || start >= end);
4751 interval.end = start;
4752
4753 assert(text || interval.end >= end);
4754 if (interval.end < end && text[interval.end]) {
4755 if (text[interval.end] == '\r') {
4756 interval.end++;
4757 if (interval.end < end && text[interval.end] == '\n') {
4758 interval.start = start;
4759 interval.end++;
4760 return true;
4761 }
4762 }
4763 else if (text[interval.end] == '\n') {
4764 interval.start = start;
4765 interval.end++;
4766 return true;
4767 }
4768 }
4769 interval.start = (interval.end = start) + 1;
4770 return false;
4771 }
4772 };
4773
4777 class http_space : public parser
4778 {
4779 public:
4780 virtual bool match(
4781 _In_reads_or_z_(end) const char* text,
4782 _In_ size_t start = 0,
4783 _In_ size_t end = (size_t)-1,
4784 _In_ int flags = match_default)
4785 {
4786 assert(text || start >= end);
4787 interval.end = start;
4788 if (m_line_break.match(text, interval.end, end, flags)) {
4789 interval.end = m_line_break.interval.end;
4790 if (interval.end < end && text[interval.end] && isspace(text[interval.end])) {
4791 interval.start = start;
4792 interval.end++;
4793 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
4794 return true;
4795 }
4796 }
4797 else if (interval.end < end && text[interval.end] && isspace(text[interval.end])) {
4798 interval.start = start;
4799 interval.end++;
4800 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
4801 return true;
4802 }
4803 interval.start = (interval.end = start) + 1;
4804 return false;
4805 }
4806
4807 protected:
4808 http_line_break m_line_break;
4809 };
4810
4814 class http_text_char : public parser
4815 {
4816 public:
4817 virtual bool match(
4818 _In_reads_or_z_(end) const char* text,
4819 _In_ size_t start = 0,
4820 _In_ size_t end = (size_t)-1,
4821 _In_ int flags = match_default)
4822 {
4823 assert(text || start >= end);
4824 interval.end = start;
4825
4826 assert(text || interval.end >= end);
4827 if (m_space.match(text, interval.end, end, flags)) {
4828 interval.start = start;
4829 interval.end = m_space.interval.end;
4830 return true;
4831 }
4832 else if (interval.end < end && text[interval.end] && text[interval.end] >= 0x20) {
4833 interval.start = start;
4834 interval.end++;
4835 return true;
4836 }
4837 interval.start = (interval.end = start) + 1;
4838 return false;
4839 }
4840
4841 protected:
4842 http_space m_space;
4843 };
4844
4848 class http_token : public parser
4849 {
4850 public:
4851 virtual bool match(
4852 _In_reads_or_z_(end) const char* text,
4853 _In_ size_t start = 0,
4854 _In_ size_t end = (size_t)-1,
4855 _In_ int flags = match_default)
4856 {
4857 assert(text || start >= end);
4858 interval.end = start;
4859 for (;;) {
4860 if (interval.end < end && text[interval.end]) {
4861 if ((unsigned int)text[interval.end] < 0x20 ||
4862 (unsigned int)text[interval.end] == 0x7f ||
4863 text[interval.end] == '(' ||
4864 text[interval.end] == ')' ||
4865 text[interval.end] == '<' ||
4866 text[interval.end] == '>' ||
4867 text[interval.end] == '@' ||
4868 text[interval.end] == ',' ||
4869 text[interval.end] == ';' ||
4870 text[interval.end] == ':' ||
4871 text[interval.end] == '\\' ||
4872 text[interval.end] == '\"' ||
4873 text[interval.end] == '/' ||
4874 text[interval.end] == '[' ||
4875 text[interval.end] == ']' ||
4876 text[interval.end] == '?' ||
4877 text[interval.end] == '=' ||
4878 text[interval.end] == '{' ||
4879 text[interval.end] == '}' ||
4880 isspace(text[interval.end]))
4881 break;
4882 else
4883 interval.end++;
4884 }
4885 else
4886 break;
4887 }
4888 if (start < interval.end) {
4889 interval.start = start;
4890 return true;
4891 }
4892 else {
4893 interval.start = (interval.end = start) + 1;
4894 return false;
4895 }
4896 }
4897 };
4898
4903 {
4904 public:
4905 virtual bool match(
4906 _In_reads_or_z_(end) const char* text,
4907 _In_ size_t start = 0,
4908 _In_ size_t end = (size_t)-1,
4909 _In_ int flags = match_default)
4910 {
4911 assert(text || start >= end);
4912 interval.end = start;
4913 if (interval.end < end && text[interval.end] != '"')
4914 goto error;
4915 interval.end++;
4917 for (;;) {
4918 assert(text || interval.end >= end);
4919 if (interval.end < end && text[interval.end]) {
4920 if (text[interval.end] == '"') {
4922 interval.end++;
4923 break;
4924 }
4925 else if (text[interval.end] == '\\') {
4926 interval.end++;
4927 if (interval.end < end && text[interval.end]) {
4928 interval.end++;
4929 }
4930 else
4931 goto error;
4932 }
4933 else if (m_chr.match(text, interval.end, end, flags))
4934 interval.end++;
4935 else
4936 goto error;
4937 }
4938 else
4939 goto error;
4940 }
4941 interval.start = start;
4942 return true;
4943
4944 error:
4945 content.start = 1;
4946 content.end = 0;
4947 interval.start = (interval.end = start) + 1;
4948 return false;
4949 }
4950
4951 virtual void invalidate()
4952 {
4953 content.start = 1;
4954 content.end = 0;
4955 parser::invalidate();
4956 }
4957
4958 public:
4960
4961 protected:
4962 http_text_char m_chr;
4963 };
4964
4968 class http_value : public parser
4969 {
4970 public:
4971 virtual bool match(
4972 _In_reads_or_z_(end) const char* text,
4973 _In_ size_t start = 0,
4974 _In_ size_t end = (size_t)-1,
4975 _In_ int flags = match_default)
4976 {
4977 assert(text || start >= end);
4978 interval.end = start;
4979 if (string.match(text, interval.end, end, flags)) {
4980 token.invalidate();
4981 interval.end = string.interval.end;
4982 interval.start = start;
4983 return true;
4984 }
4985 else if (token.match(text, interval.end, end, flags)) {
4986 string.invalidate();
4988 interval.start = start;
4989 return true;
4990 }
4991 else {
4992 interval.start = (interval.end = start) + 1;
4993 return false;
4994 }
4995 }
4996
4997 virtual void invalidate()
4998 {
4999 string.invalidate();
5000 token.invalidate();
5001 parser::invalidate();
5002 }
5003
5004 public:
5007 };
5008
5012 class http_parameter : public parser
5013 {
5014 public:
5015 virtual bool match(
5016 _In_reads_or_z_(end) const char* text,
5017 _In_ size_t start = 0,
5018 _In_ size_t end = (size_t)-1,
5019 _In_ int flags = match_default)
5020 {
5021 assert(text || start >= end);
5022 interval.end = start;
5023 if (name.match(text, interval.end, end, flags))
5025 else
5026 goto error;
5027 while (m_space.match(text, interval.end, end, flags))
5028 interval.end = m_space.interval.end;
5029 assert(text || interval.end >= end);
5030 if (interval.end < end && text[interval.end] == '=')
5031 interval.end++;
5032 else
5033 while (m_space.match(text, interval.end, end, flags))
5034 interval.end = m_space.interval.end;
5035 if (value.match(text, interval.end, end, flags))
5037 else
5038 goto error;
5039 interval.start = start;
5040 return true;
5041
5042 error:
5043 name.invalidate();
5044 value.invalidate();
5045 interval.start = (interval.end = start) + 1;
5046 return false;
5047 }
5048
5049 virtual void invalidate()
5050 {
5051 name.invalidate();
5052 value.invalidate();
5053 parser::invalidate();
5054 }
5055
5056 public:
5059
5060 protected:
5061 http_space m_space;
5062 };
5063
5067 class http_any_type : public parser
5068 {
5069 public:
5070 virtual bool match(
5071 _In_reads_or_z_(end) const char* text,
5072 _In_ size_t start = 0,
5073 _In_ size_t end = (size_t)-1,
5074 _In_ int flags = match_default)
5075 {
5076 assert(text || start >= end);
5077 if (start + 2 < end &&
5078 text[start] == '*' &&
5079 text[start + 1] == '/' &&
5080 text[start + 2] == '*')
5081 {
5082 interval.end = (interval.start = start) + 3;
5083 return true;
5084 }
5085 else if (start < end && text[start] == '*') {
5086 interval.end = (interval.start = start) + 1;
5087 return true;
5088 }
5089 else {
5090 interval.start = (interval.end = start) + 1;
5091 return false;
5092 }
5093 }
5094 };
5095
5100 {
5101 public:
5102 virtual bool match(
5103 _In_reads_or_z_(end) const char* text,
5104 _In_ size_t start = 0,
5105 _In_ size_t end = (size_t)-1,
5106 _In_ int flags = match_default)
5107 {
5108 assert(text || start >= end);
5109 interval.end = start;
5110 if (type.match(text, interval.end, end, flags))
5111 interval.end = type.interval.end;
5112 else
5113 goto error;
5114 while (m_space.match(text, interval.end, end, flags))
5115 interval.end = m_space.interval.end;
5116 if (interval.end < end && text[interval.end] == '/')
5117 interval.end++;
5118 else
5119 goto error;
5120 while (m_space.match(text, interval.end, end, flags))
5121 interval.end = m_space.interval.end;
5122 if (subtype.match(text, interval.end, end, flags))
5123 interval.end = subtype.interval.end;
5124 else
5125 goto error;
5126 interval.start = start;
5127 return true;
5128
5129 error:
5130 type.invalidate();
5131 subtype.invalidate();
5132 interval.start = (interval.end = start) + 1;
5133 return false;
5134 }
5135
5136 virtual void invalidate()
5137 {
5138 type.invalidate();
5139 subtype.invalidate();
5140 parser::invalidate();
5141 }
5142
5143 public:
5144 http_token type;
5145 http_token subtype;
5146
5147 protected:
5148 http_space m_space;
5149 };
5150
5155 {
5156 public:
5157 virtual bool match(
5158 _In_reads_or_z_(end) const char* text,
5159 _In_ size_t start = 0,
5160 _In_ size_t end = (size_t)-1,
5161 _In_ int flags = match_default)
5162 {
5163 assert(text || start >= end);
5164 if (!http_media_range::match(text, start, end, flags))
5165 goto error;
5166 params.clear();
5167 for (;;) {
5168 if (interval.end < end && text[interval.end]) {
5169 if (m_space.match(text, interval.end, end, flags))
5170 interval.end = m_space.interval.end;
5171 else if (text[interval.end] == ';') {
5172 interval.end++;
5173 while (m_space.match(text, interval.end, end, flags))
5174 interval.end = m_space.interval.end;
5175 http_parameter param;
5176 if (param.match(text, interval.end, end, flags)) {
5177 interval.end = param.interval.end;
5178 params.push_back(std::move(param));
5179 }
5180 else
5181 break;
5182 }
5183 else
5184 break;
5185 }
5186 else
5187 break;
5188 }
5189 interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
5190 return true;
5191
5192 error:
5193 http_media_range::invalidate();
5194 params.clear();
5195 interval.start = (interval.end = start) + 1;
5196 return false;
5197 }
5198
5199 virtual void invalidate()
5200 {
5201 params.clear();
5202 http_media_range::invalidate();
5203 }
5204
5205 public:
5206 std::list<http_parameter> params;
5207 };
5208
5213 {
5214 public:
5215 virtual bool match(
5216 _In_reads_or_z_(end) const char* text,
5217 _In_ size_t start = 0,
5218 _In_ size_t end = (size_t)-1,
5219 _In_ int flags = match_default)
5220 {
5221 assert(text || start >= end);
5222 interval.end = start;
5223 for (;;) {
5224 if (interval.end < end && text[interval.end]) {
5225 if ((unsigned int)text[interval.end] < 0x20 ||
5226 (unsigned int)text[interval.end] == 0x7f ||
5227 text[interval.end] == ':' ||
5228 text[interval.end] == '/' ||
5229 isspace(text[interval.end]))
5230 break;
5231 else
5232 interval.end++;
5233 }
5234 else
5235 break;
5236 }
5237 if (start < interval.end) {
5238 interval.start = start;
5239 return true;
5240 }
5241 interval.start = (interval.end = start) + 1;
5242 return false;
5243 }
5244 };
5245
5249 class http_url_port : public parser
5250 {
5251 public:
5252 http_url_port(_In_ const std::locale& locale = std::locale()) :
5253 parser(locale),
5254 value(0)
5255 {}
5256
5257 virtual bool match(
5258 _In_reads_or_z_(end) const char* text,
5259 _In_ size_t start = 0,
5260 _In_ size_t end = (size_t)-1,
5261 _In_ int flags = match_default)
5262 {
5263 assert(text || start >= end);
5264 value = 0;
5265 interval.end = start;
5266 for (;;) {
5267 if (interval.end < end && text[interval.end]) {
5268 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5269 size_t _value = (size_t)value * 10 + text[interval.end] - '0';
5270 if (_value > (uint16_t)-1) {
5271 value = 0;
5272 interval.start = (interval.end = start) + 1;
5273 return false;
5274 }
5275 value = (uint16_t)_value;
5276 interval.end++;
5277 }
5278 else
5279 break;
5280 }
5281 else
5282 break;
5283 }
5284 if (start < interval.end) {
5285 interval.start = start;
5286 return true;
5287 }
5288 interval.start = (interval.end = start) + 1;
5289 return false;
5290 }
5291
5292 virtual void invalidate()
5293 {
5294 value = 0;
5295 parser::invalidate();
5296 }
5297
5298 public:
5299 uint16_t value;
5300 };
5301
5306 {
5307 public:
5308 virtual bool match(
5309 _In_reads_or_z_(end) const char* text,
5310 _In_ size_t start = 0,
5311 _In_ size_t end = (size_t)-1,
5312 _In_ int flags = match_default)
5313 {
5314 assert(text || start >= end);
5315 interval.end = start;
5316 for (;;) {
5317 if (interval.end < end && text[interval.end]) {
5318 if ((unsigned int)text[interval.end] < 0x20 ||
5319 (unsigned int)text[interval.end] == 0x7f ||
5320 text[interval.end] == '?' ||
5321 text[interval.end] == '/' ||
5322 isspace(text[interval.end]))
5323 break;
5324 else
5325 interval.end++;
5326 }
5327 else
5328 break;
5329 }
5330 interval.start = start;
5331 return true;
5332 }
5333 };
5334
5338 class http_url_path : public parser
5339 {
5340 public:
5341 virtual bool match(
5342 _In_reads_or_z_(end) const char* text,
5343 _In_ size_t start = 0,
5344 _In_ size_t end = (size_t)-1,
5345 _In_ int flags = match_default)
5346 {
5347 assert(text || start >= end);
5349 interval.end = start;
5350 segments.clear();
5351 assert(text || interval.end >= end);
5352 if (interval.end < end && text[interval.end] != '/')
5353 goto error;
5354 interval.end++;
5355 s.match(text, interval.end, end, flags);
5356 segments.push_back(s);
5358 for (;;) {
5359 if (interval.end < end && text[interval.end]) {
5360 if (text[interval.end] == '/') {
5361 interval.end++;
5362 s.match(text, interval.end, end, flags);
5363 segments.push_back(s);
5365 }
5366 else
5367 break;
5368 }
5369 else
5370 break;
5371 }
5372 interval.start = start;
5373 return true;
5374
5375 error:
5376 segments.clear();
5377 interval.start = (interval.end = start) + 1;
5378 return false;
5379 }
5380
5381 virtual void invalidate()
5382 {
5383 segments.clear();
5384 parser::invalidate();
5385 }
5386
5387 public:
5388 std::vector<http_url_path_segment> segments;
5389 };
5390
5395 {
5396 public:
5397 virtual bool match(
5398 _In_reads_or_z_(end) const char* text,
5399 _In_ size_t start = 0,
5400 _In_ size_t end = (size_t)-1,
5401 _In_ int flags = match_default)
5402 {
5403 assert(text || start >= end);
5404 interval.end = start;
5405 name.start = interval.end;
5406 for (;;) {
5407 if (interval.end < end && text[interval.end]) {
5408 if ((unsigned int)text[interval.end] < 0x20 ||
5409 (unsigned int)text[interval.end] == 0x7f ||
5410 text[interval.end] == '&' ||
5411 text[interval.end] == '=' ||
5412 isspace(text[interval.end]))
5413 break;
5414 else
5415 interval.end++;
5416 }
5417 else
5418 break;
5419 }
5420 if (start < interval.end)
5421 name.end = interval.end;
5422 else
5423 goto error;
5424 if (text[interval.end] == '=') {
5425 interval.end++;
5426 value.start = interval.end;
5427 for (;;) {
5428 if (interval.end < end && text[interval.end]) {
5429 if ((unsigned int)text[interval.end] < 0x20 ||
5430 (unsigned int)text[interval.end] == 0x7f ||
5431 text[interval.end] == '&' ||
5432 isspace(text[interval.end]))
5433 break;
5434 else
5435 interval.end++;
5436 }
5437 else
5438 break;
5439 }
5440 value.end = interval.end;
5441 }
5442 else {
5443 value.start = 1;
5444 value.end = 0;
5445 }
5446 interval.start = start;
5447 return true;
5448
5449 error:
5450 name.start = 1;
5451 name.end = 0;
5452 value.start = 1;
5453 value.end = 0;
5454 interval.start = (interval.end = start) + 1;
5455 return false;
5456 }
5457
5458 virtual void invalidate()
5459 {
5460 name.start = 1;
5461 name.end = 0;
5462 value.start = 1;
5463 value.end = 0;
5464 parser::invalidate();
5465 }
5466
5467 public:
5470 };
5471
5475 class http_url : public parser
5476 {
5477 public:
5478 http_url(_In_ const std::locale& locale = std::locale()) :
5479 parser(locale),
5480 port(locale)
5481 {}
5482
5483 virtual bool match(
5484 _In_reads_or_z_(end) const char* text,
5485 _In_ size_t start = 0,
5486 _In_ size_t end = (size_t)-1,
5487 _In_ int flags = match_default)
5488 {
5489 assert(text || start >= end);
5490 interval.end = start;
5491
5492 if (interval.end + 7 <= end && stdex::strnicmp(text + interval.end, 7, "http://", (size_t)-1, m_locale) == 0) {
5493 interval.end += 7;
5494 if (server.match(text, interval.end, end, flags))
5495 interval.end = server.interval.end;
5496 else
5497 goto error;
5498 if (interval.end < end && text[interval.end] == ':') {
5499 interval.end++;
5500 if (port.match(text, interval.end, end, flags))
5501 interval.end = port.interval.end;
5502 }
5503 else {
5504 port.invalidate();
5505 port.value = 80;
5506 }
5507 }
5508 else {
5509 server.invalidate();
5510 port.invalidate();
5511 port.value = 80;
5512 }
5513
5514 if (path.match(text, interval.end, end, flags))
5515 interval.end = path.interval.end;
5516 else
5517 goto error;
5518
5519 params.clear();
5520
5521 if (interval.end < end && text[interval.end] == '?') {
5522 interval.end++;
5523 for (;;) {
5524 if (interval.end < end && text[interval.end]) {
5525 if ((unsigned int)text[interval.end] < 0x20 ||
5526 (unsigned int)text[interval.end] == 0x7f ||
5527 isspace(text[interval.end]))
5528 break;
5529 else if (text[interval.end] == '&')
5530 interval.end++;
5531 else {
5532 http_url_parameter param;
5533 if (param.match(text, interval.end, end, flags)) {
5534 interval.end = param.interval.end;
5535 params.push_back(std::move(param));
5536 }
5537 else
5538 break;
5539 }
5540 }
5541 else
5542 break;
5543 }
5544 }
5545
5546 interval.start = start;
5547 return true;
5548
5549 error:
5550 server.invalidate();
5551 port.invalidate();
5552 path.invalidate();
5553 params.clear();
5554 interval.start = (interval.end = start) + 1;
5555 return false;
5556 }
5557
5558 virtual void invalidate()
5559 {
5560 server.invalidate();
5561 port.invalidate();
5562 path.invalidate();
5563 params.clear();
5564 parser::invalidate();
5565 }
5566
5567 public:
5568 http_url_server server;
5569 http_url_port port;
5570 http_url_path path;
5571 std::list<http_url_parameter> params;
5572 };
5573
5577 class http_language : public parser
5578 {
5579 public:
5580 virtual bool match(
5581 _In_reads_or_z_(end) const char* text,
5582 _In_ size_t start = 0,
5583 _In_ size_t end = (size_t)-1,
5584 _In_ int flags = match_default)
5585 {
5586 assert(text || start >= end);
5587 interval.end = start;
5588 components.clear();
5589 for (;;) {
5590 if (interval.end < end && text[interval.end]) {
5592 k.end = interval.end;
5593 for (;;) {
5594 if (k.end < end && text[k.end]) {
5595 if (isalpha(text[k.end]))
5596 k.end++;
5597 else
5598 break;
5599 }
5600 else
5601 break;
5602 }
5603 if (interval.end < k.end) {
5604 k.start = interval.end;
5605 interval.end = k.end;
5606 components.push_back(k);
5607 }
5608 else
5609 break;
5610 if (interval.end < end && text[interval.end] == '-')
5611 interval.end++;
5612 else
5613 break;
5614 }
5615 else
5616 break;
5617 }
5618 if (!components.empty()) {
5619 interval.start = start;
5620 interval.end = components.back().end;
5621 return true;
5622 }
5623 interval.start = (interval.end = start) + 1;
5624 return false;
5625 }
5626
5627 virtual void invalidate()
5628 {
5629 components.clear();
5630 parser::invalidate();
5631 }
5632
5633 public:
5634 std::vector<stdex::interval<size_t>> components;
5635 };
5636
5640 class http_weight : public parser
5641 {
5642 public:
5643 http_weight(_In_ const std::locale& locale = std::locale()) :
5644 parser(locale),
5645 value(1.0f)
5646 {}
5647
5648 virtual bool match(
5649 _In_reads_or_z_(end) const char* text,
5650 _In_ size_t start = 0,
5651 _In_ size_t end = (size_t)-1,
5652 _In_ int flags = match_default)
5653 {
5654 assert(text || start >= end);
5655 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
5656 interval.end = start;
5657 for (;;) {
5658 if (interval.end < end && text[interval.end]) {
5659 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5660 celi_del = celi_del * 10 + text[interval.end] - '0';
5661 interval.end++;
5662 }
5663 else if (text[interval.end] == '.') {
5664 interval.end++;
5665 for (;;) {
5666 if (interval.end < end && text[interval.end]) {
5667 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5668 decimalni_del = decimalni_del * 10 + text[interval.end] - '0';
5669 decimalni_del_n *= 10;
5670 interval.end++;
5671 }
5672 else
5673 break;
5674 }
5675 else
5676 break;
5677 }
5678 break;
5679 }
5680 else
5681 break;
5682 }
5683 else
5684 break;
5685 }
5686 if (start < interval.end) {
5687 value = (float)((double)celi_del + (double)decimalni_del / decimalni_del_n);
5688 interval.start = start;
5689 return true;
5690 }
5691 value = 1.0f;
5692 interval.start = (interval.end = start) + 1;
5693 return false;
5694 }
5695
5696 virtual void invalidate()
5697 {
5698 value = 1.0f;
5699 parser::invalidate();
5700 }
5701
5702 public:
5703 float value;
5704 };
5705
5709 class http_asterisk : public parser
5710 {
5711 public:
5712 virtual bool match(
5713 _In_reads_or_z_(end) const char* text,
5714 _In_ size_t start = 0,
5715 _In_ size_t end = (size_t)-1,
5716 _In_ int flags = match_default)
5717 {
5718 assert(text || end <= start);
5719 if (start < end && text[start] == '*') {
5720 interval.end = (interval.start = start) + 1;
5721 return true;
5722 }
5723 interval.start = (interval.end = start) + 1;
5724 return false;
5725 }
5726 };
5727
5731 template <class T, class T_asterisk = http_asterisk>
5733 {
5734 public:
5735 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
5736 parser(locale),
5737 factor(locale)
5738 {}
5739
5740 virtual bool match(
5741 _In_reads_or_z_(end) const char* text,
5742 _In_ size_t start = 0,
5743 _In_ size_t end = (size_t)-1,
5744 _In_ int flags = match_default)
5745 {
5746 assert(text || start >= end);
5747 size_t konec_vrednosti;
5748 interval.end = start;
5749 if (asterisk.match(text, interval.end, end, flags)) {
5750 interval.end = konec_vrednosti = asterisk.interval.end;
5751 value.invalidate();
5752 }
5753 else if (value.match(text, interval.end, end, flags)) {
5754 interval.end = konec_vrednosti = value.interval.end;
5755 asterisk.invalidate();
5756 }
5757 else {
5758 asterisk.invalidate();
5759 value.invalidate();
5760 interval.start = (interval.end = start) + 1;
5761 return false;
5762 }
5763
5764 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5765 if (interval.end < end && text[interval.end] == ';') {
5766 interval.end++;
5767 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5768 if (interval.end < end && (text[interval.end] == 'q' || text[interval.end] == 'Q')) {
5769 interval.end++;
5770 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5771 if (interval.end < end && text[interval.end] == '=') {
5772 interval.end++;
5773 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5774 if (factor.match(text, interval.end, end, flags))
5775 interval.end = factor.interval.end;
5776 }
5777 }
5778 }
5779 if (!factor.interval) {
5780 factor.invalidate();
5781 interval.end = konec_vrednosti;
5782 }
5783 interval.start = start;
5784 return true;
5785 }
5786
5787 virtual void invalidate()
5788 {
5789 asterisk.invalidate();
5790 value.invalidate();
5791 factor.invalidate();
5792 parser::invalidate();
5793 }
5794
5795 public:
5796 T_asterisk asterisk;
5797 T value;
5798 http_weight factor;
5799 };
5800
5805 {
5806 public:
5807 virtual bool match(
5808 _In_reads_or_z_(end) const char* text,
5809 _In_ size_t start = 0,
5810 _In_ size_t end = (size_t)-1,
5811 _In_ int flags = match_default)
5812 {
5813 assert(text || start >= end);
5814 interval.end = start;
5815 if (interval.end < end && text[interval.end] == '$')
5816 interval.end++;
5817 else
5818 goto error;
5819 if (name.match(text, interval.end, end, flags))
5820 interval.end = name.interval.end;
5821 else
5822 goto error;
5823 while (m_space.match(text, interval.end, end, flags))
5824 interval.end = m_space.interval.end;
5825 if (interval.end < end && text[interval.end] == '=')
5826 interval.end++;
5827 else
5828 goto error;
5829 while (m_space.match(text, interval.end, end, flags))
5830 interval.end = m_space.interval.end;
5831 if (value.match(text, interval.end, end, flags))
5832 interval.end = value.interval.end;
5833 else
5834 goto error;
5835 interval.start = start;
5836 return true;
5837
5838 error:
5839 name.invalidate();
5840 value.invalidate();
5841 interval.start = (interval.end = start) + 1;
5842 return false;
5843 }
5844
5845 virtual void invalidate()
5846 {
5847 name.invalidate();
5848 value.invalidate();
5849 parser::invalidate();
5850 }
5851
5852 public:
5853 http_token name;
5854 http_value value;
5855
5856 protected:
5857 http_space m_space;
5858 };
5859
5863 class http_cookie : public parser
5864 {
5865 public:
5866 virtual bool match(
5867 _In_reads_or_z_(end) const char* text,
5868 _In_ size_t start = 0,
5869 _In_ size_t end = (size_t)-1,
5870 _In_ int flags = match_default)
5871 {
5872 assert(text || start >= end);
5873 interval.end = start;
5874 if (name.match(text, interval.end, end, flags))
5876 else
5877 goto error;
5878 while (m_space.match(text, interval.end, end, flags))
5879 interval.end = m_space.interval.end;
5880 if (interval.end < end && text[interval.end] == '=')
5881 interval.end++;
5882 else
5883 goto error;
5884 while (m_space.match(text, interval.end, end, flags))
5885 interval.end = m_space.interval.end;
5886 if (value.match(text, interval.end, end, flags))
5888 else
5889 goto error;
5890 params.clear();
5891 for (;;) {
5892 if (interval.end < end && text[interval.end]) {
5893 if (m_space.match(text, interval.end, end, flags))
5894 interval.end = m_space.interval.end;
5895 else if (text[interval.end] == ';') {
5896 interval.end++;
5897 while (m_space.match(text, interval.end, end, flags))
5898 interval.end = m_space.interval.end;
5900 if (param.match(text, interval.end, end, flags)) {
5901 interval.end = param.interval.end;
5902 params.push_back(std::move(param));
5903 }
5904 else
5905 break;
5906 }
5907 else
5908 break;
5909 }
5910 else
5911 break;
5912 }
5913 interval.start = start;
5914 interval.end = params.empty() ? value.interval.end : params.back().interval.end;
5915 return true;
5916
5917 error:
5918 name.invalidate();
5919 value.invalidate();
5920 params.clear();
5921 interval.start = (interval.end = start) + 1;
5922 return false;
5923 }
5924
5925 virtual void invalidate()
5926 {
5927 name.invalidate();
5928 value.invalidate();
5929 params.clear();
5930 parser::invalidate();
5931 }
5932
5933 public:
5936 std::list<http_cookie_parameter> params;
5937
5938 protected:
5939 http_space m_space;
5940 };
5941
5945 class http_agent : public parser
5946 {
5947 public:
5948 virtual bool match(
5949 _In_reads_or_z_(end) const char* text,
5950 _In_ size_t start = 0,
5951 _In_ size_t end = (size_t)-1,
5952 _In_ int flags = match_default)
5953 {
5954 assert(text || start >= end);
5955 interval.end = start;
5956 type.start = interval.end;
5957 for (;;) {
5958 if (interval.end < end && text[interval.end]) {
5959 if (text[interval.end] == '/') {
5960 type.end = interval.end;
5961 interval.end++;
5962 version.start = interval.end;
5963 for (;;) {
5964 if (interval.end < end && text[interval.end]) {
5965 if (isspace(text[interval.end])) {
5966 version.end = interval.end;
5967 break;
5968 }
5969 else
5970 interval.end++;
5971 }
5972 else {
5973 version.end = interval.end;
5974 break;
5975 }
5976 }
5977 break;
5978 }
5979 else if (isspace(text[interval.end])) {
5980 type.end = interval.end;
5981 break;
5982 }
5983 else
5984 interval.end++;
5985 }
5986 else {
5987 type.end = interval.end;
5988 break;
5989 }
5990 }
5991 if (start < interval.end) {
5992 interval.start = start;
5993 return true;
5994 }
5995 type.start = 1;
5996 type.end = 0;
5997 version.start = 1;
5998 version.end = 0;
5999 interval.start = 1;
6000 interval.end = 0;
6001 return false;
6002 }
6003
6004 virtual void invalidate()
6005 {
6006 type.start = 1;
6007 type.end = 0;
6008 version.start = 1;
6009 version.end = 0;
6010 parser::invalidate();
6011 }
6012
6013 public:
6016 };
6017
6021 class http_protocol : public parser
6022 {
6023 public:
6024 http_protocol(_In_ const std::locale& locale = std::locale()) :
6025 parser(locale),
6026 version(0x009)
6027 {}
6028
6029 virtual bool match(
6030 _In_reads_or_z_(end) const char* text,
6031 _In_ size_t start = 0,
6032 _In_ size_t end = (size_t)-1,
6033 _In_ int flags = match_default)
6034 {
6035 assert(text || start >= end);
6036 interval.end = start;
6037 type.start = interval.end;
6038 for (;;) {
6039 if (interval.end < end && text[interval.end]) {
6040 if (text[interval.end] == '/') {
6041 type.end = interval.end;
6042 interval.end++;
6043 break;
6044 }
6045 else if (isspace(text[interval.end]))
6046 goto error;
6047 else
6048 interval.end++;
6049 }
6050 else {
6051 type.end = interval.end;
6052 goto error;
6053 }
6054 }
6055 version_maj.start = interval.end;
6056 for (;;) {
6057 if (interval.end < end && text[interval.end]) {
6058 if (text[interval.end] == '.') {
6059 version_maj.end = interval.end;
6060 interval.end++;
6061 version_min.start = interval.end;
6062 for (;;) {
6063 if (interval.end < end && text[interval.end]) {
6064 if (isspace(text[interval.end])) {
6065 version_min.end = interval.end;
6066 version =
6067 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6068 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6069 break;
6070 }
6071 else
6072 interval.end++;
6073 }
6074 else
6075 goto error;
6076 }
6077 break;
6078 }
6079 else if (isspace(text[interval.end])) {
6080 version_maj.end = interval.end;
6081 version_min.start = 1;
6082 version_min.end = 0;
6083 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6084 break;
6085 }
6086 else
6087 interval.end++;
6088 }
6089 else
6090 goto error;
6091 }
6092 interval.start = start;
6093 return true;
6094
6095 error:
6096 type.start = 1;
6097 type.end = 0;
6098 version_maj.start = 1;
6099 version_maj.end = 0;
6100 version_min.start = 1;
6101 version_min.end = 0;
6102 version = 0x009;
6103 interval.start = 1;
6104 interval.end = 0;
6105 return false;
6106 }
6107
6108 virtual void invalidate()
6109 {
6110 type.start = 1;
6111 type.end = 0;
6112 version_maj.start = 1;
6113 version_maj.end = 0;
6114 version_min.start = 1;
6115 version_min.end = 0;
6116 version = 0x009;
6117 parser::invalidate();
6118 }
6119
6120 public:
6122 stdex::interval<size_t> version_maj;
6123 stdex::interval<size_t> version_min;
6124 uint16_t version;
6125 };
6126
6130 class http_request : public parser
6131 {
6132 public:
6133 http_request(_In_ const std::locale& locale = std::locale()) :
6134 parser(locale),
6135 url(locale),
6136 protocol(locale)
6137 {}
6138
6139 virtual bool match(
6140 _In_reads_or_z_(end) const char* text,
6141 _In_ size_t start = 0,
6142 _In_ size_t end = (size_t)-1,
6143 _In_ int flags = match_default)
6144 {
6145 assert(text || start >= end);
6146 interval.end = start;
6147
6148 for (;;) {
6149 if (m_line_break.match(text, interval.end, end, flags))
6150 goto error;
6151 else if (interval.end < end && text[interval.end]) {
6152 if (isspace(text[interval.end]))
6153 interval.end++;
6154 else
6155 break;
6156 }
6157 else
6158 goto error;
6159 }
6160 verb.start = interval.end;
6161 for (;;) {
6162 if (m_line_break.match(text, interval.end, end, flags))
6163 goto error;
6164 else if (interval.end < end && text[interval.end]) {
6165 if (isspace(text[interval.end])) {
6166 verb.end = interval.end;
6167 interval.end++;
6168 break;
6169 }
6170 else
6171 interval.end++;
6172 }
6173 else
6174 goto error;
6175 }
6176
6177 for (;;) {
6178 if (m_line_break.match(text, interval.end, end, flags))
6179 goto error;
6180 else if (interval.end < end && text[interval.end]) {
6181 if (isspace(text[interval.end]))
6182 interval.end++;
6183 else
6184 break;
6185 }
6186 else
6187 goto error;
6188 }
6189 if (url.match(text, interval.end, end, flags))
6191 else
6192 goto error;
6193
6194 protocol.invalidate();
6195 for (;;) {
6196 if (m_line_break.match(text, interval.end, end, flags)) {
6197 interval.end = m_line_break.interval.end;
6198 goto end;
6199 }
6200 else if (interval.end < end && text[interval.end]) {
6201 if (isspace(text[interval.end]))
6202 interval.end++;
6203 else
6204 break;
6205 }
6206 else
6207 goto end;
6208 }
6209 for (;;) {
6210 if (m_line_break.match(text, interval.end, end, flags)) {
6211 interval.end = m_line_break.interval.end;
6212 goto end;
6213 }
6214 else if (protocol.match(text, interval.end, end, flags)) {
6215 interval.end = protocol.interval.end;
6216 break;
6217 }
6218 else
6219 goto end;
6220 }
6221
6222 for (;;) {
6223 if (m_line_break.match(text, interval.end, end, flags)) {
6224 interval.end = m_line_break.interval.end;
6225 break;
6226 }
6227 else if (interval.end < end && text[interval.end])
6228 interval.end++;
6229 else
6230 goto end;
6231 }
6232
6233 end:
6234 interval.start = start;
6235 return true;
6236
6237 error:
6238 verb.start = 1;
6239 verb.end = 0;
6240 url.invalidate();
6241 protocol.invalidate();
6242 interval.start = 1;
6243 interval.end = 0;
6244 return false;
6245 }
6246
6247 virtual void invalidate()
6248 {
6249 verb.start = 1;
6250 verb.end = 0;
6251 url.invalidate();
6252 protocol.invalidate();
6253 parser::invalidate();
6254 }
6255
6256 public:
6258 http_url url;
6259 http_protocol protocol;
6260
6261 protected:
6262 http_line_break m_line_break;
6263 };
6264
6268 class http_header : public parser
6269 {
6270 public:
6271 virtual bool match(
6272 _In_reads_or_z_(end) const char* text,
6273 _In_ size_t start = 0,
6274 _In_ size_t end = (size_t)-1,
6275 _In_ int flags = match_default)
6276 {
6277 assert(text || start >= end);
6278 interval.end = start;
6279
6280 if (m_line_break.match(text, interval.end, end, flags) ||
6281 interval.end < end && text[interval.end] && isspace(text[interval.end]))
6282 goto error;
6283 name.start = interval.end;
6284 for (;;) {
6285 if (m_line_break.match(text, interval.end, end, flags))
6286 goto error;
6287 else if (interval.end < end && text[interval.end]) {
6288 if (isspace(text[interval.end])) {
6289 name.end = interval.end;
6290 interval.end++;
6291 for (;;) {
6292 if (m_line_break.match(text, interval.end, end, flags))
6293 goto error;
6294 else if (interval.end < end && text[interval.end]) {
6295 if (isspace(text[interval.end]))
6296 interval.end++;
6297 else
6298 break;
6299 }
6300 else
6301 goto error;
6302 }
6303 if (interval.end < end && text[interval.end] == ':') {
6304 interval.end++;
6305 break;
6306 }
6307 else
6308 goto error;
6309 break;
6310 }
6311 else if (text[interval.end] == ':') {
6312 name.end = interval.end;
6313 interval.end++;
6314 break;
6315 }
6316 else
6317 interval.end++;
6318 }
6319 else
6320 goto error;
6321 }
6322 value.start = (size_t)-1;
6323 value.end = 0;
6324 for (;;) {
6325 if (m_line_break.match(text, interval.end, end, flags)) {
6326 interval.end = m_line_break.interval.end;
6327 if (!m_line_break.match(text, interval.end, end, flags) &&
6328 interval.end < end && text[interval.end] && isspace(text[interval.end]))
6329 interval.end++;
6330 else
6331 break;
6332 }
6333 else if (interval.end < end && text[interval.end]) {
6334 if (isspace(text[interval.end]))
6335 interval.end++;
6336 else {
6337 if (value.start == (size_t)-1) value.start = interval.end;
6338 value.end = ++interval.end;
6339 }
6340 }
6341 else
6342 break;
6343 }
6344 interval.start = start;
6345 return true;
6346
6347 error:
6348 name.start = 1;
6349 name.end = 0;
6350 value.start = 1;
6351 value.end = 0;
6352 interval.start = 1;
6353 interval.end = 0;
6354 return false;
6355 }
6356
6357 virtual void invalidate()
6358 {
6359 name.start = 1;
6360 name.end = 0;
6361 value.start = 1;
6362 value.end = 0;
6363 parser::invalidate();
6364 }
6365
6366 public:
6369
6370 protected:
6371 http_line_break m_line_break;
6372 };
6373
6377 template <class T>
6378 class http_value_collection : public T
6379 {
6380 public:
6381 void insert(
6382 _In_reads_or_z_(end) const char* text,
6383 _In_ size_t start = 0,
6384 _In_ size_t end = (size_t)-1,
6385 _In_ int flags = match_default)
6386 {
6387 while (start < end) {
6388 while (start < end && text[start] && isspace(text[start])) start++;
6389 if (start < end && text[start] == ',') {
6390 start++;
6391 while (start < end&& text[start] && isspace(text[start])) start++;
6392 }
6393 T::key_type el;
6394 if (el.match(text, start, end, flags)) {
6395 start = el.interval.end;
6396 T::insert(std::move(el));
6397 }
6398 else
6399 break;
6400 }
6401 }
6402 };
6403
///
/// Comparison predicate: returns true when the weight factor of `a`
/// (`a.factor.value`) is greater than the weight factor of `b`.
///
/// NOTE(review): the line that declares the enclosing type (listing line
/// 6405) is missing from this extract; restore it from the upstream header
/// before compiling.
///
6404 template <class T>
6406 constexpr bool operator()(const T& a, const T& b) const noexcept
6407 {
6408 return a.factor.value > b.factor.value;
6409 }
6410 };
6411
/// NOTE(review): the declaration introduced by this template header (listing
/// line 6416) is missing from this extract; restore it from the upstream
/// header before compiling.
6415 template <class T, class _Alloc = std::allocator<T>>
6417
6421 template <class T>
6423 {
6424 public:
/// Constructor parameters and member initializers of the JSON string parser.
///
/// NOTE(review): the line carrying the constructor name (listing line 6425)
/// is missing from this extract; restore it from the upstream header.
///
/// Each argument is stored unchanged in the member of the same name prefixed
/// with `m_`. As used by match():
/// - `quote`   matches the string delimiter, and after `escape` yields '"'
/// - `chr`     matches one unescaped string character, copied verbatim
/// - `escape`  matches the escape introducer, and after itself yields '\\'
/// - `sol`, `bs`, `ff`, `lf`, `cr`, `htab`  after `escape` yield '/', '\b',
///   '\f', '\n', '\r' and '\t' respectively
/// - `uni`     after `escape` introduces a four-digit hexadecimal code
/// - `hex`     parses those four digits
/// - `locale`  is forwarded to the basic_parser<T> base
6426 _In_ const std::shared_ptr<basic_parser<T>>& quote,
6427 _In_ const std::shared_ptr<basic_parser<T>>& chr,
6428 _In_ const std::shared_ptr<basic_parser<T>>& escape,
6429 _In_ const std::shared_ptr<basic_parser<T>>& sol,
6430 _In_ const std::shared_ptr<basic_parser<T>>& bs,
6431 _In_ const std::shared_ptr<basic_parser<T>>& ff,
6432 _In_ const std::shared_ptr<basic_parser<T>>& lf,
6433 _In_ const std::shared_ptr<basic_parser<T>>& cr,
6434 _In_ const std::shared_ptr<basic_parser<T>>& htab,
6435 _In_ const std::shared_ptr<basic_parser<T>>& uni,
6436 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
6437 _In_ const std::locale& locale = std::locale()) :
6438 basic_parser<T>(locale),
6439 m_quote(quote),
6440 m_chr(chr),
6441 m_escape(escape),
6442 m_sol(sol),
6443 m_bs(bs),
6444 m_ff(ff),
6445 m_lf(lf),
6446 m_cr(cr),
6447 m_htab(htab),
6448 m_uni(uni),
6449 m_hex(hex)
6450 {}
6451
6452 virtual bool match(
6453 _In_reads_or_z_(end) const T* text,
6454 _In_ size_t start = 0,
6455 _In_ size_t end = (size_t)-1,
6456 _In_ int flags = match_default)
6457 {
6458 assert(text || start >= end);
6459 interval.end = start;
6460 if (m_quote->match(text, interval.end, end, flags)) {
6461 interval.end = m_quote->interval.end;
6462 value.clear();
6463 for (;;) {
6464 if (m_quote->match(text, interval.end, end, flags)) {
6465 interval.start = start;
6466 interval.end = m_quote->interval.end;
6467 return true;
6468 }
6469 if (m_escape->match(text, interval.end, end, flags)) {
6470 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
6471 value += '"'; interval.end = m_quote->interval.end;
6472 continue;
6473 }
6474 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
6475 value += '/'; interval.end = m_sol->interval.end;
6476 continue;
6477 }
6478 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
6479 value += '\b'; interval.end = m_bs->interval.end;
6480 continue;
6481 }
6482 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
6483 value += '\f'; interval.end = m_ff->interval.end;
6484 continue;
6485 }
6486 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
6487 value += '\n'; interval.end = m_lf->interval.end;
6488 continue;
6489 }
6490 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
6491 value += '\r'; interval.end = m_cr->interval.end;
6492 continue;
6493 }
6494 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
6495 value += '\t'; interval.end = m_htab->interval.end;
6496 continue;
6497 }
6498 if (
6499 m_uni->match(text, m_escape->interval.end, end, flags) &&
6500 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
6501 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
6502 {
6503 assert(m_hex->value <= 0xffff);
6504 if (sizeof(T) == 1) {
6505 if (m_hex->value > 0x7ff) {
6506 value += (T)(0xe0 | (m_hex->value >> 12) & 0x0f);
6507 value += (T)(0x80 | (m_hex->value >> 6) & 0x3f);
6508 value += (T)(0x80 | m_hex->value & 0x3f);
6509 }
6510 else if (m_hex->value > 0x7f) {
6511 value += (T)(0xc0 | (m_hex->value >> 6) & 0x1f);
6512 value += (T)(0x80 | m_hex->value & 0x3f);
6513 }
6514 else
6515 value += (T)(m_hex->value & 0x7f);
6516 }
6517 else
6518 value += (T)m_hex->value;
6519 interval.end = m_hex->interval.end;
6520 continue;
6521 }
6522 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
6523 value += '\\'; interval.end = m_escape->interval.end;
6524 continue;
6525 }
6526 }
6527 if (m_chr->match(text, interval.end, end, flags)) {
6528 value.Prilepi(text + m_chr->interval.start, m_chr->interval.size());
6529 interval.end = m_chr->interval.end;
6530 continue;
6531 }
6532 break;
6533 }
6534 }
6535 value.clear();
6536 interval.start = (interval.end = start) + 1;
6537 return false;
6538 }
6539
6540 virtual void invalidate()
6541 {
6542 value.clear();
6544 }
6545
6546 public:
6547 std::basic_string<T> value;
6548
6549 protected:
6550 std::shared_ptr<basic_parser<T>> m_quote;
6551 std::shared_ptr<basic_parser<T>> m_chr;
6552 std::shared_ptr<basic_parser<T>> m_escape;
6553 std::shared_ptr<basic_parser<T>> m_sol;
6554 std::shared_ptr<basic_parser<T>> m_bs;
6555 std::shared_ptr<basic_parser<T>> m_ff;
6556 std::shared_ptr<basic_parser<T>> m_lf;
6557 std::shared_ptr<basic_parser<T>> m_cr;
6558 std::shared_ptr<basic_parser<T>> m_htab;
6559 std::shared_ptr<basic_parser<T>> m_uni;
6560 std::shared_ptr<basic_integer16<T>> m_hex;
6561 };
6562
/// `tjson_string` selects the JSON string parser matching the build's
/// character set: `wjson_string` when _UNICODE is defined, `json_string`
/// otherwise.
///
/// NOTE(review): the declarations of `json_string` and `wjson_string`
/// (listing lines 6563-6564) are missing from this extract.
6565#ifdef _UNICODE
6566 using tjson_string = wjson_string;
6567#else
6568 using tjson_string = json_string;
6569#endif
6570 }
6571}
6572
6573#undef ENUM_FLAG_OPERATOR
6574#undef ENUM_FLAGS
6575
6576#ifdef _MSC_VER
6577#pragma warning(pop)
6578#endif
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4387
Test for any code unit.
Definition parser.hpp:214
Test for beginning of line.
Definition parser.hpp:608
Test for any.
Definition parser.hpp:1050
Test for chemical formula.
Definition parser.hpp:4661
Test for any code unit from a given string of code units.
Definition parser.hpp:713
Test for specific code unit.
Definition parser.hpp:284
Test for date.
Definition parser.hpp:4017
Test for valid DNS domain character.
Definition parser.hpp:2798
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2836
Test for DNS domain/hostname.
Definition parser.hpp:2898
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2962
Test for e-mail address.
Definition parser.hpp:3786
Test for emoticon.
Definition parser.hpp:3894
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3983
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3984
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3986
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3985
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3982
Test for end of line.
Definition parser.hpp:646
Test for fraction.
Definition parser.hpp:1679
Test for decimal integer.
Definition parser.hpp:1288
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1373
bool has_separators
Did integer have any separators?
Definition parser.hpp:1433
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1432
Test for hexadecimal integer.
Definition parser.hpp:1454
Base class for integer testing.
Definition parser.hpp:1266
size_t value
Calculated value of the numeral.
Definition parser.hpp:1280
Test for IPv4 address.
Definition parser.hpp:2338
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2453
struct in_addr value
IPv4 address value.
Definition parser.hpp:2454
Test for IPv6 address.
Definition parser.hpp:2557
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2761
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2759
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2760
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2485
Test for repeating.
Definition parser.hpp:903
bool m_greedy
try to match as long sequence as possible
Definition parser.hpp:942
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:939
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:940
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:941
Test for JSON string.
Definition parser.hpp:6423
Test for mixed numeral.
Definition parser.hpp:1914
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:2020
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2018
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2017
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2016
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2019
Test for monetary numeral.
Definition parser.hpp:2209
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2315
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2320
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2318
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2321
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2319
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2316
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2317
"No-op" match
Definition parser.hpp:182
Base template for all parsers.
Definition parser.hpp:63
interval< size_t > interval
Region of the last match.
Definition parser.hpp:162
Test for permutation.
Definition parser.hpp:1190
Test for phone number.
Definition parser.hpp:4510
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4636
Test for any punctuation code unit.
Definition parser.hpp:456
Test for Roman numeral.
Definition parser.hpp:1563
Test for scientific numeral.
Definition parser.hpp:2040
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2184
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2188
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2182
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2183
double value
Calculated value of the numeral.
Definition parser.hpp:2192
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2190
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2187
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2189
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2191
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2186
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2185
Test for match score.
Definition parser.hpp:1742
Test for sequence.
Definition parser.hpp:999
Definition parser.hpp:681
Test for signed numeral.
Definition parser.hpp:1828
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1896
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1895
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1894
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1897
Test for any space code unit.
Definition parser.hpp:377
Test for any space or punctuation code unit.
Definition parser.hpp:530
Test for any string.
Definition parser.hpp:1118
Test for given string.
Definition parser.hpp:808
Test for time.
Definition parser.hpp:4284
Test for valid URL password character.
Definition parser.hpp:3080
Test for valid URL path character.
Definition parser.hpp:3180
Test for URL path.
Definition parser.hpp:3288
Test for valid URL username character.
Definition parser.hpp:2981
Test for URL.
Definition parser.hpp:3429
Test for HTTP agent.
Definition parser.hpp:5946
Test for HTTP any type.
Definition parser.hpp:5068
Test for HTTP asterisk.
Definition parser.hpp:5710
Test for HTTP header.
Definition parser.hpp:6269
Test for HTTP language (RFC1766)
Definition parser.hpp:5578
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:4742
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5100
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5155
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5013
http_token name
Parameter name.
Definition parser.hpp:5057
http_value value
Parameter value.
Definition parser.hpp:5058
Test for HTTP protocol.
Definition parser.hpp:6022
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6124
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:4903
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:4959
Test for HTTP request.
Definition parser.hpp:6131
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:4778
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:4815
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:4849
Test for HTTP URL parameter.
Definition parser.hpp:5395
Test for HTTP URL path segment.
Definition parser.hpp:5306
Test for HTTP URL path.
Definition parser.hpp:5339
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:5388
Test for HTTP URL port.
Definition parser.hpp:5250
Test for HTTP URL server.
Definition parser.hpp:5213
Test for HTTP URL.
Definition parser.hpp:5476
Collection of HTTP values.
Definition parser.hpp:6379
Test for HTTP value (RFC2616: value)
Definition parser.hpp:4969
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5005
http_token token
Value when matched as token.
Definition parser.hpp:5006
Test for HTTP weight factor.
Definition parser.hpp:5641
float value
Calculated value of the weight factor.
Definition parser.hpp:5703
Test for HTTP weighted value.
Definition parser.hpp:5733
Base template for collection-holding parsers.
Definition parser.hpp:959
Test for any SGML code point.
Definition parser.hpp:246
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:765
Test for specific SGML code point.
Definition parser.hpp:333
Test for valid DNS domain SGML character.
Definition parser.hpp:2854
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2523
Test for any SGML punctuation code point.
Definition parser.hpp:497
Test for any SGML space code point.
Definition parser.hpp:420
Test for any SGML space or punctuation code point.
Definition parser.hpp:573
Test for SGML given string.
Definition parser.hpp:855
Test for valid URL password SGML character.
Definition parser.hpp:3132
Test for valid URL path SGML character.
Definition parser.hpp:3236
Test for valid URL username SGML character.
Definition parser.hpp:3032
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
T start
interval start
Definition interval.hpp:19
Definition parser.hpp:6405