stdex
Additional custom algorithms and algorithms not covered by Standard C++
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "interval.hpp"
9#include "memory.hpp"
10#include "sal.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include "system.hpp"
14#include <assert.h>
15#include <stdarg.h>
16#include <stdint.h>
17#ifdef _WIN32
18#ifndef _WINSOCKAPI_
19#include <winsock2.h>
20#include <ws2ipdef.h>
21#endif
22#else
23#include <inaddr.h>
24#include <in6addr.h>
25#endif
26#include <limits>
27#include <list>
28#include <locale>
29#include <memory>
30#include <set>
31#include <string>
32
33#ifdef _MSC_VER
34#pragma warning(push)
35#pragma warning(disable: 4100)
36#endif
37
///
/// Defines binary operator X and its compound form X= for scoped enum T
///
/// Overloads are generated for enum/enum, enum/underlying and underlying/enum operands.
/// The compound operator is produced with token pasting (X##=): without it the expansion
/// consists of two separate tokens (e.g. `|` and `=`), which conforming preprocessors do
/// not merge into `|=`.
///
/// \param[in] T  Scoped enumeration type
/// \param[in] X  Binary bitwise operator token (|, ^ or &)
///
#define ENUM_FLAG_OPERATOR(T,X) \
inline T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
inline T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T& operator X##= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
inline T& operator X##= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }

///
/// Declares scoped enum T with bitwise operators ~, |, ^, & (and |=, ^=, &=)
///
/// Usage: `ENUM_FLAGS(my_flags, int) { none = 0, a = 1, b = 2 };`
/// The macro forward-declares the enum, defines the operators and ends with the
/// enum head so that the enumerator list can follow.
///
/// \param[in] T     Name of the enumeration to declare
/// \param[in] type  Underlying integral type
///
#define ENUM_FLAGS(T, type) \
enum class T : type; \
inline T operator ~ (T t) { return static_cast<T>(~static_cast<std::underlying_type_t<T>>(t)); } \
ENUM_FLAG_OPERATOR(T,|) \
ENUM_FLAG_OPERATOR(T,^) \
ENUM_FLAG_OPERATOR(T,&) \
enum class T : type
51
52namespace stdex
53{
54 namespace parser
55 {
/// No special matching behaviour requested
constexpr int match_default = 0;

/// Compare characters after lower-casing them with the parser's locale
constexpr int match_case_insensitive = 1 << 0;

/// Let line-break characters be accepted by the white-space tests
constexpr int match_multiline = 1 << 1;
62
66 template <class T>
68 {
69 public:
70 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
71 virtual ~basic_parser() {}
72
73 bool search(
74 _In_reads_or_z_(end) const T* text,
75 _In_ size_t start = 0,
76 _In_ size_t end = (size_t)-1,
77 _In_ int flags = match_default)
78 {
79 for (size_t i = start; i < end && text[i]; i++)
80 if (match(text, i, end, flags))
81 return true;
82 return false;
83 }
84
85 virtual bool match(
86 _In_reads_or_z_(end) const T* text,
87 _In_ size_t start = 0,
88 _In_ size_t end = (size_t)-1,
89 _In_ int flags = match_default) = 0;
90
91 template<class _Traits, class _Ax>
92 inline bool match(
93 const std::basic_string<T, _Traits, _Ax>& text,
94 _In_ size_t start = 0,
95 _In_ size_t end = (size_t)-1,
96 _In_ int flags = match_default)
97 {
98 return match(text.c_str(), start, std::min<size_t>(end, text.size()), flags);
99 }
100
101 virtual void invalidate()
102 {
103 interval.start = 1;
104 interval.end = 0;
105 }
106
107 protected:
109 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
110 {
111 if (text[start] == '&') {
112 // Potential entity start
113 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
114 for (chr_end = start + 1;; chr_end++) {
115 if (chr_end >= end || text[chr_end] == 0) {
116 // Unterminated entity
117 break;
118 }
119 if (text[chr_end] == ';') {
120 // Entity end
121 size_t n = chr_end - start - 1;
122 if (n >= 2 && text[start + 1] == '#') {
123 // Numerical entity
124 char32_t unicode;
125 if (text[start + 2] == 'x' || text[start + 2] == 'X')
126 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
127 else
128 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
129#ifdef _WIN32
130 if (unicode < 0x10000) {
131 buf[0] = (wchar_t)unicode;
132 buf[1] = 0;
133 }
134 else {
135 ucs4_to_surrogate_pair(buf, unicode);
136 buf[2] = 0;
137 }
138#else
139 buf[0] = (wchar_t)unicode;
140 buf[1] = 0;
141#endif
142 chr_end++;
143 return buf;
144 }
145 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
146 if (entity_w) {
147 chr_end++;
148 return entity_w;
149 }
150 // Unknown entity.
151 break;
152 }
153 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
154 // This char cannot possibly be a part of entity.
155 break;
156 }
157 }
158 }
159 buf[0] = text[start];
160 buf[1] = 0;
161 chr_end = start + 1;
162 return buf;
163 }
165
166 public:
168
169 protected:
170 std::locale m_locale;
171 };
172
175#ifdef _UNICODE
176 using tparser = wparser;
177#else
178 using tparser = parser;
179#endif
181
185 template <class T>
186 class basic_noop : public basic_parser<T>
187 {
188 public:
189 virtual bool match(
190 _In_reads_or_z_(end) const T* text,
191 _In_ size_t start = 0,
192 _In_ size_t end = (size_t)-1,
193 _In_ int flags = match_default)
194 {
195 assert(text || start >= end);
196 if (start < end && text[start]) {
197 interval.start = interval.end = start;
198 return true;
199 }
200 interval.start = (interval.end = start) + 1;
201 return false;
202 }
203 };
204
205 using noop = basic_noop<char>;
207#ifdef _UNICODE
208 using tnoop = wnoop;
209#else
210 using tnoop = noop;
211#endif
213
217 template <class T>
218 class basic_any_cu : public basic_parser<T>
219 {
220 public:
221 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
222
223 virtual bool match(
224 _In_reads_or_z_(end) const T* text,
225 _In_ size_t start = 0,
226 _In_ size_t end = (size_t)-1,
227 _In_ int flags = match_default)
228 {
229 assert(text || start >= end);
230 if (start < end && text[start]) {
231 interval.end = (interval.start = start) + 1;
232 return true;
233 }
234 interval.start = (interval.end = start) + 1;
235 return false;
236 }
237 };
238
241#ifdef _UNICODE
242 using tany_cu = wany_cu;
243#else
244 using tany_cu = any_cu;
245#endif
246
250 class sgml_any_cp : public basic_any_cu<char>
251 {
252 public:
253 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
254
255 virtual bool match(
256 _In_reads_or_z_(end) const char* text,
257 _In_ size_t start = 0,
258 _In_ size_t end = (size_t)-1,
259 _In_ int flags = match_default)
260 {
261 assert(text || start >= end);
262 if (start < end && text[start]) {
263 if (text[start] == '&') {
264 // SGML entity
265 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
266 for (interval.end = start + 1; interval.end < end && text[interval.end]; interval.end++)
267 if (text[interval.end] == ';') {
268 interval.end++;
269 interval.start = start;
270 return true;
271 }
272 else if (text[interval.end] == '&' || ctype.is(ctype.space, text[interval.end]))
273 break;
274 // Unterminated entity
275 }
276 interval.end = (interval.start = start) + 1;
277 return true;
278 }
279 interval.start = (interval.end = start) + 1;
280 return false;
281 }
282 };
283
287 template <class T>
288 class basic_cu : public basic_parser<T>
289 {
290 public:
291 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
292 basic_parser<T>(locale),
293 m_chr(chr),
294 m_invert(invert)
295 {}
296
297 virtual bool match(
298 _In_reads_or_z_(end) const T* text,
299 _In_ size_t start = 0,
300 _In_ size_t end = (size_t)-1,
301 _In_ int flags = match_default)
302 {
303 assert(text || start >= end);
304 if (start < end && text[start]) {
305 bool r;
306 if (flags & match_case_insensitive) {
307 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
308 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
309 }
310 else
311 r = text[start] == m_chr;
312 if (r && !m_invert || !r && m_invert) {
313 interval.end = (interval.start = start) + 1;
314 return true;
315 }
316 }
317 interval.start = (interval.end = start) + 1;
318 return false;
319 }
320
321 protected:
322 T m_chr;
323 bool m_invert;
324 };
325
326 using cu = basic_cu<char>;
327 using wcu = basic_cu<wchar_t>;
328#ifdef _UNICODE
329 using tcu = wcu;
330#else
331 using tcu = cu;
332#endif
333
337 class sgml_cp : public sgml_parser
338 {
339 public:
340 sgml_cp(const char* chr, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
341 sgml_parser(locale),
342 m_invert(invert)
343 {
344 assert(chr || !count);
345 wchar_t buf[3];
346 size_t chr_end;
347 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
348 }
349
350 virtual bool match(
351 _In_reads_or_z_(end) const char* text,
352 _In_ size_t start = 0,
353 _In_ size_t end = (size_t)-1,
354 _In_ int flags = match_default)
355 {
356 assert(text || start >= end);
357 if (start < end && text[start]) {
358 wchar_t buf[3];
359 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
360 bool r = ((flags & match_case_insensitive) ?
361 stdex::strnicmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size(), m_locale) :
362 stdex::strncmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size())) == 0;
363 if (r && !m_invert || !r && m_invert) {
364 interval.start = start;
365 return true;
366 }
367 }
368 interval.start = (interval.end = start) + 1;
369 return false;
370 }
371
372 protected:
373 std::wstring m_chr;
374 bool m_invert;
375 };
376
380 template <class T>
381 class basic_space_cu : public basic_parser<T>
382 {
383 public:
384 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
385 basic_parser<T>(locale),
386 m_invert(invert)
387 {}
388
389 virtual bool match(
390 _In_reads_or_z_(end) const T* text,
391 _In_ size_t start = 0,
392 _In_ size_t end = (size_t)-1,
393 _In_ int flags = match_default)
394 {
395 assert(text || start >= end);
396 if (start < end && text[start]) {
397 bool r =
398 ((flags & match_multiline) || !islbreak(text[start])) &&
399 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::space, text[start]);
400 if (r && !m_invert || !r && m_invert) {
401 interval.end = (interval.start = start) + 1;
402 return true;
403 }
404 }
405 interval.start = (interval.end = start) + 1;
406 return false;
407 }
408
409 protected:
410 bool m_invert;
411 };
412
415#ifdef _UNICODE
416 using tspace_cu = wspace_cu;
417#else
418 using tspace_cu = space_cu;
419#endif
420
424 class sgml_space_cp : public basic_space_cu<char>
425 {
426 public:
427 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
429 {}
430
431 virtual bool match(
432 _In_reads_or_z_(end) const char* text,
433 _In_ size_t start = 0,
434 _In_ size_t end = (size_t)-1,
435 _In_ int flags = match_default)
436 {
437 assert(text || start >= end);
438 if (start < end && text[start]) {
439 wchar_t buf[3];
440 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
441 const wchar_t* chr_end = chr + stdex::strlen(chr);
442 bool r =
443 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
444 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
445 if (r && !m_invert || !r && m_invert) {
446 interval.start = start;
447 return true;
448 }
449 }
450
451 interval.start = (interval.end = start) + 1;
452 return false;
453 }
454 };
455
459 template <class T>
460 class basic_punct_cu : public basic_parser<T>
461 {
462 public:
463 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
464 basic_parser<T>(locale),
465 m_invert(invert)
466 {}
467
468 virtual bool match(
469 _In_reads_or_z_(end) const T* text,
470 _In_ size_t start = 0,
471 _In_ size_t end = (size_t)-1,
472 _In_ int flags = match_default)
473 {
474 assert(text || start >= end);
475 if (start < end && text[start]) {
476 bool r = std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::punct, text[start]);
477 if (r && !m_invert || !r && m_invert) {
478 interval.end = (interval.start = start) + 1;
479 return true;
480 }
481 }
482 interval.start = (interval.end = start) + 1;
483 return false;
484 }
485
486 protected:
487 bool m_invert;
488 };
489
492#ifdef _UNICODE
493 using tpunct_cu = wpunct_cu;
494#else
495 using tpunct_cu = punct_cu;
496#endif
497
501 class sgml_punct_cp : public basic_punct_cu<char>
502 {
503 public:
504 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
506 {}
507
508 virtual bool match(
509 _In_reads_or_z_(end) const char* text,
510 _In_ size_t start = 0,
511 _In_ size_t end = (size_t)-1,
512 _In_ int flags = match_default)
513 {
514 assert(text || start >= end);
515 if (start < end && text[start]) {
516 wchar_t buf[3];
517 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
518 const wchar_t* chr_end = chr + stdex::strlen(chr);
519 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
520 if (r && !m_invert || !r && m_invert) {
521 interval.start = start;
522 return true;
523 }
524 }
525 interval.start = (interval.end = start) + 1;
526 return false;
527 }
528 };
529
533 template <class T>
535 {
536 public:
537 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
538 basic_parser<T>(locale),
539 m_invert(invert)
540 {}
541
542 virtual bool match(
543 _In_reads_or_z_(end) const T* text,
544 _In_ size_t start = 0,
545 _In_ size_t end = (size_t)-1,
546 _In_ int flags = match_default)
547 {
548 assert(text || start >= end);
549 if (start < end && text[start]) {
550 bool r =
551 ((flags & match_multiline) || !islbreak(text[start])) &&
552 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
553 if (r && !m_invert || !r && m_invert) {
554 interval.end = (interval.start = start) + 1;
555 return true;
556 }
557 }
558 interval.start = (interval.end = start) + 1;
559 return false;
560 }
561
562 protected:
563 bool m_invert;
564 };
565
568#ifdef _UNICODE
570#else
572#endif
573
578 {
579 public:
580 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
582 {}
583
584 virtual bool match(
585 _In_reads_or_z_(end) const char* text,
586 _In_ size_t start = 0,
587 _In_ size_t end = (size_t)-1,
588 _In_ int flags = match_default)
589 {
590 assert(text || start >= end);
591 if (start < end && text[start]) {
592 wchar_t buf[3];
593 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
594 const wchar_t* chr_end = chr + stdex::strlen(chr);
595 bool r =
596 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
597 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
598 if (r && !m_invert || !r && m_invert) {
599 interval.start = start;
600 return true;
601 }
602 }
603 interval.start = (interval.end = start) + 1;
604 return false;
605 }
606 };
607
611 template <class T>
612 class basic_bol : public basic_parser<T>
613 {
614 public:
615 basic_bol(bool invert = false) : m_invert(invert) {}
616
617 virtual bool match(
618 _In_reads_or_z_(end) const T* text,
619 _In_ size_t start = 0,
620 _In_ size_t end = (size_t)-1,
621 _In_ int flags = match_default)
622 {
623 assert(text || start >= end);
624 bool r = start == 0 || start <= end && islbreak(text[start - 1]);
625 if (r && !m_invert || !r && m_invert) {
626 interval.end = interval.start = start;
627 return true;
628 }
629 interval.start = (interval.end = start) + 1;
630 return false;
631 }
632
633 protected:
634 bool m_invert;
635 };
636
637 using bol = basic_bol<char>;
638 using wbol = basic_bol<wchar_t>;
639#ifdef _UNICODE
640 using tbol = wbol;
641#else
642 using tbol = bol;
643#endif
645
649 template <class T>
650 class basic_eol : public basic_parser<T>
651 {
652 public:
653 basic_eol(bool invert = false) : m_invert(invert) {}
654
655 virtual bool match(
656 _In_reads_or_z_(end) const T* text,
657 _In_ size_t start = 0,
658 _In_ size_t end = (size_t)-1,
659 _In_ int flags = match_default)
660 {
661 assert(text || start >= end);
662 bool r = islbreak(text[start]);
663 if (r && !m_invert || !r && m_invert) {
664 interval.end = interval.start = start;
665 return true;
666 }
667 interval.start = (interval.end = start) + 1;
668 return false;
669 }
670
671 protected:
672 bool m_invert;
673 };
674
675 using eol = basic_eol<char>;
676 using weol = basic_eol<wchar_t>;
677#ifdef _UNICODE
678 using teol = weol;
679#else
680 using teol = eol;
681#endif
683
684 template <class T>
685 class basic_set : public basic_parser<T>
686 {
687 public:
688 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
689 basic_parser<T>(locale),
690 hit_offset((size_t)-1),
691 m_invert(invert)
692 {}
693
694 virtual bool match(
695 _In_reads_or_z_(end) const T* text,
696 _In_ size_t start = 0,
697 _In_ size_t end = (size_t)-1,
698 _In_ int flags = match_default) = 0;
699
700 virtual void invalidate()
701 {
702 hit_offset = (size_t)-1;
704 }
705
706 public:
707 size_t hit_offset;
708
709 protected:
710 bool m_invert;
711 };
712
716 template <class T>
717 class basic_cu_set : public basic_set<T>
718 {
719 public:
721 _In_reads_or_z_(count) const T* set,
722 _In_ size_t count = (size_t)-1,
723 _In_ bool invert = false,
724 _In_ const std::locale& locale = std::locale()) :
725 basic_set<T>(invert, locale)
726 {
727 if (set)
728 m_set.assign(set, set + stdex::strnlen(set, count));
729 }
730
731 virtual bool match(
732 _In_reads_or_z_(end) const T* text,
733 _In_ size_t start = 0,
734 _In_ size_t end = (size_t)-1,
735 _In_ int flags = match_default)
736 {
737 assert(text || start >= end);
738 if (start < end && text[start]) {
739 const T* set = m_set.c_str();
740 size_t r = (flags & match_case_insensitive) ?
741 stdex::strnichr(set, m_set.size(), text[start], m_locale) :
742 stdex::strnchr(set, m_set.size(), text[start]);
743 if (r != stdex::npos && !m_invert || r == stdex::npos && m_invert) {
744 hit_offset = r;
745 interval.end = (interval.start = start) + 1;
746 return true;
747 }
748 }
749 hit_offset = (size_t)-1;
750 interval.start = (interval.end = start) + 1;
751 return false;
752 }
753
754 protected:
755 std::basic_string<T> m_set;
756 };
757
760#ifdef _UNICODE
761 using tcu_set = wcu_set;
762#else
763 using tcu_set = cu_set;
764#endif
765
769 class sgml_cp_set : public basic_set<char>
770 {
771 public:
772 sgml_cp_set(const char* set, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
773 basic_set<char>(invert, locale)
774 {
775 if (set)
776 m_set = sgml2wstr(set, count);
777 }
778
779 virtual bool match(
780 _In_reads_or_z_(end) const char* text,
781 _In_ size_t start = 0,
782 _In_ size_t end = (size_t)-1,
783 _In_ int flags = match_default)
784 {
785 assert(text || start >= end);
786 if (start < end && text[start]) {
787 wchar_t buf[3];
788 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
789 const wchar_t* set = m_set.c_str();
790 size_t r = (flags & match_case_insensitive) ?
791 stdex::strnistr(set, m_set.size(), chr, m_locale) :
792 stdex::strnstr(set, m_set.size(), chr);
793 if (r != stdex::npos && !m_invert || r == stdex::npos && m_invert) {
794 hit_offset = r;
795 interval.start = start;
796 return true;
797 }
798 }
799 hit_offset = (size_t)-1;
800 interval.start = (interval.end = start) + 1;
801 return false;
802 }
803
804 protected:
805 std::wstring m_set;
806 };
807
811 template <class T>
812 class basic_string : public basic_parser<T>
813 {
814 public:
816 _In_reads_or_z_(count) const T* str,
817 _In_ size_t count = (size_t)-1,
818 _In_ const std::locale& locale = std::locale()) :
819 basic_parser<T>(locale),
820 m_str(str, str + stdex::strnlen(str, count))
821 {}
822
823 virtual bool match(
824 _In_reads_or_z_(end) const T* text,
825 _In_ size_t start = 0,
826 _In_ size_t end = (size_t)-1,
827 _In_ int flags = match_default)
828 {
829 assert(text || start >= end);
830 size_t
831 m = m_str.size(),
832 n = std::min<size_t>(end - start, m);
833 bool r = ((flags & match_case_insensitive) ?
834 stdex::strnicmp(text + start, n, m_str.c_str(), m, m_locale) :
835 stdex::strncmp(text + start, n, m_str.c_str(), m)) == 0;
836 if (r) {
837 interval.end = (interval.start = start) + n;
838 return true;
839 }
840 interval.start = (interval.end = start) + 1;
841 return false;
842 }
843
844 protected:
845 std::basic_string<T> m_str;
846 };
847
850#ifdef _UNICODE
851 using tstring = wstring;
852#else
853 using tstring = string;
854#endif
855
860 {
861 public:
862 sgml_string(const char* str, size_t count = (size_t)-1, _In_ const std::locale& locale = std::locale()) :
863 sgml_parser(locale),
864 m_str(sgml2wstr(str, count))
865 {}
866
867 virtual bool match(
868 _In_reads_or_z_(end) const char* text,
869 _In_ size_t start = 0,
870 _In_ size_t end = (size_t)-1,
871 _In_ int flags = match_default)
872 {
873 assert(text || start >= end);
874 const wchar_t* str = m_str.c_str();
875 const bool case_insensitive = flags & match_case_insensitive ? true : false;
876 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
877 for (interval.end = start;;) {
878 if (!*str) {
879 interval.start = start;
880 return true;
881 }
882 if (interval.end >= end || !text[interval.end]) {
883 interval.start = (interval.end = start) + 1;
884 return false;
885 }
886 wchar_t buf[3];
887 const wchar_t* chr = next_sgml_cp(text, interval.end, end, interval.end, buf);
888 for (; *chr; ++str, ++chr) {
889 if (!*str ||
890 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
891 {
892 interval.start = (interval.end = start) + 1;
893 return false;
894 }
895 }
896 }
897 }
898
899 protected:
900 std::wstring m_str;
901 };
902
906 template <class T>
908 {
909 public:
910 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = (size_t)-1, bool greedy = true) :
911 m_el(el),
915 {}
916
917 virtual bool match(
918 _In_reads_or_z_(end) const T* text,
919 _In_ size_t start = 0,
920 _In_ size_t end = (size_t)-1,
921 _In_ int flags = match_default)
922 {
923 assert(text || start >= end);
924 interval.start = interval.end = start;
925 for (size_t i = 0; ; i++) {
927 return true;
928 if (!m_el->match(text, interval.end, end, flags)) {
929 if (i >= m_min_iterations)
930 return true;
931 break;
932 }
933 if (m_el->interval.end == interval.end) {
934 // Element did match, but the matching interval was empty. Quit instead of spinning.
935 return true;
936 }
937 interval.end = m_el->interval.end;
938 }
939 interval.start = (interval.end = start) + 1;
940 return false;
941 }
942
943 protected:
944 std::shared_ptr<basic_parser<T>> m_el;
947 bool m_greedy;
948 };
949
952#ifdef _UNICODE
953 using titerations = witerations;
954#else
955 using titerations = iterations;
956#endif
958
962 template <class T>
964 {
965 protected:
966 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
967
968 public:
970 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
971 _In_ size_t count,
972 _In_ const std::locale& locale = std::locale()) :
973 basic_parser<T>(locale)
974 {
975 assert(el || !count);
976 m_collection.reserve(count);
977 for (size_t i = 0; i < count; i++)
978 m_collection.push_back(el[i]);
979 }
980
982 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
983 _In_ const std::locale& locale = std::locale()) :
984 basic_parser<T>(locale),
985 m_collection(std::move(collection))
986 {}
987
988 virtual void invalidate()
989 {
990 for (auto& el: m_collection)
991 el->invalidate();
993 }
994
995 protected:
996 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
997 };
998
1002 template <class T>
1004 {
1005 public:
1007 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1008 _In_ size_t count = 0,
1009 _In_ const std::locale& locale = std::locale()) :
1010 parser_collection<T>(el, count, locale)
1011 {}
1012
1014 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1015 _In_ const std::locale& locale = std::locale()) :
1016 parser_collection<T>(std::move(collection), locale)
1017 {}
1018
1019 virtual bool match(
1020 _In_reads_or_z_(end) const T* text,
1021 _In_ size_t start = 0,
1022 _In_ size_t end = (size_t)-1,
1023 _In_ int flags = match_default)
1024 {
1025 assert(text || start >= end);
1026 interval.end = start;
1027 for (auto i = m_collection.begin(); i != m_collection.end(); ++i) {
1028 if (!(*i)->match(text, interval.end, end, flags)) {
1029 for (++i; i != m_collection.end(); ++i)
1030 (*i)->invalidate();
1031 interval.start = (interval.end = start) + 1;
1032 return false;
1033 }
1034 interval.end = (*i)->interval.end;
1035 }
1036 interval.start = start;
1037 return true;
1038 }
1039 };
1040
1043#ifdef _UNICODE
1044 using tsequence = wsequence;
1045#else
1046 using tsequence = sequence;
1047#endif
1049
1053 template <class T>
1055 {
1056 protected:
1057 basic_branch(_In_ const std::locale& locale) :
1058 parser_collection<T>(locale),
1059 hit_offset((size_t)-1)
1060 {}
1061
1062 public:
1064 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1065 _In_ size_t count = 0,
1066 _In_ const std::locale& locale = std::locale()) :
1067 parser_collection<T>(el, count, locale),
1068 hit_offset((size_t)-1)
1069 {}
1070
1072 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1073 _In_ const std::locale& locale = std::locale()) :
1074 parser_collection<T>(std::move(collection), locale),
1075 hit_offset((size_t)-1)
1076 {}
1077
1078 virtual bool match(
1079 _In_reads_or_z_(end) const T* text,
1080 _In_ size_t start = 0,
1081 _In_ size_t end = (size_t)-1,
1082 _In_ int flags = match_default)
1083 {
1084 assert(text || start >= end);
1085 hit_offset = 0;
1086 for (auto i = m_collection.begin(); i != m_collection.end(); ++i, ++hit_offset) {
1087 if ((*i)->match(text, start, end, flags)) {
1088 interval = (*i)->interval;
1089 for (++i; i != m_collection.end(); ++i)
1090 (*i)->invalidate();
1091 return true;
1092 }
1093 }
1094 hit_offset = (size_t)-1;
1095 interval.start = (interval.end = start) + 1;
1096 return false;
1097 }
1098
1099 virtual void invalidate()
1100 {
1101 hit_offset = (size_t)-1;
1103 }
1104
1105 public:
1106 size_t hit_offset;
1107 };
1108
1109 using branch = basic_branch<char>;
1111#ifdef _UNICODE
1112 using tbranch = wbranch;
1113#else
1114 using tbranch = branch;
1115#endif
1117
1121 template <class T, class T_parser = basic_string<T>>
1123 {
1124 public:
1125 inline basic_string_branch(
1126 _In_reads_(count) const T* str_z = nullptr,
1127 _In_ size_t count = 0,
1128 _In_ const std::locale& locale = std::locale()) :
1129 basic_branch<T>(locale)
1130 {
1131 build(str_z, count);
1132 }
1133
1134 inline basic_string_branch(_In_z_ const T* str, ...) :
1135 basic_branch<T>(std::locale())
1136 {
1137 va_list params;
1138 va_start(params, str);
1139 build(str, params);
1140 va_end(params);
1141 }
1142
1143 inline basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1144 basic_branch<T>(locale)
1145 {
1146 va_list params;
1147 va_start(params, str);
1148 build(str, params);
1149 va_end(params);
1150 }
1151
1152 protected:
1153 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1154 {
1155 assert(str_z || !count);
1156 if (count) {
1157 size_t offset, n;
1158 for (
1159 offset = n = 0;
1160 offset < count && str_z[offset];
1161 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1162 m_collection.reserve(n);
1163 for (
1164 offset = 0;
1165 offset < count && str_z[offset];
1166 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1167 m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, m_locale)));
1168 }
1169 }
1170
1171 void build(_In_z_ const T* str, _In_ va_list params)
1172 {
1173 const T* p;
1174 for (
1175 m_collection.push_back(std::move(std::make_shared<T_parser>(str, (size_t)-1, m_locale)));
1176 (p = va_arg(params, const T*)) != nullptr;
1177 m_collection.push_back(std::move(std::make_shared<T_parser>(p, (size_t)-1, m_locale))));
1178 }
1179 };
1180
1183#ifdef _UNICODE
1185#else
1187#endif
1189
1193 template <class T>
1195 {
1196 public:
1198 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1199 _In_ size_t count = 0,
1200 _In_ const std::locale& locale = std::locale()) :
1201 parser_collection<T>(el, count, locale)
1202 {}
1203
1205 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1206 _In_ const std::locale& locale = std::locale()) :
1207 parser_collection<T>(std::move(collection), locale)
1208 {}
1209
1210 virtual bool match(
1211 _In_reads_or_z_(end) const T* text,
1212 _In_ size_t start = 0,
1213 _In_ size_t end = (size_t)-1,
1214 _In_ int flags = match_default)
1215 {
1216 assert(text || start >= end);
1217 for (auto& el: m_collection)
1218 el->invalidate();
1219 if (match_recursively(text, start, end, flags)) {
1220 interval.start = start;
1221 return true;
1222 }
1223 interval.start = (interval.end = start) + 1;
1224 return false;
1225 }
1226
1227 protected:
1228 bool match_recursively(
1229 _In_reads_or_z_(end) const T* text,
1230 _In_ size_t start = 0,
1231 _In_ size_t end = (size_t)-1,
1232 _In_ int flags = match_default)
1233 {
1234 bool all_matched = true;
1235 for (auto& el: m_collection) {
1236 if (!el->interval) {
1237 // Element was not matched in permutatuion yet.
1238 all_matched = false;
1239 if (el->match(text, start, end, flags)) {
1240 // Element matched for the first time.
1241 if (match_recursively(text, el->interval.end, end, flags)) {
1242 // Rest of the elements matched too.
1243 return true;
1244 }
1245 el->invalidate();
1246 }
1247 }
1248 }
1249 if (all_matched) {
1250 interval.end = start;
1251 return true;
1252 }
1253 return false;
1254 }
1255 };
1256
1259#ifdef _UNICODE
1260 using tpermutation = wpermutation;
1261#else
1262 using tpermutation = permutation;
1263#endif
1265
1269 template <class T>
1270 class basic_integer : public basic_parser<T>
1271 {
1272 public:
1273 basic_integer(_In_ const std::locale& locale = std::locale()) :
1274 basic_parser<T>(locale),
1275 value(0)
1276 {}
1277
1278 virtual void invalidate()
1279 {
1280 value = 0;
1282 }
1283
1284 public:
1285 size_t value;
1286 };
1287
1291 template <class T>
1293 {
1294 public:
1296 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1297 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1298 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1299 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1300 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1301 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1302 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1303 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1304 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1305 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1306 _In_ const std::locale& locale = std::locale()) :
1307 basic_integer<T>(locale),
1308 m_digit_0(digit_0),
1309 m_digit_1(digit_1),
1310 m_digit_2(digit_2),
1311 m_digit_3(digit_3),
1312 m_digit_4(digit_4),
1313 m_digit_5(digit_5),
1314 m_digit_6(digit_6),
1315 m_digit_7(digit_7),
1316 m_digit_8(digit_8),
1317 m_digit_9(digit_9)
1318 {}
1319
1320 virtual bool match(
1321 _In_reads_or_z_(end) const T* text,
1322 _In_ size_t start = 0,
1323 _In_ size_t end = (size_t)-1,
1324 _In_ int flags = match_default)
1325 {
1326 assert(text || start >= end);
1327 for (interval.end = start, value = 0; interval.end < end && text[interval.end];) {
1328 size_t dig;
1329 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; interval.end = m_digit_0->interval.end; }
1330 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; interval.end = m_digit_1->interval.end; }
1331 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; interval.end = m_digit_2->interval.end; }
1332 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; interval.end = m_digit_3->interval.end; }
1333 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; interval.end = m_digit_4->interval.end; }
1334 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; interval.end = m_digit_5->interval.end; }
1335 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; interval.end = m_digit_6->interval.end; }
1336 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; interval.end = m_digit_7->interval.end; }
1337 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; interval.end = m_digit_8->interval.end; }
1338 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; interval.end = m_digit_9->interval.end; }
1339 else break;
1340 value = value * 10 + dig;
1341 }
1342 if (start < interval.end) {
1343 interval.start = start;
1344 return true;
1345 }
1346 interval.start = (interval.end = start) + 1;
1347 return false;
1348 }
1349
1350 protected:
1351 std::shared_ptr<basic_parser<T>>
1352 m_digit_0,
1353 m_digit_1,
1354 m_digit_2,
1355 m_digit_3,
1356 m_digit_4,
1357 m_digit_5,
1358 m_digit_6,
1359 m_digit_7,
1360 m_digit_8,
1361 m_digit_9;
1362 };
1363
1366#ifdef _UNICODE
1367 using tinteger10 = winteger10;
1368#else
1369 using tinteger10 = integer10;
1370#endif
1372
1376 template <class T>
1378 {
1379 public:
1381 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1382 _In_ const std::shared_ptr<basic_set<T>>& separator,
1383 _In_ const std::locale& locale = std::locale()) :
1384 basic_integer<T>(locale),
1385 digit_count(0),
1386 has_separators(false),
1387 m_digits(digits),
1388 m_separator(separator)
1389 {}
1390
1391 virtual bool match(
1392 _In_reads_or_z_(end) const T* text,
1393 _In_ size_t start = 0,
1394 _In_ size_t end = (size_t)-1,
1395 _In_ int flags = match_default)
1396 {
1397 assert(text || start >= end);
1398 if (m_digits->match(text, start, end, flags)) {
1399 // Leading part match.
1400 value = m_digits->value;
1401 digit_count = m_digits->interval.size();
1402 has_separators = false;
1403 interval.start = start;
1404 interval.end = m_digits->interval.end;
1405 if (m_digits->interval.size() <= 3) {
1406 // Maybe separated with thousand separators?
1407 size_t hit_offset = (size_t)-1;
1408 while (m_separator->match(text, interval.end, end, flags) &&
1409 (hit_offset == (size_t)-1 || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1410 m_digits->match(text, m_separator->interval.end, end, flags) &&
1411 m_digits->interval.size() == 3)
1412 {
1413 // Thousand separator and three-digit integer followed.
1414 value = value * 1000 + m_digits->value;
1415 digit_count += 3;
1416 has_separators = true;
1417 interval.end = m_digits->interval.end;
1418 hit_offset = m_separator->hit_offset;
1419 }
1420 }
1421
1422 return true;
1423 }
1424 value = 0;
1425 interval.start = (interval.end = start) + 1;
1426 return false;
1427 }
1428
1429 virtual void invalidate()
1430 {
1431 digit_count = 0;
1432 has_separators = false;
1434 }
1435
1436 public:
1439
1440 protected:
1441 std::shared_ptr<basic_integer10<T>> m_digits;
1442 std::shared_ptr<basic_set<T>> m_separator;
1443 };
1444
1447#ifdef _UNICODE
1448 using tinteger10ts = winteger10ts;
1449#else
1450 using tinteger10ts = integer10ts;
1451#endif
1453
1457 template <class T>
1459 {
1460 public:
1462 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1463 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1464 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1465 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1466 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1467 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1468 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1469 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1470 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1471 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1472 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1473 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1474 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1475 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1476 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1477 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1478 _In_ const std::locale& locale = std::locale()) :
1479 basic_integer<T>(locale),
1480 m_digit_0(digit_0),
1481 m_digit_1(digit_1),
1482 m_digit_2(digit_2),
1483 m_digit_3(digit_3),
1484 m_digit_4(digit_4),
1485 m_digit_5(digit_5),
1486 m_digit_6(digit_6),
1487 m_digit_7(digit_7),
1488 m_digit_8(digit_8),
1489 m_digit_9(digit_9),
1490 m_digit_10(digit_10),
1491 m_digit_11(digit_11),
1492 m_digit_12(digit_12),
1493 m_digit_13(digit_13),
1494 m_digit_14(digit_14),
1495 m_digit_15(digit_15)
1496 {}
1497
1498 virtual bool match(
1499 _In_reads_or_z_(end) const T* text,
1500 _In_ size_t start = 0,
1501 _In_ size_t end = (size_t)-1,
1502 _In_ int flags = match_default)
1503 {
1504 assert(text || start >= end);
1505 for (interval.end = start, value = 0; interval.end < end && text[interval.end];) {
1506 size_t dig;
1507 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; interval.end = m_digit_0->interval.end; }
1508 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; interval.end = m_digit_1->interval.end; }
1509 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; interval.end = m_digit_2->interval.end; }
1510 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; interval.end = m_digit_3->interval.end; }
1511 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; interval.end = m_digit_4->interval.end; }
1512 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; interval.end = m_digit_5->interval.end; }
1513 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; interval.end = m_digit_6->interval.end; }
1514 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; interval.end = m_digit_7->interval.end; }
1515 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; interval.end = m_digit_8->interval.end; }
1516 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; interval.end = m_digit_9->interval.end; }
1517 else if (m_digit_10->match(text, interval.end, end, flags)) { dig = 10; interval.end = m_digit_10->interval.end; }
1518 else if (m_digit_11->match(text, interval.end, end, flags)) { dig = 11; interval.end = m_digit_11->interval.end; }
1519 else if (m_digit_12->match(text, interval.end, end, flags)) { dig = 12; interval.end = m_digit_12->interval.end; }
1520 else if (m_digit_13->match(text, interval.end, end, flags)) { dig = 13; interval.end = m_digit_13->interval.end; }
1521 else if (m_digit_14->match(text, interval.end, end, flags)) { dig = 14; interval.end = m_digit_14->interval.end; }
1522 else if (m_digit_15->match(text, interval.end, end, flags)) { dig = 15; interval.end = m_digit_15->interval.end; }
1523 else break;
1524 value = value * 16 + dig;
1525 }
1526 if (start < interval.end) {
1527 interval.start = start;
1528 return true;
1529 }
1530 interval.start = (interval.end = start) + 1;
1531 return false;
1532 }
1533
1534 protected:
1535 std::shared_ptr<basic_parser<T>>
1536 m_digit_0,
1537 m_digit_1,
1538 m_digit_2,
1539 m_digit_3,
1540 m_digit_4,
1541 m_digit_5,
1542 m_digit_6,
1543 m_digit_7,
1544 m_digit_8,
1545 m_digit_9,
1546 m_digit_10,
1547 m_digit_11,
1548 m_digit_12,
1549 m_digit_13,
1550 m_digit_14,
1551 m_digit_15;
1552 };
1553
1556#ifdef _UNICODE
1557 using tinteger16 = winteger16;
1558#else
1559 using tinteger16 = integer16;
1560#endif
1562
1566 template <class T>
1568 {
1569 public:
1571 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1572 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1573 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1574 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1575 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1576 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1577 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1578 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1579 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1580 _In_ const std::locale& locale = std::locale()) :
1581 basic_integer<T>(locale),
1582 m_digit_1(digit_1),
1583 m_digit_5(digit_5),
1584 m_digit_10(digit_10),
1585 m_digit_50(digit_50),
1586 m_digit_100(digit_100),
1587 m_digit_500(digit_500),
1588 m_digit_1000(digit_1000),
1589 m_digit_5000(digit_5000),
1590 m_digit_10000(digit_10000)
1591 {}
1592
1593 virtual bool match(
1594 _In_reads_or_z_(end) const T* text,
1595 _In_ size_t start = 0,
1596 _In_ size_t end = (size_t)-1,
1597 _In_ int flags = match_default)
1598 {
1599 assert(text || start >= end);
1600 size_t
1601 dig[5] = { (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1 },
1602 end2;
1603
1604 for (interval.end = start, value = 0; interval.end < end && text[interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], interval.end = end2) {
1605 if (m_digit_1 && m_digit_1->match(text, interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1606 else if (m_digit_5 && m_digit_5->match(text, interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1607 else if (m_digit_10 && m_digit_10->match(text, interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1608 else if (m_digit_50 && m_digit_50->match(text, interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1609 else if (m_digit_100 && m_digit_100->match(text, interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1610 else if (m_digit_500 && m_digit_500->match(text, interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1611 else if (m_digit_1000 && m_digit_1000->match(text, interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1612 else if (m_digit_5000 && m_digit_5000->match(text, interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1613 else if (m_digit_10000 && m_digit_10000->match(text, interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1614 else break;
1615
1616 // Store first digit.
1617 if (dig[4] == (size_t)-1) dig[4] = dig[0];
1618
1619 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1620 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1621 break;
1622 }
1623 if (dig[0] <= dig[1]) {
1624 // Digit is less or equal previous one: add.
1625 value += dig[0];
1626 }
1627 else if (
1628 dig[1] == 1 && (dig[0] == 5 || dig[0] == 10) ||
1629 dig[1] == 10 && (dig[0] == 50 || dig[0] == 100) ||
1630 dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000) ||
1631 dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000))
1632 {
1633 // Digit is up to two orders bigger than previous one: subtract. But...
1634 if (dig[2] < dig[0]) {
1635 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1636 break;
1637 }
1638 value -= dig[1]; // Cancel addition in the previous step.
1639 dig[0] -= dig[1]; // Combine last two digits.
1640 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1641 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1642 value += dig[0]; // Add combined value.
1643 }
1644 else {
1645 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1646 break;
1647 }
1648 }
1649 if (value) {
1650 interval.start = start;
1651 return true;
1652 }
1653 interval.start = (interval.end = start) + 1;
1654 return false;
1655 }
1656
1657 protected:
1658 std::shared_ptr<basic_parser<T>>
1659 m_digit_1,
1660 m_digit_5,
1661 m_digit_10,
1662 m_digit_50,
1663 m_digit_100,
1664 m_digit_500,
1665 m_digit_1000,
1666 m_digit_5000,
1667 m_digit_10000;
1668 };
1669
1672#ifdef _UNICODE
1674#else
1676#endif
1678
1682 template <class T>
1684 {
1685 public:
1687 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1688 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1689 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1690 _In_ const std::locale& locale = std::locale()) :
1691 basic_parser<T>(locale),
1692 numerator(_numerator),
1693 fraction_line(_fraction_line),
1694 denominator(_denominator)
1695 {}
1696
1697 virtual bool match(
1698 _In_reads_or_z_(end) const T* text,
1699 _In_ size_t start = 0,
1700 _In_ size_t end = (size_t)-1,
1701 _In_ int flags = match_default)
1702 {
1703 assert(text || start >= end);
1704 if (numerator->match(text, start, end, flags) &&
1705 fraction_line->match(text, numerator->interval.end, end, flags) &&
1706 denominator->match(text, fraction_line->interval.end, end, flags))
1707 {
1708 interval.start = start;
1709 interval.end = denominator->interval.end;
1710 return true;
1711 }
1712 numerator->invalidate();
1713 fraction_line->invalidate();
1714 denominator->invalidate();
1715 interval.start = (interval.end = start) + 1;
1716 return false;
1717 }
1718
1719 virtual void invalidate()
1720 {
1721 numerator->invalidate();
1722 fraction_line->invalidate();
1723 denominator->invalidate();
1725 }
1726
1727 public:
1728 std::shared_ptr<basic_parser<T>> numerator;
1729 std::shared_ptr<basic_parser<T>> fraction_line;
1730 std::shared_ptr<basic_parser<T>> denominator;
1731 };
1732
1735#ifdef _UNICODE
1736 using tfraction = wfraction;
1737#else
1738 using tfraction = fraction;
1739#endif
1741
1745 template <class T>
1746 class basic_score : public basic_parser<T>
1747 {
1748 public:
1750 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1751 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1752 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1753 _In_ const std::shared_ptr<basic_parser<T>>& space,
1754 _In_ const std::locale& locale = std::locale()) :
1755 basic_parser<T>(locale),
1756 home(_home),
1757 separator(_separator),
1758 guest(_guest),
1759 m_space(space)
1760 {}
1761
1762 virtual bool match(
1763 _In_reads_or_z_(end) const T* text,
1764 _In_ size_t start = 0,
1765 _In_ size_t end = (size_t)-1,
1766 _In_ int flags = match_default)
1767 {
1768 assert(text || start >= end);
1769 interval.end = start;
1770
1771 if (home->match(text, interval.end, end, flags))
1772 interval.end = home->interval.end;
1773 else
1774 goto end;
1775
1776 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1777 for (; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1778
1779 if (separator->match(text, interval.end, end, flags))
1780 interval.end = separator->interval.end;
1781 else
1782 goto end;
1783
1784 for (; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1785
1786 if (guest->match(text, interval.end, end, flags))
1787 interval.end = guest->interval.end;
1788 else
1789 goto end;
1790
1791 interval.start = start;
1792 return true;
1793
1794 end:
1795 home->invalidate();
1796 separator->invalidate();
1797 guest->invalidate();
1798 interval.start = (interval.end = start) + 1;
1799 return false;
1800 }
1801
1802 virtual void invalidate()
1803 {
1804 home->invalidate();
1805 separator->invalidate();
1806 guest->invalidate();
1808 }
1809
1810 public:
1811 std::shared_ptr<basic_parser<T>> home;
1812 std::shared_ptr<basic_parser<T>> separator;
1813 std::shared_ptr<basic_parser<T>> guest;
1814
1815 protected:
1816 std::shared_ptr<basic_parser<T>> m_space;
1817 };
1818
1819 using score = basic_score<char>;
1821#ifdef _UNICODE
1822 using tscore = wscore;
1823#else
1824 using tscore = score;
1825#endif
1827
1831 template <class T>
1833 {
1834 public:
1836 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1837 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1838 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1839 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1840 _In_ const std::locale& locale = std::locale()) :
1841 basic_parser<T>(locale),
1846 {}
1847
1848 virtual bool match(
1849 _In_reads_or_z_(end) const T* text,
1850 _In_ size_t start = 0,
1851 _In_ size_t end = (size_t)-1,
1852 _In_ int flags = match_default)
1853 {
1854 assert(text || start >= end);
1855 interval.end = start;
1856 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
1857 interval.end = positive_sign->interval.end;
1858 if (negative_sign) negative_sign->invalidate();
1859 if (special_sign) special_sign->invalidate();
1860 }
1861 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
1862 interval.end = negative_sign->interval.end;
1863 if (positive_sign) positive_sign->invalidate();
1864 if (special_sign) special_sign->invalidate();
1865 }
1866 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
1867 interval.end = special_sign->interval.end;
1868 if (positive_sign) positive_sign->invalidate();
1869 if (negative_sign) negative_sign->invalidate();
1870 }
1871 else {
1872 if (positive_sign) positive_sign->invalidate();
1873 if (negative_sign) negative_sign->invalidate();
1874 if (special_sign) special_sign->invalidate();
1875 }
1876 if (number->match(text, interval.end, end, flags)) {
1877 interval.start = start;
1878 interval.end = number->interval.end;
1879 return true;
1880 }
1881 if (positive_sign) positive_sign->invalidate();
1882 if (negative_sign) negative_sign->invalidate();
1883 if (special_sign) special_sign->invalidate();
1884 number->invalidate();
1885 interval.start = (interval.end = start) + 1;
1886 return false;
1887 }
1888
1889 virtual void invalidate()
1890 {
1891 if (positive_sign) positive_sign->invalidate();
1892 if (negative_sign) negative_sign->invalidate();
1893 if (special_sign) special_sign->invalidate();
1894 number->invalidate();
1896 }
1897
1898 public:
1899 std::shared_ptr<basic_parser<T>> positive_sign;
1900 std::shared_ptr<basic_parser<T>> negative_sign;
1901 std::shared_ptr<basic_parser<T>> special_sign;
1902 std::shared_ptr<basic_parser<T>> number;
1903 };
1904
1907#ifdef _UNICODE
1909#else
1911#endif
1913
1917 template <class T>
1919 {
1920 public:
1922 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1923 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1924 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1925 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1926 _In_ const std::shared_ptr<basic_parser<T>>& space,
1927 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1928 _In_ const std::locale& locale = std::locale()) :
1929 basic_parser<T>(locale),
1935 m_space(space)
1936 {}
1937
1938 virtual bool match(
1939 _In_reads_or_z_(end) const T* text,
1940 _In_ size_t start = 0,
1941 _In_ size_t end = (size_t)-1,
1942 _In_ int flags = match_default)
1943 {
1944 assert(text || start >= end);
1945 interval.end = start;
1946
1947 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
1948 interval.end = positive_sign->interval.end;
1949 if (negative_sign) negative_sign->invalidate();
1950 if (special_sign) special_sign->invalidate();
1951 }
1952 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
1953 interval.end = negative_sign->interval.end;
1954 if (positive_sign) positive_sign->invalidate();
1955 if (special_sign) special_sign->invalidate();
1956 }
1957 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
1958 interval.end = special_sign->interval.end;
1959 if (positive_sign) positive_sign->invalidate();
1960 if (negative_sign) negative_sign->invalidate();
1961 }
1962 else {
1963 if (positive_sign) positive_sign->invalidate();
1964 if (negative_sign) negative_sign->invalidate();
1965 if (special_sign) special_sign->invalidate();
1966 }
1967
1968 // Check for <integer> <fraction>
1969 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1970 if (integer->match(text, interval.end, end, flags) &&
1971 m_space->match(text, integer->interval.end, end, space_match_flags))
1972 {
1973 for (interval.end = m_space->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
1974 if (fraction->match(text, interval.end, end, flags)) {
1975 interval.start = start;
1977 return true;
1978 }
1979 fraction->invalidate();
1980 interval.start = start;
1981 interval.end = integer->interval.end;
1982 return true;
1983 }
1984
1985 // Check for <fraction>
1986 if (fraction->match(text, interval.end, end, flags)) {
1987 integer->invalidate();
1988 interval.start = start;
1990 return true;
1991 }
1992
1993 // Check for <integer>
1994 if (integer->match(text, interval.end, end, flags)) {
1995 fraction->invalidate();
1996 interval.start = start;
1997 interval.end = integer->interval.end;
1998 return true;
1999 }
2000
2001 if (positive_sign) positive_sign->invalidate();
2002 if (negative_sign) negative_sign->invalidate();
2003 if (special_sign) special_sign->invalidate();
2004 integer->invalidate();
2005 fraction->invalidate();
2006 interval.start = (interval.end = start) + 1;
2007 return false;
2008 }
2009
2010 virtual void invalidate()
2011 {
2012 if (positive_sign) positive_sign->invalidate();
2013 if (negative_sign) negative_sign->invalidate();
2014 if (special_sign) special_sign->invalidate();
2015 integer->invalidate();
2016 fraction->invalidate();
2018 }
2019
2020 public:
2021 std::shared_ptr<basic_parser<T>> positive_sign;
2022 std::shared_ptr<basic_parser<T>> negative_sign;
2023 std::shared_ptr<basic_parser<T>> special_sign;
2024 std::shared_ptr<basic_parser<T>> integer;
2025 std::shared_ptr<basic_parser<T>> fraction;
2026
2027 protected:
2028 std::shared_ptr<basic_parser<T>> m_space;
2029 };
2030
2033#ifdef _UNICODE
2035#else
2037#endif
2039
2043 template <class T>
2045 {
2046 public:
2048 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2049 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2050 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2051 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2052 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2053 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2054 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2055 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2056 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2057 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2058 _In_ const std::locale& locale = std::locale()) :
2059 basic_parser<T>(locale),
2070 value(std::numeric_limits<double>::quiet_NaN())
2071 {}
2072
2073 virtual bool match(
2074 _In_reads_or_z_(end) const T* text,
2075 _In_ size_t start = 0,
2076 _In_ size_t end = (size_t)-1,
2077 _In_ int flags = match_default)
2078 {
2079 assert(text || start >= end);
2080 interval.end = start;
2081
2082 if (positive_sign && positive_sign->match(text, interval.end, end, flags)) {
2083 interval.end = positive_sign->interval.end;
2084 if (negative_sign) negative_sign->invalidate();
2085 if (special_sign) special_sign->invalidate();
2086 }
2087 else if (negative_sign && negative_sign->match(text, interval.end, end, flags)) {
2088 interval.end = negative_sign->interval.end;
2089 if (positive_sign) positive_sign->invalidate();
2090 if (special_sign) special_sign->invalidate();
2091 }
2092 else if (special_sign && special_sign->match(text, interval.end, end, flags)) {
2093 interval.end = special_sign->interval.end;
2094 if (positive_sign) positive_sign->invalidate();
2095 if (negative_sign) negative_sign->invalidate();
2096 }
2097 else {
2098 if (positive_sign) positive_sign->invalidate();
2099 if (negative_sign) negative_sign->invalidate();
2100 if (special_sign) special_sign->invalidate();
2101 }
2102
2103 if (integer->match(text, interval.end, end, flags))
2104 interval.end = integer->interval.end;
2105
2106 if (decimal_separator->match(text, interval.end, end, flags) &&
2108 interval.end = decimal->interval.end;
2109 else {
2110 decimal_separator->invalidate();
2111 decimal->invalidate();
2112 }
2113
2114 if (integer->interval.empty() &&
2115 decimal->interval.empty())
2116 {
2117 // No integer part, no decimal part.
2118 if (positive_sign) positive_sign->invalidate();
2119 if (negative_sign) negative_sign->invalidate();
2120 if (special_sign) special_sign->invalidate();
2121 integer->invalidate();
2122 decimal_separator->invalidate();
2123 decimal->invalidate();
2124 if (exponent_symbol) exponent_symbol->invalidate();
2125 if (positive_exp_sign) positive_exp_sign->invalidate();
2126 if (negative_exp_sign) negative_exp_sign->invalidate();
2127 if (exponent) exponent->invalidate();
2128 interval.start = (interval.end = start) + 1;
2129 return false;
2130 }
2131
2132 if (exponent_symbol && exponent_symbol->match(text, interval.end, end, flags) &&
2136 {
2137 interval.end = exponent->interval.end;
2138 if (negative_exp_sign) negative_exp_sign->invalidate();
2139 }
2140 else if (exponent_symbol && exponent_symbol->match(text, interval.end, end, flags) &&
2143 {
2144 interval.end = exponent->interval.end;
2145 if (positive_exp_sign) positive_exp_sign->invalidate();
2146 }
2147 else {
2148 if (exponent_symbol) exponent_symbol->invalidate();
2149 if (positive_exp_sign) positive_exp_sign->invalidate();
2150 if (negative_exp_sign) negative_exp_sign->invalidate();
2151 if (exponent) exponent->invalidate();
2152 }
2153
2154 value = (double)integer->value;
2155 if (decimal->interval)
2156 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2157 if (negative_sign && negative_sign->interval)
2158 value = -value;
2159 if (exponent && exponent->interval) {
2160 double e = (double)exponent->value;
2161 if (negative_exp_sign && negative_exp_sign->interval)
2162 e = -e;
2163 value *= pow(10.0, e);
2164 }
2165
2166 interval.start = start;
2167 return true;
2168 }
2169
2170 virtual void invalidate()
2171 {
2172 if (positive_sign) positive_sign->invalidate();
2173 if (negative_sign) negative_sign->invalidate();
2174 if (special_sign) special_sign->invalidate();
2175 integer->invalidate();
2176 decimal_separator->invalidate();
2177 decimal->invalidate();
2178 if (exponent_symbol) exponent_symbol->invalidate();
2179 if (positive_exp_sign) positive_exp_sign->invalidate();
2180 if (negative_exp_sign) negative_exp_sign->invalidate();
2181 if (exponent) exponent->invalidate();
2182 value = std::numeric_limits<double>::quiet_NaN();
2184 }
2185
2186 public:
2187 std::shared_ptr<basic_parser<T>> positive_sign;
2188 std::shared_ptr<basic_parser<T>> negative_sign;
2189 std::shared_ptr<basic_parser<T>> special_sign;
2190 std::shared_ptr<basic_integer<T>> integer;
2191 std::shared_ptr<basic_parser<T>> decimal_separator;
2192 std::shared_ptr<basic_integer<T>> decimal;
2193 std::shared_ptr<basic_parser<T>> exponent_symbol;
2194 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2195 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2196 std::shared_ptr<basic_integer<T>> exponent;
2197 double value;
2198 };
2199
2202#ifdef _UNICODE
2204#else
2206#endif
2208
2212 template <class T>
2214 {
2215 public:
2217 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2218 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2219 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2220 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2221 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2222 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2223 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2224 _In_ const std::locale& locale = std::locale()) :
2225 basic_parser<T>(locale),
2233 {}
2234
2235 virtual bool match(
2236 _In_reads_or_z_(end) const T* text,
2237 _In_ size_t start = 0,
2238 _In_ size_t end = (size_t)-1,
2239 _In_ int flags = match_default)
2240 {
2241 assert(text || start >= end);
2242 interval.end = start;
2243
2244 if (positive_sign->match(text, interval.end, end, flags)) {
2245 interval.end = positive_sign->interval.end;
2246 if (negative_sign) negative_sign->invalidate();
2247 if (special_sign) special_sign->invalidate();
2248 }
2249 else if (negative_sign->match(text, interval.end, end, flags)) {
2250 interval.end = negative_sign->interval.end;
2251 if (positive_sign) positive_sign->invalidate();
2252 if (special_sign) special_sign->invalidate();
2253 }
2254 else if (special_sign->match(text, interval.end, end, flags)) {
2255 interval.end = special_sign->interval.end;
2256 if (positive_sign) positive_sign->invalidate();
2257 if (negative_sign) negative_sign->invalidate();
2258 }
2259 else {
2260 if (positive_sign) positive_sign->invalidate();
2261 if (negative_sign) negative_sign->invalidate();
2262 if (special_sign) special_sign->invalidate();
2263 }
2264
2265 if (currency->match(text, interval.end, end, flags))
2266 interval.end = currency->interval.end;
2267 else {
2268 if (positive_sign) positive_sign->invalidate();
2269 if (negative_sign) negative_sign->invalidate();
2270 if (special_sign) special_sign->invalidate();
2271 integer->invalidate();
2272 decimal_separator->invalidate();
2273 decimal->invalidate();
2274 interval.start = (interval.end = start) + 1;
2275 return false;
2276 }
2277
2278 if (integer->match(text, interval.end, end, flags))
2279 interval.end = integer->interval.end;
2280 if (decimal_separator->match(text, interval.end, end, flags) &&
2282 interval.end = decimal->interval.end;
2283 else {
2284 decimal_separator->invalidate();
2285 decimal->invalidate();
2286 }
2287
2288 if (integer->interval.empty() &&
2289 decimal->interval.empty())
2290 {
2291 // No integer part, no decimal part.
2292 if (positive_sign) positive_sign->invalidate();
2293 if (negative_sign) negative_sign->invalidate();
2294 if (special_sign) special_sign->invalidate();
2295 currency->invalidate();
2296 integer->invalidate();
2297 decimal_separator->invalidate();
2298 decimal->invalidate();
2299 interval.start = (interval.end = start) + 1;
2300 return false;
2301 }
2302
2303 interval.start = start;
2304 return true;
2305 }
2306
2307 virtual void invalidate()
2308 {
2309 if (positive_sign) positive_sign->invalidate();
2310 if (negative_sign) negative_sign->invalidate();
2311 if (special_sign) special_sign->invalidate();
2312 currency->invalidate();
2313 integer->invalidate();
2314 decimal_separator->invalidate();
2315 decimal->invalidate();
2317 }
2318
2319 public:
2320 std::shared_ptr<basic_parser<T>> positive_sign;
2321 std::shared_ptr<basic_parser<T>> negative_sign;
2322 std::shared_ptr<basic_parser<T>> special_sign;
2323 std::shared_ptr<basic_parser<T>> currency;
2324 std::shared_ptr<basic_parser<T>> integer;
2325 std::shared_ptr<basic_parser<T>> decimal_separator;
2326 std::shared_ptr<basic_parser<T>> decimal;
2327 };
2328
2331#ifdef _UNICODE
2333#else
2335#endif
2337
/// Parser for an address written as four separator-delimited decimal components,
/// each in the range 0..255, accumulated into `value.s_addr`.
/// NOTE(review): the class declaration line and the constructor name line are absent from this
/// listing (numbers 2342 and 2345 are skipped) — presumably the IPv4 address parser; confirm.
2341 template <class T>
2343 {
2344 public:
/// Stores the ten digit sub-parsers (one per decimal digit 0..9), the component separator
/// and the locale, then zeroes `value.s_addr`.
2346 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2347 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2348 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2349 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2350 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2351 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2352 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2353 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2354 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2355 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2356 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2357 _In_ const std::locale& locale = std::locale()) :
2358 basic_parser<T>(locale),
2359 m_digit_0(digit_0),
2360 m_digit_1(digit_1),
2361 m_digit_2(digit_2),
2362 m_digit_3(digit_3),
2363 m_digit_4(digit_4),
2364 m_digit_5(digit_5),
2365 m_digit_6(digit_6),
2366 m_digit_7(digit_7),
2367 m_digit_8(digit_8),
2368 m_digit_9(digit_9),
2369 m_separator(separator)
2370 {
2371 value.s_addr = 0;
2372 }
2373
/// Tries to match four decimal components separated by `m_separator`, starting at `start`.
///
/// \param[in] text   Text to parse; scanning also stops at a zero element
/// \param[in] start  Index in `text` at which matching begins
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags, passed unchanged to every sub-parser
///
/// \returns `true` when all four components were read; `false` otherwise, in which case
///          `components`, `value` and `interval` are reset to their invalid/zero state.
2374 virtual bool match(
2375 _In_reads_or_z_(end) const T* text,
2376 _In_ size_t start = 0,
2377 _In_ size_t end = (size_t)-1,
2378 _In_ int flags = match_default)
2379 {
2380 assert(text || start >= end);
2381 interval.end = start;
2382 value.s_addr = 0;
2383
2384 size_t i;
2385 for (i = 0; i < 4; i++) {
// Every component except the first must be preceded by a separator.
2386 if (i) {
2387 if (m_separator->match(text, interval.end, end, flags))
2388 interval.end = m_separator->interval.end;
2389 else
2390 goto error;
2391 }
2392
// NOTE(review): the listing skips number 2393 here (a statement is missing from this extract).
2394 bool is_empty = true;
2395 size_t x;
// Accumulate decimal digits into x; the digit parsers are tried in order 0..9.
2396 for (x = 0; interval.end < end && text[interval.end];) {
2397 size_t dig, digit_end;
2398 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2399 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2400 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2401 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2402 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2403 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2404 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2405 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2406 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2407 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2408 else break;
2409 size_t x_n = x * 10 + dig;
// A digit that would push the component above 255 is not consumed; the component ends before it.
2410 if (x_n <= 255) {
2411 x = x_n;
// NOTE(review): the listing skips number 2412 here. `digit_end` is assigned above but no visible
// line uses it; presumably the missing line advances interval.end to digit_end — confirm.
2413 is_empty = false;
2414 }
2415 else
2416 break;
2417 }
// A component with no digits at all is an error.
2418 if (is_empty)
2419 goto error;
// NOTE(review): the listing skips number 2420 here (a statement is missing from this extract).
// Shift previously read components up by 8 bits and append this one in the low byte.
2421 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2422 }
// The loop above only leaves normally with i == 4 (failures jump to `error`), so this is a safety net.
2423 if (i < 4)
2424 goto error;
2425
2426 interval.start = start;
2427 return true;
2428
2429 error:
// Mark all four component intervals invalid (start > end), clear the value and the overall interval.
2430 components[0].start = 1;
2431 components[0].end = 0;
2432 components[1].start = 1;
2433 components[1].end = 0;
2434 components[2].start = 1;
2435 components[2].end = 0;
2436 components[3].start = 1;
2437 components[3].end = 0;
2438 value.s_addr = 0;
2439 interval.start = (interval.end = start) + 1;
2440 return false;
2441 }
2442
/// Marks all four component intervals invalid (start > end) and zeroes `value.s_addr`.
2443 virtual void invalidate()
2444 {
2445 components[0].start = 1;
2446 components[0].end = 0;
2447 components[1].start = 1;
2448 components[1].end = 0;
2449 components[2].start = 1;
2450 components[2].end = 0;
2451 components[3].start = 1;
2452 components[3].end = 0;
2453 value.s_addr = 0;
// NOTE(review): the listing skips number 2454 here; presumably the base-class invalidate() call — confirm.
2455 }
2456
2457 public:
// NOTE(review): the declarations of `value` and `components` (listing numbers 2458-2459) are
// missing from this extract.
2460
2461 protected:
// Sub-parsers for the decimal digits 0..9, in that order.
2462 std::shared_ptr<basic_parser<T>>
2463 m_digit_0,
2464 m_digit_1,
2465 m_digit_2,
2466 m_digit_3,
2467 m_digit_4,
2468 m_digit_5,
2469 m_digit_6,
2470 m_digit_7,
2471 m_digit_8,
2472 m_digit_9;
// Sub-parser for the separator required between components.
2473 std::shared_ptr<basic_parser<T>> m_separator;
2474 };
2475
2478#ifdef _UNICODE
2480#else
2482#endif
2484
/// Single-element parser that accepts '-', '_', ':' or any element the locale classifies as
/// alphanumeric.
/// NOTE(review): the class declaration line is absent from this listing (number 2489 is skipped);
/// the class name is taken from the constructor below.
2488 template <class T>
2490 {
2491 public:
/// Constructs the parser with the locale used for alphanumeric classification.
2492 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2493
/// Tests the single element at `text[start]`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index of the element to test
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags (not used by this parser)
///
/// \returns `true` and sets `interval` to [start, start + 1) when the element is accepted;
///          `false` with an invalid interval (start > end) otherwise.
2494 virtual bool match(
2495 _In_reads_or_z_(end) const T* text,
2496 _In_ size_t start = 0,
2497 _In_ size_t end = (size_t)-1,
2498 _In_ int flags = match_default)
2499 {
2500 assert(text || start >= end);
// Nothing to test at or past `end`, or at a zero terminator.
2501 if (start < end && text[start]) {
2502 if (text[start] == '-' ||
2503 text[start] == '_' ||
2504 text[start] == ':' ||
2505 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
2506 {
2507 interval.end = (interval.start = start) + 1;
2508 return true;
2509 }
2510 }
// No match: leave an invalid interval (start > end).
2511 interval.start = (interval.end = start) + 1;
2512 return false;
2513 }
2514 };
2515
2518#ifdef _UNICODE
2520#else
2522#endif
2523
/// SGML-aware counterpart of the scope ID character parser: decodes the next code point with
/// next_sgml_cp() and accepts '-', '_', ':' or an alphanumeric sequence.
/// NOTE(review): the class declaration line is absent from this listing (numbers 2524-2527 are
/// skipped); the class name is taken from the constructor below.
2528 {
2529 public:
/// Constructs the parser, forwarding the locale to `sgml_parser`.
2530 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2531
/// Tests the code point that begins at `text[start]`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index at which the code point begins
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags (not used by this parser)
///
/// \returns `true` when the code point is accepted; `false` with an invalid interval otherwise.
2532 virtual bool match(
2533 _In_reads_or_z_(end) const char* text,
2534 _In_ size_t start = 0,
2535 _In_ size_t end = (size_t)-1,
2536 _In_ int flags = match_default)
2537 {
2538 assert(text || start >= end);
2539 if (start < end && text[start]) {
2540 wchar_t buf[3];
// next_sgml_cp() is defined elsewhere; it is handed interval.end, which presumably receives the
// index just past the decoded sequence — confirm against its declaration.
2541 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
2542 const wchar_t* chr_end = chr + stdex::strlen(chr);
// && binds tighter than ||: either a single punctuation character from the list, or a decoded
// sequence in which every character is alphanumeric (scan_not finds no non-alnum character).
2543 if ((chr[0] == L'-' ||
2544 chr[0] == L'_' ||
2545 chr[0] == L':') && chr[1] == 0 ||
2546 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2547 {
// interval.end was already set through the next_sgml_cp() call above.
2548 interval.start = start;
2549 return true;
2550 }
2551 }
// No match: leave an invalid interval (start > end).
2552 interval.start = (interval.end = start) + 1;
2553 return false;
2554 }
2555 };
2556
/// Parser for an address written as up to eight separator-delimited hexadecimal components
/// (each at most 0xffff), with support for one doubled separator ("zero compaction") and an
/// optional scope ID suffix. Components are stored in `value.s6_words`.
/// NOTE(review): the class declaration line and the constructor name line are absent from this
/// listing (numbers 2561 and 2564 are skipped) — presumably the IPv6 address parser; confirm.
2560 template <class T>
2562 {
2563 public:
/// Stores the sixteen digit sub-parsers (one per hexadecimal digit value 0..15), the component
/// separator, the optional scope ID separator and the locale, then zeroes `value`.
2565 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2566 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2567 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2568 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2569 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2570 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2571 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2572 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2573 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2574 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2575 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2576 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2577 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2578 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2579 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2580 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2581 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2582 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2583 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2584 _In_ const std::locale& locale = std::locale()) :
2585 basic_parser<T>(locale),
2586 m_digit_0(digit_0),
2587 m_digit_1(digit_1),
2588 m_digit_2(digit_2),
2589 m_digit_3(digit_3),
2590 m_digit_4(digit_4),
2591 m_digit_5(digit_5),
2592 m_digit_6(digit_6),
2593 m_digit_7(digit_7),
2594 m_digit_8(digit_8),
2595 m_digit_9(digit_9),
2596 m_digit_10(digit_10),
2597 m_digit_11(digit_11),
2598 m_digit_12(digit_12),
2599 m_digit_13(digit_13),
2600 m_digit_14(digit_14),
2601 m_digit_15(digit_15),
2602 m_separator(separator),
2603 m_scope_id_separator(scope_id_separator),
// NOTE(review): the listing skips number 2604 here; the final initializer (presumably the one
// consuming `_scope_id`) is missing from this extract, which is why the list appears to end in a comma.
2605 {
2606 memset(&value, 0, sizeof(value));
2607 }
2608
/// Tries to match an address starting at `start`, followed by an optional scope ID.
///
/// \param[in] text   Text to parse; scanning also stops at a zero element
/// \param[in] start  Index in `text` at which matching begins
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags, passed unchanged to every sub-parser
///
/// \returns `true` when either all eight components were read or a doubled separator was seen;
///          `false` otherwise, in which case `components`, `value`, `scope_id` and `interval`
///          are reset to their invalid/zero state.
2609 virtual bool match(
2610 _In_reads_or_z_(end) const T* text,
2611 _In_ size_t start = 0,
2612 _In_ size_t end = (size_t)-1,
2613 _In_ int flags = match_default)
2614 {
2615 assert(text || start >= end);
2616 interval.end = start;
2617 memset(&value, 0, sizeof(value));
2618
// compaction_i is the component index at which the doubled separator was seen; (size_t)-1 means "none yet".
2619 size_t i, compaction_i = (size_t)-1, compaction_start = start;
2620 for (i = 0; i < 8; i++) {
2621 bool is_empty = true;
2622
2623 if (m_separator->match(text, interval.end, end, flags)) {
// Probe for a second separator directly after the first. This re-runs the same sub-parser, so
// m_separator->interval now describes the second attempt, whether it succeeded or not.
2624 if (m_separator->match(text, m_separator->interval.end, end, flags)) {
2625 // :: found
2626 if (compaction_i == (size_t)-1) {
2627 // Zero compaction start
2628 compaction_i = i;
2629 compaction_start = m_separator->interval.start;
2630 interval.end = m_separator->interval.end;
2631 }
2632 else {
2633 // More than one zero compaction
2634 break;
2635 }
2636 }
2637 else if (i) {
2638 // Inner : found
// NOTE(review): m_separator->interval here is the result of the failed second attempt. This
// relies on a failed match leaving interval.end equal to its `start` argument (i.e. just past the
// first separator), as the failure paths visible in this file do — confirm for the separator in use.
2639 interval.end = m_separator->interval.end;
2640 }
2641 else {
2642 // Leading : found
2643 goto error;
2644 }
2645 }
2646 else if (i) {
2647 // : missing
2648 break;
2649 }
2650
// NOTE(review): the listing skips number 2651 here (a statement is missing from this extract).
2652 size_t x;
// Accumulate hexadecimal digits into x; the digit parsers are tried in order 0..15.
2653 for (x = 0; interval.end < end && text[interval.end];) {
2654 size_t dig, digit_end;
2655 if (m_digit_0->match(text, interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2656 else if (m_digit_1->match(text, interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2657 else if (m_digit_2->match(text, interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2658 else if (m_digit_3->match(text, interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2659 else if (m_digit_4->match(text, interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2660 else if (m_digit_5->match(text, interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2661 else if (m_digit_6->match(text, interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2662 else if (m_digit_7->match(text, interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2663 else if (m_digit_8->match(text, interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2664 else if (m_digit_9->match(text, interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2665 else if (m_digit_10->match(text, interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2666 else if (m_digit_11->match(text, interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2667 else if (m_digit_12->match(text, interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2668 else if (m_digit_13->match(text, interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2669 else if (m_digit_14->match(text, interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2670 else if (m_digit_15->match(text, interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2671 else break;
2672 size_t x_n = x * 16 + dig;
// A digit that would push the component above 0xffff is not consumed; the component ends before it.
2673 if (x_n <= 0xffff) {
2674 x = x_n;
// NOTE(review): the listing skips number 2675 here. `digit_end` is assigned above but no visible
// line uses it; presumably the missing line advances interval.end to digit_end — confirm.
2676 is_empty = false;
2677 }
2678 else
2679 break;
2680 }
2681 if (is_empty) {
2682 if (compaction_i != (size_t)-1) {
2683 // Zero compaction active: no sweat.
2684 break;
2685 }
2686 goto error;
2687 }
// NOTE(review): the listing skips number 2688 here (a statement is missing from this extract).
2689 value.s6_words[i] = (uint16_t)x;
2690 }
2691
2692 if (compaction_i != (size_t)-1) {
2693 // Align components right due to zero compaction.
// First move the components read after the doubled separator to the end of the array...
2694 size_t j, k;
2695 for (j = 8, k = i; k > compaction_i;) {
2696 value.s6_words[--j] = value.s6_words[--k];
// NOTE(review): the listing skips number 2697 here (a statement is missing from this extract).
2698 }
// ...then zero-fill the gap the doubled separator stands for.
2699 for (; j > compaction_i;) {
2700 value.s6_words[--j] = 0;
2701 components[j].start =
// NOTE(review): the listing skips number 2702 here; the right-hand side of the assignment above
// is missing from this extract.
2703 }
2704 }
// Without a doubled separator all eight components are mandatory.
2705 else if (i < 8)
2706 goto error;
2707
// Optional scope ID: consumed only when both the scope separator and the scope ID match;
// otherwise the scope ID parser (if any) is invalidated and the address still matches.
2708 if (m_scope_id_separator && m_scope_id_separator->match(text, interval.end, end, flags) &&
2709 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2710 interval.end = scope_id->interval.end;
2711 else if (scope_id)
2712 scope_id->invalidate();
2713
2714 interval.start = start;
2715 return true;
2716
2717 error:
// Mark all eight component intervals invalid (start > end), clear the value, the scope ID and
// the overall interval.
2718 components[0].start = 1;
2719 components[0].end = 0;
2720 components[1].start = 1;
2721 components[1].end = 0;
2722 components[2].start = 1;
2723 components[2].end = 0;
2724 components[3].start = 1;
2725 components[3].end = 0;
2726 components[4].start = 1;
2727 components[4].end = 0;
2728 components[5].start = 1;
2729 components[5].end = 0;
2730 components[6].start = 1;
2731 components[6].end = 0;
2732 components[7].start = 1;
2733 components[7].end = 0;
2734 memset(&value, 0, sizeof(value));
2735 if (scope_id) scope_id->invalidate();
2736 interval.start = (interval.end = start) + 1;
2737 return false;
2738 }
2739
/// Marks all eight component intervals invalid (start > end), zeroes `value` and invalidates
/// the scope ID parser when one is set.
2740 virtual void invalidate()
2741 {
2742 components[0].start = 1;
2743 components[0].end = 0;
2744 components[1].start = 1;
2745 components[1].end = 0;
2746 components[2].start = 1;
2747 components[2].end = 0;
2748 components[3].start = 1;
2749 components[3].end = 0;
2750 components[4].start = 1;
2751 components[4].end = 0;
2752 components[5].start = 1;
2753 components[5].end = 0;
2754 components[6].start = 1;
2755 components[6].end = 0;
2756 components[7].start = 1;
2757 components[7].end = 0;
2758 memset(&value, 0, sizeof(value));
2759 if (scope_id) scope_id->invalidate();
// NOTE(review): the listing skips number 2760 here; presumably the base-class invalidate() call — confirm.
2761 }
2762
2763 public:
// NOTE(review): the declarations of `value` and `components` (listing numbers 2764-2765) are
// missing from this extract.
// Optional scope ID sub-parser; may be null (match() and invalidate() null-check it).
2766 std::shared_ptr<basic_parser<T>> scope_id;
2767
2768 protected:
// Sub-parsers for the hexadecimal digit values 0..15, in that order.
2769 std::shared_ptr<basic_parser<T>>
2770 m_digit_0,
2771 m_digit_1,
2772 m_digit_2,
2773 m_digit_3,
2774 m_digit_4,
2775 m_digit_5,
2776 m_digit_6,
2777 m_digit_7,
2778 m_digit_8,
2779 m_digit_9,
2780 m_digit_10,
2781 m_digit_11,
2782 m_digit_12,
2783 m_digit_13,
2784 m_digit_14,
2785 m_digit_15;
// Component separator (required) and scope ID separator (may be null; match() null-checks it).
2786 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2787 };
2788
2791#ifdef _UNICODE
2793#else
2795#endif
2797
/// Single-element parser for a character of a DNS domain name. Besides matching, it records in
/// `allow_on_edge` whether the matched character is a letter/digit (true) or a hyphen (false).
/// NOTE(review): the class declaration line and the constructor name line are absent from this
/// listing (numbers 2802 and 2805 are skipped) — presumably `basic_dns_domain_char`, the type
/// referenced elsewhere in this file; confirm.
2801 template <class T>
2803 {
2804 public:
/// \param[in] allow_idn  When true, any element the locale classifies as alphanumeric is
///                       accepted in addition to ASCII letters, digits and '-'
/// \param[in] locale     Locale used for that classification
2806 _In_ bool allow_idn,
2807 _In_ const std::locale& locale = std::locale()) :
2808 basic_parser<T>(locale),
2809 m_allow_idn(allow_idn),
2810 allow_on_edge(true)
2811 {}
2812
/// Tests the single element at `text[start]` and updates `allow_on_edge` accordingly.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index of the element to test
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags (not used by this parser)
///
/// \returns `true` and sets `interval` to [start, start + 1) when the element is accepted;
///          `false` with an invalid interval (start > end) otherwise.
2813 virtual bool match(
2814 _In_reads_or_z_(end) const T* text,
2815 _In_ size_t start = 0,
2816 _In_ size_t end = (size_t)-1,
2817 _In_ int flags = match_default)
2818 {
2819 assert(text || start >= end);
2820 if (start < end && text[start]) {
// ASCII letters and digits are accepted and flagged as allowed on an edge.
2821 if (('A' <= text[start] && text[start] <= 'Z') ||
2822 ('a' <= text[start] && text[start] <= 'z') ||
2823 ('0' <= text[start] && text[start] <= '9'))
2824 allow_on_edge = true;
// A hyphen is accepted but flagged as not allowed on an edge.
2825 else if (text[start] == '-')
2826 allow_on_edge = false;
// With IDN enabled, any locale-alphanumeric element is accepted as well.
2827 else if (m_allow_idn && std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
2828 allow_on_edge = true;
2829 else {
2830 interval.start = (interval.end = start) + 1;
2831 return false;
2832 }
2833 interval.end = (interval.start = start) + 1;
2834 return true;
2835 }
// No match: leave an invalid interval (start > end). allow_on_edge keeps its previous value.
2836 interval.start = (interval.end = start) + 1;
2837 return false;
2838 }
2839
2840 public:
// NOTE(review): the declaration of `allow_on_edge` (listing number 2841) is missing from this extract.
2842
2843 protected:
// Whether locale-alphanumeric elements beyond ASCII letters/digits are accepted.
2844 bool m_allow_idn;
2845 };
2846
2849#ifdef _UNICODE
2851#else
2853#endif
2854
/// SGML-aware counterpart of the DNS domain character parser: decodes the next code point with
/// next_sgml_cp() before classifying it, and updates `allow_on_edge` the same way.
/// NOTE(review): the class declaration line, the constructor name line and the constructor's
/// initializer are absent from this listing (numbers 2855-2858, 2861 and 2864 are skipped) —
/// presumably `sgml_dns_domain_char`; confirm.
2859 {
2860 public:
/// \param[in] allow_idn  When true, alphanumeric code points beyond ASCII are accepted
/// \param[in] locale     Locale used for that classification
2862 _In_ bool allow_idn,
2863 _In_ const std::locale& locale = std::locale()) :
2865 {}
2866
/// Tests the code point that begins at `text[start]` and updates `allow_on_edge` accordingly.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index at which the code point begins
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags (not used by this parser)
///
/// \returns `true` when the code point is accepted; `false` with an invalid interval otherwise.
2867 virtual bool match(
2868 _In_reads_or_z_(end) const char* text,
2869 _In_ size_t start = 0,
2870 _In_ size_t end = (size_t)-1,
2871 _In_ int flags = match_default)
2872 {
2873 assert(text || start >= end);
2874 if (start < end && text[start]) {
2875 wchar_t buf[3];
// next_sgml_cp() is defined elsewhere; it is handed interval.end, which presumably receives the
// index just past the decoded sequence — confirm against its declaration.
2876 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
2877 const wchar_t* chr_end = chr + stdex::strlen(chr);
// `chr[1] == 0` restricts the ASCII tests to sequences that decoded to exactly one character.
2878 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2879 ('a' <= chr[0] && chr[0] <= 'z') ||
2880 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2881 allow_on_edge = true;
2882 else if (chr[0] == '-' && chr[1] == 0)
2883 allow_on_edge = false;
// With IDN enabled, accept a decoded sequence in which every character is alphanumeric
// (scan_not finds no non-alnum character).
2884 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2885 allow_on_edge = true;
2886 else {
2887 interval.start = (interval.end = start) + 1;
2888 return false;
2889 }
// interval.end was already set through the next_sgml_cp() call above.
2890 interval.start = start;
2891 return true;
2892 }
// No match: leave an invalid interval (start > end).
2893 interval.start = (interval.end = start) + 1;
2894 return false;
2895 }
2896 };
2897
/// Parser for a DNS name: a sequence of up to 127 separator-delimited segments built from
/// domain characters, where each segment must start and end on a character whose
/// `allow_on_edge` flag is set.
/// NOTE(review): the class declaration line and the constructor name line are absent from this
/// listing (numbers 2902 and 2905 are skipped) — presumably `basic_dns_name`; confirm.
2901 template <class T>
2903 {
2904 public:
/// \param[in] allow_absolute  When true, a separator that ends a segment is included in the match
/// \param[in] domain_char     Parser for a single domain character (also reports `allow_on_edge`)
/// \param[in] separator       Parser for the separator between segments
/// \param[in] locale          Locale forwarded to the base parser
2906 _In_ bool allow_absolute,
2907 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2908 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2909 _In_ const std::locale& locale = std::locale()) :
2910 basic_parser<T>(locale),
// NOTE(review): the listing skips number 2911 here; presumably the initializer for
// `m_allow_absolute`, which match() reads — confirm.
2912 m_domain_char(domain_char),
2913 m_separator(separator)
2914 {}
2915
/// Tries to match a DNS name starting at `start`.
///
/// \param[in] text   Text to parse; scanning also stops at a zero element
/// \param[in] start  Index in `text` at which matching begins
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags, passed unchanged to the sub-parsers
///
/// \returns `true` when at least one segment was started; `false` with an invalid interval otherwise.
2916 virtual bool match(
2917 _In_reads_or_z_(end) const T* text,
2918 _In_ size_t start = 0,
2919 _In_ size_t end = (size_t)-1,
2920 _In_ int flags = match_default)
2921 {
2922 assert(text || start >= end);
// `i` is the scan position; interval.end trails it and only advances over characters that may
// legitimately end the name. `count` counts segments started (capped at 127).
2923 size_t i = start, count;
2924 for (count = 0; i < end && text[i] && count < 127; count++) {
// A segment must begin with a domain character that is allowed on an edge.
2925 if (m_domain_char->match(text, i, end, flags) &&
2926 m_domain_char->allow_on_edge)
2927 {
2928 // Domain start
2929 interval.end = i = m_domain_char->interval.end;
2930 while (i < end && text[i]) {
// A separator ends the segment only when the previous character was allowed on an edge.
2931 if (m_domain_char->allow_on_edge &&
2932 m_separator->match(text, i, end, flags))
2933 {
2934 // Domain end
// With m_allow_absolute the separator becomes part of the match; otherwise it is skipped for
// scanning purposes but interval.end stays in front of it.
2935 if (m_allow_absolute)
2936 interval.end = i = m_separator->interval.end;
2937 else {
2938 interval.end = i;
2939 i = m_separator->interval.end;
2940 }
2941 break;
2942 }
2943 if (m_domain_char->match(text, i, end, flags)) {
// Characters not allowed on an edge advance the scan position only, so a trailing run of
// them is left out of the matched interval.
2944 if (m_domain_char->allow_on_edge)
2945 interval.end = i = m_domain_char->interval.end;
2946 else
2947 i = m_domain_char->interval.end;
2948 }
2949 else {
// Anything else terminates the name; what has been accepted so far is the match.
2950 interval.start = start;
2951 return true;
2952 }
2953 }
2954 }
2955 else
2956 break;
2957 }
2958 if (count) {
2959 interval.start = start;
2960 return true;
2961 }
// No segment could be started: leave an invalid interval (start > end).
2962 interval.start = (interval.end = start) + 1;
2963 return false;
2964 }
2965
2966 protected:
// NOTE(review): the declaration of `m_allow_absolute` (listing number 2967) is missing from this extract.
// Parser for a single domain character; match() also reads its `allow_on_edge` flag.
2968 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
// Parser for the separator between segments.
2969 std::shared_ptr<basic_parser<T>> m_separator;
2970 };
2971
// `tdns_name` aliases `wdns_name` when _UNICODE is defined and `dns_name` otherwise.
2974#ifdef _UNICODE
2975 using tdns_name = wdns_name;
2976#else
2977 using tdns_name = dns_name;
2978#endif
2980
/// Single-element parser for a character allowed in the username part of a URL: a fixed set of
/// punctuation characters or any element the locale classifies as alphanumeric.
/// NOTE(review): the class declaration line is absent from this listing (number 2985 is skipped);
/// the class name is taken from the constructor below.
2984 template <class T>
2986 {
2987 public:
/// Constructs the parser with the locale used for alphanumeric classification.
2988 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2989
/// Tests the single element at `text[start]`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index of the element to test
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags (not used by this parser)
///
/// \returns `true` and sets `interval` to [start, start + 1) when the element is accepted;
///          `false` with an invalid interval (start > end) otherwise.
2990 virtual bool match(
2991 _In_reads_or_z_(end) const T* text,
2992 _In_ size_t start = 0,
2993 _In_ size_t end = (size_t)-1,
2994 _In_ int flags = match_default)
2995 {
2996 assert(text || start >= end);
2997 if (start < end && text[start]) {
// Parentheses are deliberately left out of the accepted set here (tests commented out below).
2998 if (text[start] == '-' ||
2999 text[start] == '.' ||
3000 text[start] == '_' ||
3001 text[start] == '~' ||
3002 text[start] == '%' ||
3003 text[start] == '!' ||
3004 text[start] == '$' ||
3005 text[start] == '&' ||
3006 text[start] == '\'' ||
3007 //text[start] == '(' ||
3008 //text[start] == ')' ||
3009 text[start] == '*' ||
3010 text[start] == '+' ||
3011 text[start] == ',' ||
3012 text[start] == ';' ||
3013 text[start] == '=' ||
3014 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3015 {
3016 interval.end = (interval.start = start) + 1;
3017 return true;
3018 }
3019 }
// No match: leave an invalid interval (start > end).
3020 interval.start = (interval.end = start) + 1;
3021 return false;
3022 }
3023 };
3024
3027#ifdef _UNICODE
3029#else
3031#endif
3032
/// SGML-aware counterpart of the URL username character parser: decodes the next code point with
/// next_sgml_cp() and accepts the same punctuation set or an alphanumeric sequence.
/// NOTE(review): the class declaration line is absent from this listing (numbers 3033-3036 are
/// skipped); the class name is taken from the constructor below.
3037 {
3038 public:
/// Constructs the parser, forwarding the locale to `basic_url_username_char<char>`.
3039 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3040
/// Tests the code point that begins at `text[start]`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index at which the code point begins
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags (not used by this parser)
///
/// \returns `true` when the code point is accepted; `false` with an invalid interval otherwise.
3041 virtual bool match(
3042 _In_reads_or_z_(end) const char* text,
3043 _In_ size_t start = 0,
3044 _In_ size_t end = (size_t)-1,
3045 _In_ int flags = match_default)
3046 {
3047 assert(text || start >= end);
3048 if (start < end && text[start]) {
3049 wchar_t buf[3];
// next_sgml_cp() is defined elsewhere; it is handed interval.end, which presumably receives the
// index just past the decoded sequence — confirm against its declaration.
3050 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3051 const wchar_t* chr_end = chr + stdex::strlen(chr);
// && binds tighter than ||: either a single punctuation character from the list, or a decoded
// sequence in which every character is alphanumeric (scan_not finds no non-alnum character).
3052 if ((chr[0] == L'-' ||
3053 chr[0] == L'.' ||
3054 chr[0] == L'_' ||
3055 chr[0] == L'~' ||
3056 chr[0] == L'%' ||
3057 chr[0] == L'!' ||
3058 chr[0] == L'$' ||
3059 chr[0] == L'&' ||
3060 chr[0] == L'\'' ||
3061 //chr[0] == L'(' ||
3062 //chr[0] == L')' ||
3063 chr[0] == L'*' ||
3064 chr[0] == L'+' ||
3065 chr[0] == L',' ||
3066 chr[0] == L';' ||
3067 chr[0] == L'=') && chr[1] == 0 ||
3068 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3069 {
// interval.end was already set through the next_sgml_cp() call above.
3070 interval.start = start;
3071 return true;
3072 }
3073 }
3074
// No match: leave an invalid interval (start > end).
3075 interval.start = (interval.end = start) + 1;
3076 return false;
3077 }
3078 };
3079
/// Single-element parser for a character allowed in the password part of a URL: a fixed set of
/// punctuation characters (including '(', ')' and ':') or any element the locale classifies as
/// alphanumeric.
/// NOTE(review): the class declaration line is absent from this listing (number 3084 is skipped);
/// the class name is taken from the constructor below.
3083 template <class T>
3085 {
3086 public:
/// Constructs the parser with the locale used for alphanumeric classification.
3087 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3088
/// Tests the single element at `text[start]`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index of the element to test
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags (not used by this parser)
///
/// \returns `true` and sets `interval` to [start, start + 1) when the element is accepted;
///          `false` with an invalid interval (start > end) otherwise.
3089 virtual bool match(
3090 _In_reads_or_z_(end) const T* text,
3091 _In_ size_t start = 0,
3092 _In_ size_t end = (size_t)-1,
3093 _In_ int flags = match_default)
3094 {
3095 assert(text || start >= end);
3096 if (start < end && text[start]) {
3097 if (text[start] == '-' ||
3098 text[start] == '.' ||
3099 text[start] == '_' ||
3100 text[start] == '~' ||
3101 text[start] == '%' ||
3102 text[start] == '!' ||
3103 text[start] == '$' ||
3104 text[start] == '&' ||
3105 text[start] == '\'' ||
3106 text[start] == '(' ||
3107 text[start] == ')' ||
3108 text[start] == '*' ||
3109 text[start] == '+' ||
3110 text[start] == ',' ||
3111 text[start] == ';' ||
3112 text[start] == '=' ||
3113 text[start] == ':' ||
3114 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3115 {
3116 interval.end = (interval.start = start) + 1;
3117 return true;
3118 }
3119 }
// No match: leave an invalid interval (start > end).
3120 interval.start = (interval.end = start) + 1;
3121 return false;
3122 }
3123 };
3124
3127#ifdef _UNICODE
3129#else
3131#endif
3132
/// SGML-aware counterpart of the URL password character parser: decodes the next code point with
/// next_sgml_cp() and accepts the same punctuation set or an alphanumeric sequence.
/// NOTE(review): the class declaration line is absent from this listing (numbers 3133-3136 are
/// skipped); the class name is taken from the constructor below.
3137 {
3138 public:
/// Constructs the parser, forwarding the locale to `basic_url_password_char<char>`.
3139 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3140
/// Tests the code point that begins at `text[start]`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index at which the code point begins
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags (not used by this parser)
///
/// \returns `true` when the code point is accepted; `false` with an invalid interval otherwise.
3141 virtual bool match(
3142 _In_reads_or_z_(end) const char* text,
3143 _In_ size_t start = 0,
3144 _In_ size_t end = (size_t)-1,
3145 _In_ int flags = match_default)
3146 {
3147 assert(text || start >= end);
3148 if (start < end && text[start]) {
3149 wchar_t buf[3];
// next_sgml_cp() is defined elsewhere; it is handed interval.end, which presumably receives the
// index just past the decoded sequence — confirm against its declaration.
3150 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3151 const wchar_t* chr_end = chr + stdex::strlen(chr);
// && binds tighter than ||: either a single punctuation character from the list, or a decoded
// sequence in which every character is alphanumeric (scan_not finds no non-alnum character).
3152 if ((chr[0] == L'-' ||
3153 chr[0] == L'.' ||
3154 chr[0] == L'_' ||
3155 chr[0] == L'~' ||
3156 chr[0] == L'%' ||
3157 chr[0] == L'!' ||
3158 chr[0] == L'$' ||
3159 chr[0] == L'&' ||
3160 chr[0] == L'\'' ||
3161 chr[0] == L'(' ||
3162 chr[0] == L')' ||
3163 chr[0] == L'*' ||
3164 chr[0] == L'+' ||
3165 chr[0] == L',' ||
3166 chr[0] == L';' ||
3167 chr[0] == L'=' ||
3168 chr[0] == L':') && chr[1] == 0 ||
3169 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3170 {
// interval.end was already set through the next_sgml_cp() call above.
3171 interval.start = start;
3172 return true;
3173 }
3174 }
// No match: leave an invalid interval (start > end).
3175 interval.start = (interval.end = start) + 1;
3176 return false;
3177 }
3178 };
3179
/// Single-element parser for a character allowed in the path part of a URL: a fixed set of
/// punctuation characters (including '/', '@', '?' and '#') or any element the locale classifies
/// as alphanumeric.
/// NOTE(review): the class declaration line is absent from this listing (number 3184 is skipped);
/// the class name is taken from the constructor below.
3183 template <class T>
3185 {
3186 public:
/// Constructs the parser with the locale used for alphanumeric classification.
3187 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3188
/// Tests the single element at `text[start]`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index of the element to test
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags (not used by this parser)
///
/// \returns `true` and sets `interval` to [start, start + 1) when the element is accepted;
///          `false` with an invalid interval (start > end) otherwise.
3189 virtual bool match(
3190 _In_reads_or_z_(end) const T* text,
3191 _In_ size_t start = 0,
3192 _In_ size_t end = (size_t)-1,
3193 _In_ int flags = match_default)
3194 {
3195 assert(text || start >= end);
3196 if (start < end && text[start]) {
3197 if (text[start] == '/' ||
3198 text[start] == '-' ||
3199 text[start] == '.' ||
3200 text[start] == '_' ||
3201 text[start] == '~' ||
3202 text[start] == '%' ||
3203 text[start] == '!' ||
3204 text[start] == '$' ||
3205 text[start] == '&' ||
3206 text[start] == '\'' ||
3207 text[start] == '(' ||
3208 text[start] == ')' ||
3209 text[start] == '*' ||
3210 text[start] == '+' ||
3211 text[start] == ',' ||
3212 text[start] == ';' ||
3213 text[start] == '=' ||
3214 text[start] == ':' ||
3215 text[start] == '@' ||
3216 text[start] == '?' ||
3217 text[start] == '#' ||
3218 std::use_facet<std::ctype<T>>(m_locale).is(std::ctype_base::alnum, text[start]))
3219 {
3220 interval.end = (interval.start = start) + 1;
3221 return true;
3222 }
3223 }
// No match: leave an invalid interval (start > end).
3224 interval.start = (interval.end = start) + 1;
3225 return false;
3226 }
3227 };
3228
3231#ifdef _UNICODE
3233#else
3235#endif
3236
/// SGML-aware counterpart of the URL path character parser: decodes the next code point with
/// next_sgml_cp() and accepts the same punctuation set or an alphanumeric sequence.
/// NOTE(review): the class declaration line is absent from this listing (numbers 3237-3240 are
/// skipped); the class name is taken from the constructor below.
3241 {
3242 public:
/// Constructs the parser, forwarding the locale to `basic_url_path_char<char>`.
3243 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3244
/// Tests the code point that begins at `text[start]`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index at which the code point begins
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags (not used by this parser)
///
/// \returns `true` when the code point is accepted; `false` with an invalid interval otherwise.
3245 virtual bool match(
3246 _In_reads_or_z_(end) const char* text,
3247 _In_ size_t start = 0,
3248 _In_ size_t end = (size_t)-1,
3249 _In_ int flags = match_default)
3250 {
3251 assert(text || start >= end);
3252 if (start < end && text[start]) {
3253 wchar_t buf[3];
// next_sgml_cp() is defined elsewhere; it is handed interval.end, which presumably receives the
// index just past the decoded sequence — confirm against its declaration.
3254 const wchar_t* chr = next_sgml_cp(text, start, end, interval.end, buf);
3255 const wchar_t* chr_end = chr + stdex::strlen(chr);
// && binds tighter than ||: either a single punctuation character from the list, or a decoded
// sequence in which every character is alphanumeric (scan_not finds no non-alnum character).
3256 if ((chr[0] == L'/' ||
3257 chr[0] == L'-' ||
3258 chr[0] == L'.' ||
3259 chr[0] == L'_' ||
3260 chr[0] == L'~' ||
3261 chr[0] == L'%' ||
3262 chr[0] == L'!' ||
3263 chr[0] == L'$' ||
3264 chr[0] == L'&' ||
3265 chr[0] == L'\'' ||
3266 chr[0] == L'(' ||
3267 chr[0] == L')' ||
3268 chr[0] == L'*' ||
3269 chr[0] == L'+' ||
3270 chr[0] == L',' ||
3271 chr[0] == L';' ||
3272 chr[0] == L'=' ||
3273 chr[0] == L':' ||
3274 chr[0] == L'@' ||
3275 chr[0] == L'?' ||
3276 chr[0] == L'#') && chr[1] == 0 ||
3277 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3278 {
// interval.end was already set through the next_sgml_cp() call above.
3279 interval.start = start;
3280 return true;
3281 }
3282 }
// No match: leave an invalid interval (start > end).
3283 interval.start = (interval.end = start) + 1;
3284 return false;
3285 }
3286 };
3287
/// Parser for the path portion of a URL. Splits the matched text into three sub-intervals:
/// `path`, `query` (after the query-start marker) and `bookmark` (after the bookmark-start marker).
/// NOTE(review): the class declaration line and the constructor name line are absent from this
/// listing (numbers 3292 and 3295 are skipped) — presumably `basic_url_path`; confirm.
3291 template <class T>
3293 {
3294 public:
/// \param[in] path_char       Parser for a single character of the path, query or bookmark
/// \param[in] query_start     Parser for the marker that begins the query
/// \param[in] bookmark_start  Parser for the marker that begins the bookmark
/// \param[in] locale          Locale forwarded to the base parser
3296 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3297 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3298 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3299 _In_ const std::locale& locale = std::locale()) :
3300 basic_parser<T>(locale),
3301 m_path_char(path_char),
3302 m_query_start(query_start),
3303 m_bookmark_start(bookmark_start)
3304 {}
3305
/// Tries to match a path, optionally followed by a query and/or a bookmark, starting at `start`.
///
/// \param[in] text   Text to parse; scanning also stops at a zero element
/// \param[in] start  Index in `text` at which matching begins
/// \param[in] end    Index in `text` at which scanning must stop
/// \param[in] flags  Match flags, passed unchanged to the sub-parsers
///
/// \returns `true` when a query marker, a bookmark marker or at least one path character was
///          consumed; `false` with invalid `path`, `query`, `bookmark` and `interval` otherwise.
3306 virtual bool match(
3307 _In_reads_or_z_(end) const T* text,
3308 _In_ size_t start = 0,
3309 _In_ size_t end = (size_t)-1,
3310 _In_ int flags = match_default)
3311 {
3312 assert(text || start >= end);
3313
// Start with an open path at `start` and with query and bookmark marked invalid (start > end).
3314 interval.end = start;
3315 path.start = start;
3316 query.start = 1;
3317 query.end = 0;
3318 bookmark.start = 1;
3319 bookmark.end = 0;
3320
// At every position the markers are tried before the generic path character, so a marker
// always wins even if m_path_char would accept the same element.
3321 for (;;) {
3322 if (interval.end >= end || !text[interval.end])
3323 break;
3324 if (m_query_start->match(text, interval.end, end, flags)) {
// The query marker closes the path; the query begins right after the marker.
3325 path.end = interval.end;
3326 query.start = interval.end = m_query_start->interval.end;
3327 for (;;) {
3328 if (interval.end >= end || !text[interval.end]) {
3329 query.end = interval.end;
3330 break;
3331 }
3332 if (m_bookmark_start->match(text, interval.end, end, flags)) {
// The bookmark marker closes the query; the bookmark runs to the first non-path character.
3333 query.end = interval.end;
3334 bookmark.start = interval.end = m_bookmark_start->interval.end;
3335 for (;;) {
3336 if (interval.end >= end || !text[interval.end]) {
3337 bookmark.end = interval.end;
3338 break;
3339 }
3340 if (m_path_char->match(text, interval.end, end, flags))
3341 interval.end = m_path_char->interval.end;
3342 else {
3343 bookmark.end = interval.end;
3344 break;
3345 }
3346 }
3347 interval.start = start;
3348 return true;
3349 }
3350 if (m_path_char->match(text, interval.end, end, flags))
3351 interval.end = m_path_char->interval.end;
3352 else {
3353 query.end = interval.end;
3354 break;
3355 }
3356 }
3357 interval.start = start;
3358 return true;
3359 }
3360 if (m_bookmark_start->match(text, interval.end, end, flags)) {
// Bookmark without a query: the marker closes the path and `query` stays invalid.
3361 path.end = interval.end;
3362 bookmark.start = interval.end = m_bookmark_start->interval.end;
3363 for (;;) {
3364 if (interval.end >= end || !text[interval.end]) {
3365 bookmark.end = interval.end;
3366 break;
3367 }
3368 if (m_path_char->match(text, interval.end, end, flags))
3369 interval.end = m_path_char->interval.end;
3370 else {
3371 bookmark.end = interval.end;
3372 break;
3373 }
3374 }
3375 interval.start = start;
3376 return true;
3377 }
3378 if (m_path_char->match(text, interval.end, end, flags))
3379 interval.end = m_path_char->interval.end;
3380 else
3381 break;
3382 }
3383
// Plain path with neither query nor bookmark: succeed only if something was consumed.
3384 if (start < interval.end) {
3385 path.end = interval.end;
3386 interval.start = start;
3387 return true;
3388 }
3389
// Nothing matched. `query` is not touched here; it was already marked invalid on entry and is
// only modified on branches that return true.
3390 path.start = 1;
3391 path.end = 0;
3392 bookmark.start = 1;
3393 bookmark.end = 0;
3394 interval.start = (interval.end = start) + 1;
3395 return false;
3396 }
3397
/// Marks `path`, `query` and `bookmark` invalid (start > end).
3398 virtual void invalidate()
3399 {
3400 path.start = 1;
3401 path.end = 0;
3402 query.start = 1;
3403 query.end = 0;
3404 bookmark.start = 1;
3405 bookmark.end = 0;
// NOTE(review): the listing skips number 3406 here; presumably the base-class invalidate() call — confirm.
3407 }
3408
3409 public:
// NOTE(review): the declarations of `path` and `query` (listing numbers 3410-3411) are missing
// from this extract.
// Interval of the bookmark text (after the bookmark marker); invalid when there is none.
3412 stdex::interval<size_t> bookmark;
3413
3414 protected:
// Sub-parsers supplied to the constructor.
3415 std::shared_ptr<basic_parser<T>> m_path_char;
3416 std::shared_ptr<basic_parser<T>> m_query_start;
3417 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3418 };
3419
// `turl_path` aliases `wurl_path` when _UNICODE is defined and `url_path` otherwise.
3422#ifdef _UNICODE
3423 using turl_path = wurl_path;
3424#else
3425 using turl_path = url_path;
3426#endif
3428
3432 template <class T>
3433 class basic_url : public basic_parser<T>
3434 {
3435 public:
/// Constructs the URL parser from its sub-parsers. Every argument is copied into the member
/// of the corresponding name (underscore-prefixed arguments go to the members without the
/// underscore, the others to `m_`-prefixed members).
///
/// \param[in] _http_scheme    Parser for the HTTP scheme name
/// \param[in] _ftp_scheme     Parser for the FTP scheme name
/// \param[in] _mailto_scheme  Parser for the mailto scheme name
/// \param[in] _file_scheme    Parser for the file scheme name
/// \param[in] colon           Parser for the colon separator
/// \param[in] slash           Parser for the slash separator
/// \param[in] _username       Parser for the username
/// \param[in] _password       Parser for the password
/// \param[in] at              Parser for the separator between credentials and host
/// \param[in] ip_lbracket     Parser for the opening bracket around an IP host
/// \param[in] ip_rbracket     Parser for the closing bracket around an IP host
/// \param[in] _ipv4_host      Parser for an IPv4 host
/// \param[in] _ipv6_host      Parser for an IPv6 host
/// \param[in] _dns_host       Parser for a DNS host name
/// \param[in] _port           Parser for the port
/// \param[in] _path           Parser for the path
/// \param[in] locale          Locale forwarded to the base parser
3436 basic_url(
3437 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3438 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3439 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3440 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3441 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3442 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3443 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3444 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3445 _In_ const std::shared_ptr<basic_parser<T>>& at,
3446 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3447 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3448 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3449 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3450 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3451 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3452 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3453 _In_ const std::locale& locale = std::locale()) :
3454 basic_parser<T>(locale),
3455 http_scheme(_http_scheme),
3456 ftp_scheme(_ftp_scheme),
3457 mailto_scheme(_mailto_scheme),
3458 file_scheme(_file_scheme),
3459 m_colon(colon),
3460 m_slash(slash),
3461 username(_username),
3462 password(_password),
3463 m_at(at),
3464 m_ip_lbracket(ip_lbracket),
3465 m_ip_rbracket(ip_rbracket),
3466 ipv4_host(_ipv4_host),
3467 ipv6_host(_ipv6_host),
3468 dns_host(_dns_host),
3469 port(_port),
3470 path(_path)
3471 {}
3472
3473 virtual bool match(
3474 _In_reads_or_z_(end) const T* text,
3475 _In_ size_t start = 0,
3476 _In_ size_t end = (size_t)-1,
3477 _In_ int flags = match_default)
3478 {
3479 assert(text || start >= end);
3480
3481 interval.end = start;
3482
3483 if (http_scheme->match(text, interval.end, end, flags) &&
3484 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3485 m_slash->match(text, m_colon->interval.end, end, flags) &&
3486 m_slash->match(text, m_slash->interval.end, end, flags))
3487 {
3488 // http://
3489 interval.end = m_slash->interval.end;
3490 ftp_scheme->invalidate();
3491 mailto_scheme->invalidate();
3492 file_scheme->invalidate();
3493 }
3494 else if (ftp_scheme->match(text, interval.end, end, flags) &&
3495 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3496 m_slash->match(text, m_colon->interval.end, end, flags) &&
3497 m_slash->match(text, m_slash->interval.end, end, flags))
3498 {
3499 // ftp://
3500 interval.end = m_slash->interval.end;
3501 http_scheme->invalidate();
3502 mailto_scheme->invalidate();
3503 file_scheme->invalidate();
3504 }
3505 else if (mailto_scheme->match(text, interval.end, end, flags) &&
3506 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3507 {
3508 // mailto:
3509 interval.end = m_colon->interval.end;
3510 http_scheme->invalidate();
3511 ftp_scheme->invalidate();
3512 file_scheme->invalidate();
3513 }
3514 else if (file_scheme->match(text, interval.end, end, flags) &&
3515 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3516 m_slash->match(text, m_colon->interval.end, end, flags) &&
3517 m_slash->match(text, m_slash->interval.end, end, flags))
3518 {
3519 // file://
3520 interval.end = m_slash->interval.end;
3521 http_scheme->invalidate();
3522 ftp_scheme->invalidate();
3523 mailto_scheme->invalidate();
3524 }
3525 else {
3526 // Default to http:
3527 http_scheme->invalidate();
3528 ftp_scheme->invalidate();
3529 mailto_scheme->invalidate();
3530 file_scheme->invalidate();
3531 }
3532
3533 if (ftp_scheme->interval) {
3534 if (username->match(text, interval.end, end, flags)) {
3535 if (m_colon->match(text, username->interval.end, end, flags) &&
3536 password->match(text, m_colon->interval.end, end, flags) &&
3537 m_at->match(text, password->interval.end, end, flags))
3538 {
3539 // Username and password
3540 interval.end = m_at->interval.end;
3541 }
3542 else if (m_at->match(text, interval.end, end, flags)) {
3543 // Username only
3544 interval.end = m_at->interval.end;
3545 password->invalidate();
3546 }
3547 else {
3548 username->invalidate();
3549 password->invalidate();
3550 }
3551 }
3552 else {
3553 username->invalidate();
3554 password->invalidate();
3555 }
3556
3557 if (ipv4_host->match(text, interval.end, end, flags)) {
3558 // Host is IPv4
3559 interval.end = ipv4_host->interval.end;
3560 ipv6_host->invalidate();
3561 dns_host->invalidate();
3562 }
3563 else if (
3564 m_ip_lbracket->match(text, interval.end, end, flags) &&
3565 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3566 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3567 {
3568 // Host is IPv6
3569 interval.end = m_ip_rbracket->interval.end;
3570 ipv4_host->invalidate();
3571 dns_host->invalidate();
3572 }
3573 else if (dns_host->match(text, interval.end, end, flags)) {
3574 // Host is hostname
3575 interval.end = dns_host->interval.end;
3576 ipv4_host->invalidate();
3577 ipv6_host->invalidate();
3578 }
3579 else {
3580 invalidate();
3581 return false;
3582 }
3583
3584 if (m_colon->match(text, interval.end, end, flags) &&
3585 port->match(text, m_colon->interval.end, end, flags))
3586 {
3587 // Port
3588 interval.end = port->interval.end;
3589 }
3590 else
3591 port->invalidate();
3592
3593 if (path->match(text, interval.end, end, flags)) {
3594 // Path
3595 interval.end = path->interval.end;
3596 }
3597
3598 interval.start = start;
3599 return true;
3600 }
3601
3602 if (mailto_scheme->interval) {
3603 if (username->match(text, interval.end, end, flags) &&
3604 m_at->match(text, username->interval.end, end, flags))
3605 {
3606 // Username
3607 interval.end = m_at->interval.end;
3608 }
3609 else {
3610 invalidate();
3611 return false;
3612 }
3613
3614 if (m_ip_lbracket->match(text, interval.end, end, flags) &&
3615 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3616 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3617 {
3618 // Host is IPv4
3619 interval.end = m_ip_rbracket->interval.end;
3620 ipv6_host->invalidate();
3621 dns_host->invalidate();
3622 }
3623 else if (
3624 m_ip_lbracket->match(text, interval.end, end, flags) &&
3625 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3626 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3627 {
3628 // Host is IPv6
3629 interval.end = m_ip_rbracket->interval.end;
3630 ipv4_host->invalidate();
3631 dns_host->invalidate();
3632 }
3633 else if (dns_host->match(text, interval.end, end, flags)) {
3634 // Host is hostname
3635 interval.end = dns_host->interval.end;
3636 ipv4_host->invalidate();
3637 ipv6_host->invalidate();
3638 }
3639 else {
3640 invalidate();
3641 return false;
3642 }
3643
3644 password->invalidate();
3645 port->invalidate();
3646 path->invalidate();
3647 interval.start = start;
3648 return true;
3649 }
3650
3651 if (file_scheme->interval) {
3652 if (path->match(text, interval.end, end, flags)) {
3653 // Path
3654 interval.end = path->interval.end;
3655 }
3656
3657 username->invalidate();
3658 password->invalidate();
3659 ipv4_host->invalidate();
3660 ipv6_host->invalidate();
3661 dns_host->invalidate();
3662 port->invalidate();
3663 interval.start = start;
3664 return true;
3665 }
3666
3667 // "http://" found or defaulted to
3668
3669 // If "http://" explicit, test for username&password.
3670 if (http_scheme->interval &&
3671 username->match(text, interval.end, end, flags))
3672 {
3673 if (m_colon->match(text, username->interval.end, end, flags) &&
3674 password->match(text, m_colon->interval.end, end, flags) &&
3675 m_at->match(text, password->interval.end, end, flags))
3676 {
3677 // Username and password
3678 interval.end = m_at->interval.end;
3679 }
3680 else if (m_at->match(text, username->interval.end, end, flags)) {
3681 // Username only
3682 interval.end = m_at->interval.end;
3683 password->invalidate();
3684 }
3685 else {
3686 username->invalidate();
3687 password->invalidate();
3688 }
3689 }
3690 else {
3691 username->invalidate();
3692 password->invalidate();
3693 }
3694
3695 if (ipv4_host->match(text, interval.end, end, flags)) {
3696 // Host is IPv4
3697 interval.end = ipv4_host->interval.end;
3698 ipv6_host->invalidate();
3699 dns_host->invalidate();
3700 }
3701 else if (
3702 m_ip_lbracket->match(text, interval.end, end, flags) &&
3703 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3704 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3705 {
3706 // Host is IPv6
3707 interval.end = m_ip_rbracket->interval.end;
3708 ipv4_host->invalidate();
3709 dns_host->invalidate();
3710 }
3711 else if (dns_host->match(text, interval.end, end, flags)) {
3712 // Host is hostname
3713 interval.end = dns_host->interval.end;
3714 ipv4_host->invalidate();
3715 ipv6_host->invalidate();
3716 }
3717 else {
3718 invalidate();
3719 return false;
3720 }
3721
3722 if (m_colon->match(text, interval.end, end, flags) &&
3723 port->match(text, m_colon->interval.end, end, flags))
3724 {
3725 // Port
3726 interval.end = port->interval.end;
3727 }
3728 else
3729 port->invalidate();
3730
3731 if (path->match(text, interval.end, end, flags)) {
3732 // Path
3733 interval.end = path->interval.end;
3734 }
3735
3736 interval.start = start;
3737 return true;
3738 }
3739
/// Invalidates every public component parser of the URL: the four scheme parsers,
/// username, password, the three host parsers, port and path.
/// The delimiter parsers (m_colon, m_slash, m_at, m_ip_lbracket, m_ip_rbracket) are not touched here.
// NOTE(review): listing line 3753 is not present in this extract; presumably it resets this
// parser's own interval - confirm against the original header.
3740 virtual void invalidate()
3741 {
3742 http_scheme->invalidate();
3743 ftp_scheme->invalidate();
3744 mailto_scheme->invalidate();
3745 file_scheme->invalidate();
3746 username->invalidate();
3747 password->invalidate();
3748 ipv4_host->invalidate();
3749 ipv6_host->invalidate();
3750 dns_host->invalidate();
3751 port->invalidate();
3752 path->invalidate();
3754 }
3755
3756 public:
3757 std::shared_ptr<basic_parser<T>> http_scheme;
3758 std::shared_ptr<basic_parser<T>> ftp_scheme;
3759 std::shared_ptr<basic_parser<T>> mailto_scheme;
3760 std::shared_ptr<basic_parser<T>> file_scheme;
3761 std::shared_ptr<basic_parser<T>> username;
3762 std::shared_ptr<basic_parser<T>> password;
3763 std::shared_ptr<basic_parser<T>> ipv4_host;
3764 std::shared_ptr<basic_parser<T>> ipv6_host;
3765 std::shared_ptr<basic_parser<T>> dns_host;
3766 std::shared_ptr<basic_parser<T>> port;
3767 std::shared_ptr<basic_parser<T>> path;
3768
3769 protected:
3770 std::shared_ptr<basic_parser<T>> m_colon;
3771 std::shared_ptr<basic_parser<T>> m_slash;
3772 std::shared_ptr<basic_parser<T>> m_at;
3773 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3774 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3775 };
3776
// URL parser for char text.
3777 using url = basic_url<char>;
// URL parser for wchar_t text.
3778 using wurl = basic_url<wchar_t>;
// turl follows the build's character set: wurl when _UNICODE is defined, url otherwise.
3779#ifdef _UNICODE
3780 using turl = wurl;
3781#else
3782 using turl = url;
3783#endif
// Same instantiation as url (basic_url<char>), under an SGML-specific name.
3784 using sgml_url = basic_url<char>;
3785
3789 template <class T>
3791 {
3792 public:
3794 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3795 _In_ const std::shared_ptr<basic_parser<T>>& at,
3796 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3797 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3798 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3799 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3800 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3801 _In_ const std::locale& locale = std::locale()) :
3802 basic_parser<T>(locale),
3803 username(_username),
3804 m_at(at),
3805 m_ip_lbracket(ip_lbracket),
3806 m_ip_rbracket(ip_rbracket),
3807 ipv4_host(_ipv4_host),
3808 ipv6_host(_ipv6_host),
3809 dns_host(_dns_host)
3810 {}
3811
3812 virtual bool match(
3813 _In_reads_or_z_(end) const T* text,
3814 _In_ size_t start = 0,
3815 _In_ size_t end = (size_t)-1,
3816 _In_ int flags = match_default)
3817 {
3818 assert(text || start >= end);
3819
3820 if (username->match(text, start, end, flags) &&
3821 m_at->match(text, username->interval.end, end, flags))
3822 {
3823 // Username@
3824 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3825 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3826 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3827 {
3828 // Host is IPv4
3829 interval.end = m_ip_rbracket->interval.end;
3830 ipv6_host->invalidate();
3831 dns_host->invalidate();
3832 }
3833 else if (
3834 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3835 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3836 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3837 {
3838 // Host is IPv6
3839 interval.end = m_ip_rbracket->interval.end;
3840 ipv4_host->invalidate();
3841 dns_host->invalidate();
3842 }
3843 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3844 // Host is hostname
3845 interval.end = dns_host->interval.end;
3846 ipv4_host->invalidate();
3847 ipv6_host->invalidate();
3848 }
3849 else
3850 goto error;
3851 interval.start = start;
3852 return true;
3853 }
3854
3855 error:
3856 username->invalidate();
3857 ipv4_host->invalidate();
3858 ipv6_host->invalidate();
3859 dns_host->invalidate();
3860 interval.start = (interval.end = start) + 1;
3861 return false;
3862 }
3863
/// Invalidates the username parser and all three host parsers.
// NOTE(review): listing line 3870 is not present in this extract; presumably it resets this
// parser's own interval - confirm against the original header.
3864 virtual void invalidate()
3865 {
3866 username->invalidate();
3867 ipv4_host->invalidate();
3868 ipv6_host->invalidate();
3869 dns_host->invalidate();
3871 }
3872
3873 public:
3874 std::shared_ptr<basic_parser<T>> username;
3875 std::shared_ptr<basic_parser<T>> ipv4_host;
3876 std::shared_ptr<basic_parser<T>> ipv6_host;
3877 std::shared_ptr<basic_parser<T>> dns_host;
3878
3879 protected:
3880 std::shared_ptr<basic_parser<T>> m_at;
3881 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3882 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3883 };
3884
3887#ifdef _UNICODE
3889#else
3891#endif
3893
3897 template <class T>
3899 {
3900 public:
3902 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3903 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3904 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3905 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3906 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3907 _In_ const std::locale& locale = std::locale()) :
3908 basic_parser<T>(locale),
3910 apex(_apex),
3911 eyes(_eyes),
3912 nose(_nose),
3913 mouth(_mouth)
3914 {}
3915
/// Matches an emoticon at `start`.
///
/// Two ways to match are tried in order:
/// 1. the ready-made `emoticon` parser, when one is set;
/// 2. a composed emoticon: optional apex, eyes, optional nose, then a mouth that may repeat.
///
/// `emoticon`, `apex` and `nose` are null-checked before use; `eyes` and `mouth` are used unconditionally.
///
/// Returns true on a match. On failure all set component parsers are invalidated, the
/// interval is set to an invalid range (start > end) and false is returned.
// NOTE(review): listing lines 3930, 3944, 3949, 3955 and 3961 are not present in this extract
// (e.g. the declaration of `start_mouth`); confirm the missing statements against the original header.
3916 virtual bool match(
3917 _In_reads_or_z_(end) const T* text,
3918 _In_ size_t start = 0,
3919 _In_ size_t end = (size_t)-1,
3920 _In_ int flags = match_default)
3921 {
3922 assert(text || start >= end);
3923
// Way 1: the whole emoticon is recognised by a single parser; the component parsers carry no result.
3924 if (emoticon && emoticon->match(text, start, end, flags)) {
3925 if (apex) apex->invalidate();
3926 eyes->invalidate();
3927 if (nose) nose->invalidate();
3928 mouth->invalidate();
3929 interval.start = start;
3931 return true;
3932 }
3933
// Way 2: compose the emoticon from its parts, using interval.end as the running position.
3934 interval.end = start;
3935
// The apex is optional: the position advances only when it matches.
3936 if (apex && apex->match(text, interval.end, end, flags))
3937 interval.end = apex->interval.end;
3938
3939 if (eyes->match(text, interval.end, end, flags)) {
// Eyes followed by nose and mouth.
3940 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3941 mouth->match(text, nose->interval.end, end, flags))
3942 {
3943 size_t
3945 hit_offset = mouth->hit_offset;
3946 // Mouth may repeat :-)))))))
// Keep consuming mouths for as long as the same set element (same hit_offset) matches again.
3947 for (interval.end = mouth->interval.end; mouth->match(text, interval.end, end, flags) && mouth->hit_offset == hit_offset; interval.end = mouth->interval.end);
// The loop's last match() call overwrote the mouth interval; put back its start.
3948 mouth->interval.start = start_mouth;
3950 interval.start = start;
3951 return true;
3952 }
// Eyes followed directly by the mouth (no nose).
3953 if (mouth->match(text, eyes->interval.end, end, flags)) {
3954 size_t
3956 hit_offset = mouth->hit_offset;
3957 // Mouth may repeat :-)))))))
3958 for (interval.end = mouth->interval.end; mouth->match(text, interval.end, end, flags) && mouth->hit_offset == hit_offset; interval.end = mouth->interval.end);
3959 if (nose) nose->invalidate();
3960 mouth->interval.start = start_mouth;
3962 interval.start = start;
3963 return true;
3964 }
3965 }
3966
// No match: invalidate every component and mark the own interval invalid (start > end).
3967 if (emoticon) emoticon->invalidate();
3968 if (apex) apex->invalidate();
3969 eyes->invalidate();
3970 if (nose) nose->invalidate();
3971 mouth->invalidate();
3972 interval.start = (interval.end = start) + 1;
3973 return false;
3974 }
3975
/// Invalidates all component parsers; the optional ones (emoticon, apex, nose) only when set.
// NOTE(review): listing line 3983 is not present in this extract; presumably it resets this
// parser's own interval - confirm against the original header.
3976 virtual void invalidate()
3977 {
3978 if (emoticon) emoticon->invalidate();
3979 if (apex) apex->invalidate();
3980 eyes->invalidate();
3981 if (nose) nose->invalidate();
3982 mouth->invalidate();
3984 }
3985
3986 public:
3987 std::shared_ptr<basic_parser<T>> emoticon;
3988 std::shared_ptr<basic_parser<T>> apex;
3989 std::shared_ptr<basic_parser<T>> eyes;
3990 std::shared_ptr<basic_parser<T>> nose;
3991 std::shared_ptr<basic_set<T>> mouth;
3992 };
3993
3996#ifdef _UNICODE
3997 using temoticon = wemoticon;
3998#else
3999 using temoticon = emoticon;
4000#endif
4002
///
/// Date layouts
///
/// Every layout occupies its own bit, so several of them can be OR-ed together into the
/// format mask given to basic_date.
///
enum date_format_t {
    date_format_none = 0,      ///< No layout
    date_format_dmy  = 1 << 0, ///< Day, month, year
    date_format_mdy  = 1 << 1, ///< Month, day, year
    date_format_ymd  = 1 << 2, ///< Year, month, day
    date_format_ym   = 1 << 3, ///< Year, month
    date_format_my   = 1 << 4, ///< Month, year
    date_format_dm   = 1 << 5, ///< Day, month
    date_format_md   = 1 << 6, ///< Month, day
};
4016
4020 template <class T>
4021 class basic_date : public basic_parser<T>
4022 {
4023 public:
4024 basic_date(
4025 _In_ int format_mask,
4026 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4027 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4028 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4029 _In_ const std::shared_ptr<basic_set<T>>& separator,
4030 _In_ const std::shared_ptr<basic_parser<T>>& space,
4031 _In_ const std::locale& locale = std::locale()) :
4032 basic_parser<T>(locale),
4033 format(date_format_none),
4034 m_format_mask(format_mask),
4035 day(_day),
4036 month(_month),
4037 year(_year),
4038 m_separator(separator),
4039 m_space(space)
4040 {}
4041
4042 virtual bool match(
4043 _In_reads_or_z_(end) const T* text,
4044 _In_ size_t start = 0,
4045 _In_ size_t end = (size_t)-1,
4046 _In_ int flags = match_default)
4047 {
4048 assert(text || start >= end);
4049
4050 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4051 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4052 if (day->match(text, start, end, flags)) {
4053 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4054 if (m_separator->match(text, interval.end, end, flags)) {
4055 size_t hit_offset = m_separator->hit_offset;
4056 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4057 if (month->match(text, interval.end, end, flags)) {
4058 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4059 if (m_separator->match(text, interval.end, end, flags) &&
4060 m_separator->hit_offset == hit_offset) // Both separators must match.
4061 {
4062 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4063 if (year->match(text, interval.end, end, flags) &&
4064 is_valid(day->value, month->value))
4065 {
4066 interval.start = start;
4067 interval.end = year->interval.end;
4068 format = date_format_dmy;
4069 return true;
4070 }
4071 }
4072 }
4073 }
4074 }
4075 }
4076
4077 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4078 if (month->match(text, start, end, flags)) {
4079 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4080 if (m_separator->match(text, interval.end, end, flags)) {
4081 size_t hit_offset = m_separator->hit_offset;
4082 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4083 if (day->match(text, interval.end, end, flags)) {
4084 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4085 if (m_separator->match(text, interval.end, end, flags) &&
4086 m_separator->hit_offset == hit_offset) // Both separators must match.
4087 {
4088 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4089 if (year->match(text, interval.end, end, flags) &&
4090 is_valid(day->value, month->value))
4091 {
4092 interval.start = start;
4093 interval.end = year->interval.end;
4094 format = date_format_mdy;
4095 return true;
4096 }
4097 }
4098 }
4099 }
4100 }
4101 }
4102
4103 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4104 if (year->match(text, start, end, flags)) {
4105 for (interval.end = year->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4106 if (m_separator->match(text, interval.end, end, flags)) {
4107 size_t hit_offset = m_separator->hit_offset;
4108 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4109 if (month->match(text, interval.end, end, flags)) {
4110 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4111 if (m_separator->match(text, interval.end, end, flags) &&
4112 m_separator->hit_offset == hit_offset) // Both separators must match.
4113 {
4114 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4115 if (day->match(text, interval.end, end, flags) &&
4116 is_valid(day->value, month->value))
4117 {
4118 interval.start = start;
4119 interval.end = day->interval.end;
4120 format = date_format_ymd;
4121 return true;
4122 }
4123 }
4124 }
4125 }
4126 }
4127 }
4128
4129 if ((m_format_mask & date_format_ym) == date_format_ym) {
4130 if (year->match(text, start, end, flags)) {
4131 for (interval.end = year->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4132 if (m_separator->match(text, interval.end, end, flags)) {
4133 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4134 if (month->match(text, interval.end, end, flags) &&
4135 is_valid((size_t)-1, month->value))
4136 {
4137 if (day) day->invalidate();
4138 interval.start = start;
4139 interval.end = month->interval.end;
4140 format = date_format_ym;
4141 return true;
4142 }
4143 }
4144 }
4145 }
4146
4147 if ((m_format_mask & date_format_my) == date_format_my) {
4148 if (month->match(text, start, end, flags)) {
4149 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4150 if (m_separator->match(text, interval.end, end, flags)) {
4151 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4152 if (year->match(text, interval.end, end, flags) &&
4153 is_valid((size_t)-1, month->value))
4154 {
4155 if (day) day->invalidate();
4156 interval.start = start;
4157 interval.end = year->interval.end;
4158 format = date_format_my;
4159 return true;
4160 }
4161 }
4162 }
4163 }
4164
4165 if ((m_format_mask & date_format_dm) == date_format_dm) {
4166 if (day->match(text, start, end, flags)) {
4167 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4168 if (m_separator->match(text, interval.end, end, flags)) {
4169 size_t hit_offset = m_separator->hit_offset;
4170 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4171 if (month->match(text, interval.end, end, flags) &&
4172 is_valid(day->value, month->value))
4173 {
4174 if (year) year->invalidate();
4175 interval.start = start;
4176 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4177 if (m_separator->match(text, interval.end, end, flags) &&
4178 m_separator->hit_offset == hit_offset) // Both separators must match.
4179 interval.end = m_separator->interval.end;
4180 else
4181 interval.end = month->interval.end;
4182 format = date_format_dm;
4183 return true;
4184 }
4185 }
4186 }
4187 }
4188
4189 if ((m_format_mask & date_format_md) == date_format_md) {
4190 if (month->match(text, start, end, flags)) {
4191 for (interval.end = month->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4192 if (m_separator->match(text, interval.end, end, flags)) {
4193 size_t hit_offset = m_separator->hit_offset;
4194 for (interval.end = m_separator->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4195 if (day->match(text, interval.end, end, flags) &&
4196 is_valid(day->value, month->value))
4197 {
4198 if (year) year->invalidate();
4199 interval.start = start;
4200 for (interval.end = day->interval.end; m_space->match(text, interval.end, end, space_match_flags); interval.end = m_space->interval.end);
4201 if (m_separator->match(text, interval.end, end, flags) &&
4202 m_separator->hit_offset == hit_offset) // Both separators must match.
4203 interval.end = m_separator->interval.end;
4204 else
4205 interval.end = day->interval.end;
4206 format = date_format_md;
4207 return true;
4208 }
4209 }
4210 }
4211 }
4212
4213 if (day) day->invalidate();
4214 if (month) month->invalidate();
4215 if (year) year->invalidate();
4216 format = date_format_none;
4217 interval.start = (interval.end = start) + 1;
4218 return false;
4219 }
4220
/// Invalidates the day, month and year parsers that are set and resets `format` to date_format_none.
// NOTE(review): listing line 4227 is not present in this extract; presumably it resets this
// parser's own interval - confirm against the original header.
4221 virtual void invalidate()
4222 {
4223 if (day) day->invalidate();
4224 if (month) month->invalidate();
4225 if (year) year->invalidate();
4226 format = date_format_none;
4228 }
4229
4230 protected:
///
/// Checks whether a day and month combination can occur in a calendar
///
/// The year is not known here, so February is always allowed 29 days.
///
/// \param[in] day    Day of month counted from 1, or (size_t)-1 to check the month alone
/// \param[in] month  Month counted from 1 (January), or (size_t)-1 to check the day alone
///
/// \returns true when month is within 1..12 and day is within 1..length of that month; false otherwise
///
static inline bool is_valid(size_t day, size_t month)
{
    // Greatest day number of each month; slot 0 is unused so that months index directly.
    static const size_t last_day[13] = { 0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
    const size_t unspecified = (size_t)-1;

    // An unspecified month is treated as January, which has all 31 days (validates the day only).
    const size_t m = month == unspecified ? 1 : month;
    // An unspecified day is treated as the 1st, which every month has (validates the month only).
    const size_t d = day == unspecified ? 1 : day;

    if (m < 1 || 12 < m)
        return false;
    return 1 <= d && d <= last_day[m];
}
4262
4263 public:
4264 date_format_t format;
4265 std::shared_ptr<basic_integer<T>> day;
4266 std::shared_ptr<basic_integer<T>> month;
4267 std::shared_ptr<basic_integer<T>> year;
4268
4269 protected:
4270 int m_format_mask;
4271 std::shared_ptr<basic_set<T>> m_separator;
4272 std::shared_ptr<basic_parser<T>> m_space;
4273 };
4274
// Date parser for char text.
4275 using date = basic_date<char>;
// Date parser for wchar_t text.
4276 using wdate = basic_date<wchar_t>;
// tdate follows the build's character set: wdate when _UNICODE is defined, date otherwise.
4277#ifdef _UNICODE
4278 using tdate = wdate;
4279#else
4280 using tdate = date;
4281#endif
4283
4287 template <class T>
4288 class basic_time : public basic_parser<T>
4289 {
4290 public:
4291 basic_time(
4292 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4293 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4294 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4295 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4296 _In_ const std::shared_ptr<basic_set<T>>& separator,
4297 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4298 _In_ const std::locale& locale = std::locale()) :
4299 basic_parser<T>(locale),
4300 hour(_hour),
4301 minute(_minute),
4302 second(_second),
4303 millisecond(_millisecond),
4304 m_separator(separator),
4305 m_millisecond_separator(millisecond_separator)
4306 {}
4307
4308 virtual bool match(
4309 _In_reads_or_z_(end) const T* text,
4310 _In_ size_t start = 0,
4311 _In_ size_t end = (size_t)-1,
4312 _In_ int flags = match_default)
4313 {
4314 assert(text || start >= end);
4315
4316 if (hour->match(text, start, end, flags) &&
4317 m_separator->match(text, hour->interval.end, end, flags) &&
4318 minute->match(text, m_separator->interval.end, end, flags) &&
4319 minute->value < 60)
4320 {
4321 // hh::mm
4322 size_t hit_offset = m_separator->hit_offset;
4323 if (m_separator->match(text, minute->interval.end, end, flags) &&
4324 m_separator->hit_offset == hit_offset && // Both separators must match.
4325 second && second->match(text, m_separator->interval.end, end, flags) &&
4326 second->value < 60)
4327 {
4328 // hh::mm:ss
4329 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4330 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4331 millisecond->value < 1000)
4332 {
4333 // hh::mm:ss.mmmm
4334 interval.end = millisecond->interval.end;
4335 }
4336 else {
4337 if (millisecond) millisecond->invalidate();
4338 interval.end = second->interval.end;
4339 }
4340 }
4341 else {
4342 if (second) second->invalidate();
4343 if (millisecond) millisecond->invalidate();
4344 interval.end = minute->interval.end;
4345 }
4346 interval.start = start;
4347 return true;
4348 }
4349
4350 hour->invalidate();
4351 minute->invalidate();
4352 if (second) second->invalidate();
4353 if (millisecond) millisecond->invalidate();
4354 interval.start = (interval.end = start) + 1;
4355 return false;
4356 }
4357
/// Invalidates the hour and minute parsers, and the second and millisecond parsers when set.
// NOTE(review): listing line 4364 is not present in this extract; presumably it resets this
// parser's own interval - confirm against the original header.
4358 virtual void invalidate()
4359 {
4360 hour->invalidate();
4361 minute->invalidate();
4362 if (second) second->invalidate();
4363 if (millisecond) millisecond->invalidate();
4365 }
4366
4367 public:
4368 std::shared_ptr<basic_integer10<T>> hour;
4369 std::shared_ptr<basic_integer10<T>> minute;
4370 std::shared_ptr<basic_integer10<T>> second;
4371 std::shared_ptr<basic_integer10<T>> millisecond;
4372
4373 protected:
4374 std::shared_ptr<basic_set<T>> m_separator;
4375 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4376 };
4377
// Time parser for char text.
4378 using time = basic_time<char>;
// Time parser for wchar_t text.
4379 using wtime = basic_time<wchar_t>;
// ttime follows the build's character set: wtime when _UNICODE is defined, time otherwise.
4380#ifdef _UNICODE
4381 using ttime = wtime;
4382#else
4383 using ttime = time;
4384#endif
4386
4390 template <class T>
4391 class basic_angle : public basic_parser<T>
4392 {
4393 public:
4395 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4396 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4397 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4398 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4399 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4400 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4401 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4402 _In_ const std::locale& locale = std::locale()) :
4403 basic_parser<T>(locale),
4404 degree(_degree),
4405 degree_separator(_degree_separator),
4406 minute(_minute),
4407 minute_separator(_minute_separator),
4408 second(_second),
4409 second_separator(_second_separator),
4410 decimal(_decimal)
4411 {}
4412
4413 virtual bool match(
4414 _In_reads_or_z_(end) const T* text,
4415 _In_ size_t start = 0,
4416 _In_ size_t end = (size_t)-1,
4417 _In_ int flags = match_default)
4418 {
4419 assert(text || start >= end);
4420
4421 interval.end = start;
4422
4423 if (degree->match(text, interval.end, end, flags) &&
4424 degree_separator->match(text, degree->interval.end, end, flags))
4425 {
4426 // Degrees
4427 interval.end = degree_separator->interval.end;
4428 }
4429 else {
4430 degree->invalidate();
4431 degree_separator->invalidate();
4432 }
4433
4434 if (minute->match(text, interval.end, end, flags) &&
4435 minute->value < 60 &&
4436 minute_separator->match(text, minute->interval.end, end, flags))
4437 {
4438 // Minutes
4439 interval.end = minute_separator->interval.end;
4440 }
4441 else {
4442 minute->invalidate();
4443 minute_separator->invalidate();
4444 }
4445
4446 if (second && second->match(text, interval.end, end, flags) &&
4447 second->value < 60)
4448 {
4449 // Seconds
4450 interval.end = second->interval.end;
4451 if (second_separator && second_separator->match(text, interval.end, end, flags))
4452 interval.end = second_separator->interval.end;
4453 else
4454 if (second_separator) second_separator->invalidate();
4455 }
4456 else {
4457 if (second) second->invalidate();
4458 if (second_separator) second_separator->invalidate();
4459 }
4460
4461 if (degree->interval.start < degree->interval.end ||
4462 minute->interval.start < minute->interval.end ||
4463 second && second->interval.start < second->interval.end)
4464 {
4465 if (decimal && decimal->match(text, interval.end, end, flags)) {
4466 // Decimals
4467 interval.end = decimal->interval.end;
4468 }
4469 else if (decimal)
4470 decimal->invalidate();
4471 interval.start = start;
4472 return true;
4473 }
4474 if (decimal) decimal->invalidate();
4475 interval.start = (interval.end = start) + 1;
4476 return false;
4477 }
4478
/// Invalidates the degree and minute parsers with their separators, and the second,
/// second-separator and decimal parsers when set.
// NOTE(review): listing line 4488 is not present in this extract; presumably it resets this
// parser's own interval - confirm against the original header.
4479 virtual void invalidate()
4480 {
4481 degree->invalidate();
4482 degree_separator->invalidate();
4483 minute->invalidate();
4484 minute_separator->invalidate();
4485 if (second) second->invalidate();
4486 if (second_separator) second_separator->invalidate();
4487 if (decimal) decimal->invalidate();
4489 }
4490
4491 public:
4492 std::shared_ptr<basic_integer10<T>> degree;
4493 std::shared_ptr<basic_parser<T>> degree_separator;
4494 std::shared_ptr<basic_integer10<T>> minute;
4495 std::shared_ptr<basic_parser<T>> minute_separator;
4496 std::shared_ptr<basic_integer10<T>> second;
4497 std::shared_ptr<basic_parser<T>> second_separator;
4498 std::shared_ptr<basic_parser<T>> decimal;
4499 };
4500
4501 using angle = basic_angle<char>;
4503#ifdef _UNICODE
4504 using RRegElKot = wangle;
4505#else
4506 using RRegElKot = angle;
4507#endif
4509
4513 template <class T>
4515 {
4516 public:
4518 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4519 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4520 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4521 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4522 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4523 _In_ const std::shared_ptr<basic_parser<T>>& space,
4524 _In_ const std::locale& locale = std::locale()) :
4525 basic_parser<T>(locale),
4526 m_digit(digit),
4527 m_plus_sign(plus_sign),
4528 m_lparenthesis(lparenthesis),
4529 m_rparenthesis(rparenthesis),
4530 m_separator(separator),
4531 m_space(space)
4532 {}
4533
/// Matches a phone number at `start` and collects its significant characters in `value`.
///
/// An optional plus sign may lead. After it, digits, one pair of parentheses, separators and
/// spaces are consumed in a loop until none of them matches, `end` is reached or a zero
/// character is found. Plus sign, digits and parentheses are appended to `value`; separators
/// and spaces are consumed but not appended.
///
/// Returns true when at least one digit outside parentheses was matched; `value` is then cut
/// back to its last "safe" size. Otherwise `value` is cleared, the interval is set to an
/// invalid range (start > end) and false is returned.
// NOTE(review): listing lines 4566, 4594, 4612 and 4626 are not present in this extract
// (safe_digit_end is never updated or read in the visible lines); confirm the missing
// statements against the original header.
4534 virtual bool match(
4535 _In_reads_or_z_(end) const T* text,
4536 _In_ size_t start = 0,
4537 _In_ size_t end = (size_t)-1,
4538 _In_ int flags = match_default)
4539 {
4540 assert(text || start >= end);
4541
// safe_value_size: length of `value` at the last point where the number could validly end.
4542 size_t safe_digit_end = start, safe_value_size = 0;
4543 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4544 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4545
4546 interval.end = start;
4547 value.clear();
// The parenthesis parsers' intervals double as "already seen" markers in the loop below.
// NOTE(review): both are dereferenced here without a null check although the loop null-checks them.
4548 m_lparenthesis->invalidate();
4549 m_rparenthesis->invalidate();
4550
4551 if (m_plus_sign && m_plus_sign->match(text, interval.end, end, flags)) {
4552 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4553 safe_value_size = value.size();
4554 interval.end = m_plus_sign->interval.end;
4555 }
4556
4557 for (;;) {
4558 assert(text || interval.end >= end);
4559 if (interval.end >= end || !text[interval.end])
4560 break;
4561 if (m_digit->match(text, interval.end, end, flags)) {
4562 // Digit
4563 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4564 interval.end = m_digit->interval.end;
// Digits inside parentheses become "safe" only once the right parenthesis is matched.
4565 if (!in_parentheses) {
4567 safe_value_size = value.size();
4568 has_digits = true;
4569 }
4570 after_digit = true;
4571 after_parentheses = false;
4572 }
4573 else if (
4574 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4575 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet either
4576 m_lparenthesis->match(text, interval.end, end, flags))
4577 {
4578 // Left parenthesis
4579 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4580 interval.end = m_lparenthesis->interval.end;
4581 in_parentheses = true;
4582 after_digit = false;
4583 after_parentheses = false;
4584 }
4585 else if (
4586 in_parentheses && // After left parenthesis
4587 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4588 m_rparenthesis->match(text, interval.end, end, flags) &&
4589 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4590 {
4591 // Right parenthesis
4592 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4593 interval.end = m_rparenthesis->interval.end;
4595 safe_value_size = value.size();
4596 in_parentheses = false;
4597 after_digit = false;
4598 after_parentheses = true;
4599 }
4600 else if (
4601 after_digit &&
4602 !in_parentheses && // No separators inside parentheses
4603 !after_parentheses && // No separators following right parenthesis
4604 m_separator && m_separator->match(text, interval.end, end, flags))
4605 {
4606 // Separator
4607 interval.end = m_separator->interval.end;
4608 after_digit = false;
4609 after_parentheses = false;
4610 }
4611 else if (
4613 m_space && m_space->match(text, interval.end, end, space_match_flags))
4614 {
4615 // Space
4616 interval.end = m_space->interval.end;
4617 after_digit = false;
4618 after_parentheses = false;
4619 }
4620 else
4621 break;
4622 }
4623 if (has_digits) {
// Drop anything appended after the last safe point (e.g. an unclosed parenthesis and its digits).
4624 value.erase(safe_value_size);
4625 interval.start = start;
4627 return true;
4628 }
4629 value.clear();
4630 interval.start = (interval.end = start) + 1;
4631 return false;
4632 }
4633
// Discards the phone number text collected by the previous match by emptying `value`.
4634 virtual void invalidate()
4635 {
4636 value.clear();
4638 }
4639
4640 public:
// Text assembled by match(): the plus sign, digits and parentheses that were matched.
// Separators and spaces are skipped in the input but never appended here.
4641 std::basic_string<T> value;
4642
4643 protected:
// Parser tried first at every position; each hit is appended to `value`.
4644 std::shared_ptr<basic_parser<T>> m_digit;
// Optional (null-checked) parser for a leading plus sign.
4645 std::shared_ptr<basic_parser<T>> m_plus_sign;
// Parsers for the opening and closing parenthesis. match() accepts a closing
// parenthesis only when its hit_offset equals the opening one's.
// NOTE(review): match() calls invalidate() on both without a null check, yet
// null-checks them later — confirm whether null is an allowed value.
4646 std::shared_ptr<basic_set<T>> m_lparenthesis;
4647 std::shared_ptr<basic_set<T>> m_rparenthesis;
// Optional parser for a separator; only tried directly after a digit, outside parentheses.
4648 std::shared_ptr<basic_parser<T>> m_separator;
// Optional parser for white space; match() calls it with match_multiline masked out.
4649 std::shared_ptr<basic_parser<T>> m_space;
4650 };
4651
4654#ifdef _UNICODE
4656#else
4658#endif
4660
4664 template <class T>
4666 {
4667 public:
// Constructor parameter list and initializers (the line carrying the constructor
// name is not part of this listing).
//   element - parser stored in m_element; match() uses it to recognize one chemical element
//   digit   - parser stored in m_digit; match() uses it for the digits following an element
//   sign    - parser stored in m_sign; match() uses it for the optional trailing charge
//   locale  - forwarded to basic_parser<T>
// Both result flags start out false.
4669 _In_ const std::shared_ptr<basic_parser<T>>& element,
4670 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4671 _In_ const std::shared_ptr<basic_parser<T>>& sign,
4672 _In_ const std::locale& locale = std::locale()) :
4673 basic_parser<T>(locale),
4674 m_element(element),
4675 m_digit(digit),
4676 m_sign(sign),
4677 has_digits(false),
4678 has_charge(false)
4679 {}
4680
4681 virtual bool match(
4682 _In_reads_or_z_(end) const T* text,
4683 _In_ size_t start = 0,
4684 _In_ size_t end = (size_t)-1,
4685 _In_ int flags = match_default)
4686 {
4687 assert(text || start >= end);
4688
4689 has_digits = false;
4690 has_charge = false;
4691 interval.end = start;
4692
4693 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
4694 for (;;) {
4695 if (m_element->match(text, interval.end, end, element_match_flags)) {
4696 interval.end = m_element->interval.end;
4697 while (m_digit->match(text, interval.end, end, flags)) {
4698 interval.end = m_digit->interval.end;
4699 has_digits = true;
4700 }
4701 }
4702 else if (start < interval.end) {
4703 if (m_sign->match(text, interval.end, end, flags)) {
4704 interval.end = m_sign->interval.end;
4705 has_charge = true;
4706 }
4707 interval.start = start;
4708 return true;
4709 }
4710 else {
4711 interval.start = (interval.end = start) + 1;
4712 return false;
4713 }
4714 }
4715 }
4716
// Clears both result flags that match() sets.
4717 virtual void invalidate()
4718 {
4719 has_digits = false;
4720 has_charge = false;
4722 }
4723
4724 public:
// Set by match() when at least one digit followed an element.
4725 bool has_digits;
// Set by match() when a charge sign was matched after the last element.
4726 bool has_charge;
4727
4728 protected:
// Parser for a single chemical element; always called with match_case_insensitive masked out.
4729 std::shared_ptr<basic_parser<T>> m_element;
// Parser for one digit following an element.
4730 std::shared_ptr<basic_parser<T>> m_digit;
// Parser for the trailing charge sign.
4731 std::shared_ptr<basic_parser<T>> m_sign;
4732 };
4733
4736#ifdef _UNICODE
4738#else
4740#endif
4742
4747 {
4748 public:
4749 virtual bool match(
4750 _In_reads_or_z_(end) const char* text,
4751 _In_ size_t start = 0,
4752 _In_ size_t end = (size_t)-1,
4753 _In_ int flags = match_default)
4754 {
4755 assert(text || start >= end);
4756 interval.end = start;
4757
4758 assert(text || interval.end >= end);
4759 if (interval.end < end && text[interval.end]) {
4760 if (text[interval.end] == '\r') {
4761 interval.end++;
4762 if (interval.end < end && text[interval.end] == '\n') {
4763 interval.start = start;
4764 interval.end++;
4765 return true;
4766 }
4767 }
4768 else if (text[interval.end] == '\n') {
4769 interval.start = start;
4770 interval.end++;
4771 return true;
4772 }
4773 }
4774 interval.start = (interval.end = start) + 1;
4775 return false;
4776 }
4777 };
4778
4782 class http_space : public parser
4783 {
4784 public:
4785 virtual bool match(
4786 _In_reads_or_z_(end) const char* text,
4787 _In_ size_t start = 0,
4788 _In_ size_t end = (size_t)-1,
4789 _In_ int flags = match_default)
4790 {
4791 assert(text || start >= end);
4792 interval.end = start;
4793 if (m_line_break.match(text, interval.end, end, flags)) {
4794 interval.end = m_line_break.interval.end;
4795 if (interval.end < end && text[interval.end] && isspace(text[interval.end])) {
4796 interval.start = start;
4797 interval.end++;
4798 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
4799 return true;
4800 }
4801 }
4802 else if (interval.end < end && text[interval.end] && isspace(text[interval.end])) {
4803 interval.start = start;
4804 interval.end++;
4805 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
4806 return true;
4807 }
4808 interval.start = (interval.end = start) + 1;
4809 return false;
4810 }
4811
4812 protected:
4813 http_line_break m_line_break;
4814 };
4815
4819 class http_text_char : public parser
4820 {
4821 public:
4822 virtual bool match(
4823 _In_reads_or_z_(end) const char* text,
4824 _In_ size_t start = 0,
4825 _In_ size_t end = (size_t)-1,
4826 _In_ int flags = match_default)
4827 {
4828 assert(text || start >= end);
4829 interval.end = start;
4830
4831 assert(text || interval.end >= end);
4832 if (m_space.match(text, interval.end, end, flags)) {
4833 interval.start = start;
4834 interval.end = m_space.interval.end;
4835 return true;
4836 }
4837 else if (interval.end < end && text[interval.end] && text[interval.end] >= 0x20) {
4838 interval.start = start;
4839 interval.end++;
4840 return true;
4841 }
4842 interval.start = (interval.end = start) + 1;
4843 return false;
4844 }
4845
4846 protected:
4847 http_space m_space;
4848 };
4849
// Test for HTTP token: a non-empty run of characters that ends at the first
// control character, DEL or separator character listed below.
4853 class http_token : public parser
4854 {
4855 public:
// Matches a token at `start`. Returns true and sets `interval` when at least one
// character was consumed; otherwise marks `interval` invalid and returns false.
// `flags` is not used in the visible code.
4856 virtual bool match(
4857 _In_reads_or_z_(end) const char* text,
4858 _In_ size_t start = 0,
4859 _In_ size_t end = (size_t)-1,
4860 _In_ int flags = match_default)
4861 {
4862 assert(text || start >= end);
4863 interval.end = start;
4864 for (;;) {
4865 if (interval.end < end && text[interval.end]) {
// Stop at control characters (below 0x20), DEL (0x7f) and separators.
// A negative char converts to a large unsigned value, so it is not treated
// as a control character here.
4866 if ((unsigned int)text[interval.end] < 0x20 ||
4867 (unsigned int)text[interval.end] == 0x7f ||
4868 text[interval.end] == '(' ||
4869 text[interval.end] == ')' ||
4870 text[interval.end] == '<' ||
4871 text[interval.end] == '>' ||
4872 text[interval.end] == '@' ||
4873 text[interval.end] == ',' ||
4874 text[interval.end] == ';' ||
4875 text[interval.end] == ':' ||
4876 text[interval.end] == '\\' ||
4877 text[interval.end] == '\"' ||
4878 text[interval.end] == '/' ||
4879 text[interval.end] == '[' ||
4880 text[interval.end] == ']' ||
4881 text[interval.end] == '?' ||
4882 text[interval.end] == '=' ||
4883 text[interval.end] == '{' ||
4884 text[interval.end] == '}' ||
// NOTE(review): the last operand of this condition is missing from this listing — confirm against the header.
4886 break;
4887 else
4888 interval.end++;
4889 }
4890 else
4891 break;
4892 }
// An empty token is not a match.
4893 if (start < interval.end) {
4894 interval.start = start;
4895 return true;
4896 }
4897 else {
4898 interval.start = (interval.end = start) + 1;
4899 return false;
4900 }
4901 }
4902 };
4903
4908 {
4909 public:
// Matches a double-quoted string at `start`. Between the quotes a backslash
// escapes the character that follows it; every other character must be accepted
// by m_chr. On failure `content` and `interval` are marked invalid.
4910 virtual bool match(
4911 _In_reads_or_z_(end) const char* text,
4912 _In_ size_t start = 0,
4913 _In_ size_t end = (size_t)-1,
4914 _In_ int flags = match_default)
4915 {
4916 assert(text || start >= end);
4917 interval.end = start;
// The string must open with a double quote.
// NOTE(review): when interval.end >= end this guard is skipped and the position
// is still advanced; the failure is then only caught by the range test inside
// the loop. Confirm that `interval.end >= end || ...` was not intended.
4918 if (interval.end < end && text[interval.end] != '"')
4919 goto error;
4920 interval.end++;
4922 for (;;) {
4923 assert(text || interval.end >= end);
4924 if (interval.end < end && text[interval.end]) {
4925 if (text[interval.end] == '"') {
// Closing quote: consume it and finish.
4927 interval.end++;
4928 break;
4929 }
4930 else if (text[interval.end] == '\\') {
// Escape: skip the backslash and whatever character follows; a backslash at
// the end of the text is an error.
4931 interval.end++;
4932 if (interval.end < end && text[interval.end]) {
4933 interval.end++;
4934 }
4935 else
4936 goto error;
4937 }
// Advances by one character even when m_chr reports a longer interval.
4938 else if (m_chr.match(text, interval.end, end, flags))
4939 interval.end++;
4940 else
4941 goto error;
4942 }
4943 else
// The text ended before the closing quote.
4944 goto error;
4945 }
4946 interval.start = start;
4947 return true;
4948
4949 error:
4950 content.start = 1;
4951 content.end = 0;
4952 interval.start = (interval.end = start) + 1;
4953 return false;
4954 }
4955
// Marks `content` as an invalid interval (start after end) and invalidates the base parser state.
4956 virtual void invalidate()
4957 {
4958 content.start = 1;
4959 content.end = 0;
4960 parser::invalidate();
4961 }
4962
4963 public:
4965
4966 protected:
4967 http_text_char m_chr;
4968 };
4969
// Test for HTTP value: `string` is tried first, then `token`. Whichever
// alternative did not match is invalidated, so a caller can tell which one hit.
// (The declarations of `string` and `token` are not part of this listing.)
4973 class http_value : public parser
4974 {
4975 public:
// Matches a value at `start`. Returns true when either alternative matched;
// otherwise marks `interval` invalid and returns false.
4976 virtual bool match(
4977 _In_reads_or_z_(end) const char* text,
4978 _In_ size_t start = 0,
4979 _In_ size_t end = (size_t)-1,
4980 _In_ int flags = match_default)
4981 {
4982 assert(text || start >= end);
4983 interval.end = start;
4984 if (string.match(text, interval.end, end, flags)) {
4985 token.invalidate();
4986 interval.end = string.interval.end;
4987 interval.start = start;
4988 return true;
4989 }
4990 else if (token.match(text, interval.end, end, flags)) {
4991 string.invalidate();
4993 interval.start = start;
4994 return true;
4995 }
4996 else {
4997 interval.start = (interval.end = start) + 1;
4998 return false;
4999 }
5000 }
5001
// Invalidates both alternatives and the base parser state.
5002 virtual void invalidate()
5003 {
5004 string.invalidate();
5005 token.invalidate();
5006 parser::invalidate();
5007 }
5008
5009 public:
5012 };
5013
// Test for HTTP parameter: a name, optional white space, '=' and a value.
// (The declarations of `name` and `value` are not part of this listing.)
5017 class http_parameter : public parser
5018 {
5019 public:
// Matches a parameter at `start`. On failure `name`, `value` and `interval`
// are invalidated and false is returned.
5020 virtual bool match(
5021 _In_reads_or_z_(end) const char* text,
5022 _In_ size_t start = 0,
5023 _In_ size_t end = (size_t)-1,
5024 _In_ int flags = match_default)
5025 {
5026 assert(text || start >= end);
5027 interval.end = start;
5028 if (name.match(text, interval.end, end, flags))
5030 else
5031 goto error;
// White space between the name and '='.
5032 while (m_space.match(text, interval.end, end, flags))
5033 interval.end = m_space.interval.end;
5034 assert(text || interval.end >= end);
// NOTE(review): the `else` below binds to the following `while` loop, so a
// missing '=' is not rejected, and white space after '=' is not skipped.
// Other parsers in this file use `else goto error;` here, followed by an
// unconditional white space loop — confirm which behavior is intended.
5035 if (interval.end < end && text[interval.end] == '=')
5036 interval.end++;
5037 else
5038 while (m_space.match(text, interval.end, end, flags))
5039 interval.end = m_space.interval.end;
5040 if (value.match(text, interval.end, end, flags))
5042 else
5043 goto error;
5044 interval.start = start;
5045 return true;
5046
5047 error:
5048 name.invalidate();
5049 value.invalidate();
5050 interval.start = (interval.end = start) + 1;
5051 return false;
5052 }
5053
// Invalidates the name, the value and the base parser state.
5054 virtual void invalidate()
5055 {
5056 name.invalidate();
5057 value.invalidate();
5058 parser::invalidate();
5059 }
5060
5061 public:
5064
5065 protected:
// Recognizes the white space allowed around '='.
5066 http_space m_space;
5067 };
5068
5072 class http_any_type : public parser
5073 {
5074 public:
5075 virtual bool match(
5076 _In_reads_or_z_(end) const char* text,
5077 _In_ size_t start = 0,
5078 _In_ size_t end = (size_t)-1,
5079 _In_ int flags = match_default)
5080 {
5081 assert(text || start >= end);
5082 if (start + 2 < end &&
5083 text[start] == '*' &&
5084 text[start + 1] == '/' &&
5085 text[start + 2] == '*')
5086 {
5087 interval.end = (interval.start = start) + 3;
5088 return true;
5089 }
5090 else if (start < end && text[start] == '*') {
5091 interval.end = (interval.start = start) + 1;
5092 return true;
5093 }
5094 else {
5095 interval.start = (interval.end = start) + 1;
5096 return false;
5097 }
5098 }
5099 };
5100
5105 {
5106 public:
5107 virtual bool match(
5108 _In_reads_or_z_(end) const char* text,
5109 _In_ size_t start = 0,
5110 _In_ size_t end = (size_t)-1,
5111 _In_ int flags = match_default)
5112 {
5113 assert(text || start >= end);
5114 interval.end = start;
5115 if (type.match(text, interval.end, end, flags))
5116 interval.end = type.interval.end;
5117 else
5118 goto error;
5119 while (m_space.match(text, interval.end, end, flags))
5120 interval.end = m_space.interval.end;
5121 if (interval.end < end && text[interval.end] == '/')
5122 interval.end++;
5123 else
5124 goto error;
5125 while (m_space.match(text, interval.end, end, flags))
5126 interval.end = m_space.interval.end;
5127 if (subtype.match(text, interval.end, end, flags))
5128 interval.end = subtype.interval.end;
5129 else
5130 goto error;
5131 interval.start = start;
5132 return true;
5133
5134 error:
5135 type.invalidate();
5136 subtype.invalidate();
5137 interval.start = (interval.end = start) + 1;
5138 return false;
5139 }
5140
// Invalidates the type, the subtype and the base parser state.
5141 virtual void invalidate()
5142 {
5143 type.invalidate();
5144 subtype.invalidate();
5145 parser::invalidate();
5146 }
5147
5148 public:
5149 http_token type;
5150 http_token subtype;
5151
5152 protected:
5153 http_space m_space;
5154 };
5155
5160 {
5161 public:
// Matches a media range (via http_media_range::match) followed by any number of
// ';'-separated parameters, which are collected in `params`.
// Returns false, with everything invalidated, when the media range does not match.
5162 virtual bool match(
5163 _In_reads_or_z_(end) const char* text,
5164 _In_ size_t start = 0,
5165 _In_ size_t end = (size_t)-1,
5166 _In_ int flags = match_default)
5167 {
5168 assert(text || start >= end);
5169 if (!http_media_range::match(text, start, end, flags))
5170 goto error;
5171 params.clear();
5172 for (;;) {
5173 if (interval.end < end && text[interval.end]) {
// White space before a ';' is skipped.
5174 if (m_space.match(text, interval.end, end, flags))
5175 interval.end = m_space.interval.end;
5176 else if (text[interval.end] == ';') {
5177 interval.end++;
5178 while (m_space.match(text, interval.end, end, flags))
5179 interval.end = m_space.interval.end;
// (The declaration of `param` is not part of this listing.)
5181 if (param.match(text, interval.end, end, flags)) {
5183 params.push_back(std::move(param));
5184 }
5185 else
5186 break;
5187 }
5188 else
5189 break;
5190 }
5191 else
5192 break;
5193 }
// Trim the match back to the last element that really matched, so trailing
// white space or a dangling ';' is not part of the interval.
5194 interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
5195 return true;
5196
5197 error:
5198 http_media_range::invalidate();
5199 params.clear();
5200 interval.start = (interval.end = start) + 1;
5201 return false;
5202 }
5203
// Drops all collected parameters and invalidates the media range part.
5204 virtual void invalidate()
5205 {
5206 params.clear();
5207 http_media_range::invalidate();
5208 }
5209
5210 public:
5211 std::list<http_parameter> params;
5212 };
5213
5218 {
5219 public:
// Matches a non-empty run of characters at `start`, ending at the first control
// character (below 0x20), DEL (0x7f), ':' or '/'.
// Returns true when at least one character was consumed.
5220 virtual bool match(
5221 _In_reads_or_z_(end) const char* text,
5222 _In_ size_t start = 0,
5223 _In_ size_t end = (size_t)-1,
5224 _In_ int flags = match_default)
5225 {
5226 assert(text || start >= end);
5227 interval.end = start;
5228 for (;;) {
5229 if (interval.end < end && text[interval.end]) {
5230 if ((unsigned int)text[interval.end] < 0x20 ||
5231 (unsigned int)text[interval.end] == 0x7f ||
5232 text[interval.end] == ':' ||
5233 text[interval.end] == '/' ||
// NOTE(review): the last operand of this condition is missing from this listing — confirm against the header.
5235 break;
5236 else
5237 interval.end++;
5238 }
5239 else
5240 break;
5241 }
5242 if (start < interval.end) {
5243 interval.start = start;
5244 return true;
5245 }
5246 interval.start = (interval.end = start) + 1;
5247 return false;
5248 }
5249 };
5250
5254 class http_url_port : public parser
5255 {
5256 public:
5257 http_url_port(_In_ const std::locale& locale = std::locale()) :
5258 parser(locale),
5259 value(0)
5260 {}
5261
5262 virtual bool match(
5263 _In_reads_or_z_(end) const char* text,
5264 _In_ size_t start = 0,
5265 _In_ size_t end = (size_t)-1,
5266 _In_ int flags = match_default)
5267 {
5268 assert(text || start >= end);
5269 value = 0;
5270 interval.end = start;
5271 for (;;) {
5272 if (interval.end < end && text[interval.end]) {
5273 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5274 size_t _value = (size_t)value * 10 + text[interval.end] - '0';
5275 if (_value > (uint16_t)-1) {
5276 value = 0;
5277 interval.start = (interval.end = start) + 1;
5278 return false;
5279 }
5280 value = (uint16_t)_value;
5281 interval.end++;
5282 }
5283 else
5284 break;
5285 }
5286 else
5287 break;
5288 }
5289 if (start < interval.end) {
5290 interval.start = start;
5291 return true;
5292 }
5293 interval.start = (interval.end = start) + 1;
5294 return false;
5295 }
5296
5297 virtual void invalidate()
5298 {
5299 value = 0;
5300 parser::invalidate();
5301 }
5302
5303 public:
5304 uint16_t value;
5305 };
5306
5311 {
5312 public:
// Matches a possibly empty run of characters at `start`, ending at the first
// control character (below 0x20), DEL (0x7f), '?' or '/'.
// Always returns true; an empty segment yields an empty interval at `start`.
5313 virtual bool match(
5314 _In_reads_or_z_(end) const char* text,
5315 _In_ size_t start = 0,
5316 _In_ size_t end = (size_t)-1,
5317 _In_ int flags = match_default)
5318 {
5319 assert(text || start >= end);
5320 interval.end = start;
5321 for (;;) {
5322 if (interval.end < end && text[interval.end]) {
5323 if ((unsigned int)text[interval.end] < 0x20 ||
5324 (unsigned int)text[interval.end] == 0x7f ||
5325 text[interval.end] == '?' ||
5326 text[interval.end] == '/' ||
// NOTE(review): the last operand of this condition is missing from this listing — confirm against the header.
5328 break;
5329 else
5330 interval.end++;
5331 }
5332 else
5333 break;
5334 }
5335 interval.start = start;
5336 return true;
5337 }
5338 };
5339
// Test for HTTP URL path: a '/' followed by a segment, repeated for every
// further '/'. Each matched segment is appended to `segments`.
5343 class http_url_path : public parser
5344 {
5345 public:
// Matches a path at `start`. On failure `segments` is cleared, `interval` is
// marked invalid and false is returned.
// (The declaration of the local segment parser `s` is not part of this listing.)
5346 virtual bool match(
5347 _In_reads_or_z_(end) const char* text,
5348 _In_ size_t start = 0,
5349 _In_ size_t end = (size_t)-1,
5350 _In_ int flags = match_default)
5351 {
5352 assert(text || start >= end);
5354 interval.end = start;
5355 segments.clear();
5356 assert(text || interval.end >= end);
// The path must begin with '/'.
// NOTE(review): when interval.end >= end this guard is skipped, so the position
// is advanced past `end` and an empty segment is still recorded instead of
// failing. Confirm that `interval.end >= end || ...` was not intended.
5357 if (interval.end < end && text[interval.end] != '/')
5358 goto error;
5359 interval.end++;
5360 s.match(text, interval.end, end, flags);
5361 segments.push_back(s);
5363 for (;;) {
5364 if (interval.end < end && text[interval.end]) {
5365 if (text[interval.end] == '/') {
5366 interval.end++;
5367 s.match(text, interval.end, end, flags);
5368 segments.push_back(s);
5370 }
5371 else
5372 break;
5373 }
5374 else
5375 break;
5376 }
5377 interval.start = start;
5378 return true;
5379
5380 error:
5381 segments.clear();
5382 interval.start = (interval.end = start) + 1;
5383 return false;
5384 }
5385
// Drops all segments and invalidates the base parser state.
5386 virtual void invalidate()
5387 {
5388 segments.clear();
5389 parser::invalidate();
5390 }
5391
5392 public:
// Segments in the order they were matched.
5393 std::vector<http_url_path_segment> segments;
5394 };
5395
5400 {
5401 public:
// Matches a URL query parameter at `start`: a non-empty name, optionally
// followed by '=' and a possibly empty value. The positions are stored in the
// `name` and `value` intervals; `value` is marked invalid when there is no '='.
5402 virtual bool match(
5403 _In_reads_or_z_(end) const char* text,
5404 _In_ size_t start = 0,
5405 _In_ size_t end = (size_t)-1,
5406 _In_ int flags = match_default)
5407 {
5408 assert(text || start >= end);
5409 interval.end = start;
5410 name.start = interval.end;
// The name runs up to a control character, DEL, '&' or '='.
5411 for (;;) {
5412 if (interval.end < end && text[interval.end]) {
5413 if ((unsigned int)text[interval.end] < 0x20 ||
5414 (unsigned int)text[interval.end] == 0x7f ||
5415 text[interval.end] == '&' ||
5416 text[interval.end] == '=' ||
// NOTE(review): the last operand of this condition is missing from this listing — confirm against the header.
5418 break;
5419 else
5420 interval.end++;
5421 }
5422 else
5423 break;
5424 }
5425 if (start < interval.end)
5426 name.end = interval.end;
5427 else
5428 goto error;
// NOTE(review): text[interval.end] is read without testing interval.end < end.
// When the name ran right up to `end`, this reads one element past the range
// the caller declared — confirm callers always pass readable memory there.
5429 if (text[interval.end] == '=') {
5430 interval.end++;
5431 value.start = interval.end;
// The value runs up to a control character, DEL or '&'.
5432 for (;;) {
5433 if (interval.end < end && text[interval.end]) {
5434 if ((unsigned int)text[interval.end] < 0x20 ||
5435 (unsigned int)text[interval.end] == 0x7f ||
5436 text[interval.end] == '&' ||
// NOTE(review): the last operand of this condition is missing from this listing — confirm against the header.
5438 break;
5439 else
5440 interval.end++;
5441 }
5442 else
5443 break;
5444 }
5445 value.end = interval.end;
5446 }
5447 else {
5448 value.start = 1;
5449 value.end = 0;
5450 }
5451 interval.start = start;
5452 return true;
5453
5454 error:
5455 name.start = 1;
5456 name.end = 0;
5457 value.start = 1;
5458 value.end = 0;
5459 interval.start = (interval.end = start) + 1;
5460 return false;
5461 }
5462
// Marks the `name` and `value` intervals invalid (start after end) and
// invalidates the base parser state.
5463 virtual void invalidate()
5464 {
5465 name.start = 1;
5466 name.end = 0;
5467 value.start = 1;
5468 value.end = 0;
5469 parser::invalidate();
5470 }
5471
5472 public:
5475 };
5476
// Test for HTTP URL: an optional "http://" server [":" port] prefix, a mandatory
// path, and an optional '?' query made of '&'-separated parameters.
5480 class http_url : public parser
5481 {
5482 public:
// Constructs the parser; `locale` is passed to the base parser and to `port`.
5483 http_url(_In_ const std::locale& locale = std::locale()) :
5484 parser(locale),
5485 port(locale)
5486 {}
5487
// Matches a URL at `start`. On failure every component is invalidated,
// `params` is cleared and false is returned.
5488 virtual bool match(
5489 _In_reads_or_z_(end) const char* text,
5490 _In_ size_t start = 0,
5491 _In_ size_t end = (size_t)-1,
5492 _In_ int flags = match_default)
5493 {
5494 assert(text || start >= end);
5495 interval.end = start;
5496
// Absolute form: the scheme is compared case-insensitively using m_locale.
5497 if (interval.end + 7 <= end && stdex::strnicmp(text + interval.end, 7, "http://", (size_t)-1, m_locale) == 0) {
5498 interval.end += 7;
5499 if (server.match(text, interval.end, end, flags))
5500 interval.end = server.interval.end;
5501 else
5502 goto error;
5503 if (interval.end < end && text[interval.end] == ':') {
5504 interval.end++;
// A ':' without a valid port number is tolerated: the colon stays consumed
// and `port` keeps the state of its failed match.
5505 if (port.match(text, interval.end, end, flags))
5506 interval.end = port.interval.end;
5507 }
5508 else {
// No explicit port: report 80.
5509 port.invalidate();
5510 port.value = 80;
5511 }
5512 }
5513 else {
// Relative form: no server, port reported as 80.
5514 server.invalidate();
5515 port.invalidate();
5516 port.value = 80;
5517 }
5518
5519 if (path.match(text, interval.end, end, flags))
5520 interval.end = path.interval.end;
5521 else
5522 goto error;
5523
5524 params.clear();
5525
5526 if (interval.end < end && text[interval.end] == '?') {
5527 interval.end++;
5528 for (;;) {
5529 if (interval.end < end && text[interval.end]) {
// The query ends at a control character or DEL.
5530 if ((unsigned int)text[interval.end] < 0x20 ||
5531 (unsigned int)text[interval.end] == 0x7f ||
// NOTE(review): the last operand of this condition is missing from this listing — confirm against the header.
5533 break;
5534 else if (text[interval.end] == '&')
5535 interval.end++;
5536 else {
// (The declaration of `param` is not part of this listing.)
5538 if (param.match(text, interval.end, end, flags)) {
5540 params.push_back(std::move(param));
5541 }
5542 else
5543 break;
5544 }
5545 }
5546 else
5547 break;
5548 }
5549 }
5550
5551 interval.start = start;
5552 return true;
5553
5554 error:
5555 server.invalidate();
5556 port.invalidate();
5557 path.invalidate();
5558 params.clear();
5559 interval.start = (interval.end = start) + 1;
5560 return false;
5561 }
5562
// Invalidates every component, clears `params` and invalidates the base parser state.
5563 virtual void invalidate()
5564 {
5565 server.invalidate();
5566 port.invalidate();
5567 path.invalidate();
5568 params.clear();
5569 parser::invalidate();
5570 }
5571
5572 public:
// Server part; invalidated when the URL has no "http://" prefix.
5573 http_url_server server;
// Port; its value is set to 80 when the URL carries none.
5574 http_url_port port;
// Path part.
5575 http_url_path path;
// Query parameters in the order they appear.
5576 std::list<http_url_parameter> params;
5577 };
5578
// Test for HTTP language: runs of alphabetic characters separated by '-'.
// The position of every run is recorded in `components`.
5582 class http_language : public parser
5583 {
5584 public:
// Matches a language tag at `start`. Returns true when at least one component
// was found; the match ends at the last component, so a trailing '-' is left out.
5585 virtual bool match(
5586 _In_reads_or_z_(end) const char* text,
5587 _In_ size_t start = 0,
5588 _In_ size_t end = (size_t)-1,
5589 _In_ int flags = match_default)
5590 {
5591 assert(text || start >= end);
5592 interval.end = start;
5593 components.clear();
5594 for (;;) {
5595 if (interval.end < end && text[interval.end]) {
// (The declaration of the component interval `k` is not part of this listing.)
5597 k.end = interval.end;
// Extend k over the run of alphabetic characters.
5598 for (;;) {
5599 if (k.end < end && text[k.end]) {
5600 if (isalpha(text[k.end]))
5601 k.end++;
5602 else
5603 break;
5604 }
5605 else
5606 break;
5607 }
5608 if (interval.end < k.end) {
5609 k.start = interval.end;
5610 interval.end = k.end;
5611 components.push_back(k);
5612 }
5613 else
5614 break;
// Another component may only follow a '-'.
5615 if (interval.end < end && text[interval.end] == '-')
5616 interval.end++;
5617 else
5618 break;
5619 }
5620 else
5621 break;
5622 }
5623 if (!components.empty()) {
5624 interval.start = start;
5625 interval.end = components.back().end;
5626 return true;
5627 }
5628 interval.start = (interval.end = start) + 1;
5629 return false;
5630 }
5631
// Drops all components and invalidates the base parser state.
5632 virtual void invalidate()
5633 {
5634 components.clear();
5635 parser::invalidate();
5636 }
5637
5638 public:
// Positions of the components, in order of appearance.
5639 std::vector<stdex::interval<size_t>> components;
5640 };
5641
// Test for HTTP weight factor: decimal digits with an optional '.' and
// fractional digits. `value` is 1.0f whenever nothing was matched.
5645 class http_weight : public parser
5646 {
5647 public:
// Constructs the parser with the default weight 1.0.
5648 http_weight(_In_ const std::locale& locale = std::locale()) :
5649 parser(locale),
5650 value(1.0f)
5651 {}
5652
// Matches a number at `start`. Returns true when at least one character
// (digit or '.') was consumed; otherwise resets `value` to 1.0f and returns false.
// NOTE(review): a lone "." also counts as a match — confirm this is intended.
5653 virtual bool match(
5654 _In_reads_or_z_(end) const char* text,
5655 _In_ size_t start = 0,
5656 _In_ size_t end = (size_t)-1,
5657 _In_ int flags = match_default)
5658 {
5659 assert(text || start >= end);
// Local names are Slovenian: celi_del = integer part, decimalni_del =
// fractional part, decimalni_del_n = power of ten scaling the fractional part.
5660 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
5661 interval.end = start;
5662 for (;;) {
5663 if (interval.end < end && text[interval.end]) {
5664 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5665 celi_del = celi_del * 10 + text[interval.end] - '0';
5666 interval.end++;
5667 }
5668 else if (text[interval.end] == '.') {
5669 interval.end++;
// Fractional digits; the scale grows tenfold with every digit.
5670 for (;;) {
5671 if (interval.end < end && text[interval.end]) {
5672 if ('0' <= text[interval.end] && text[interval.end] <= '9') {
5674 decimalni_del_n *= 10;
5675 interval.end++;
5676 }
5677 else
5678 break;
5679 }
5680 else
5681 break;
5682 }
5683 break;
5684 }
5685 else
5686 break;
5687 }
5688 else
5689 break;
5690 }
5691 if (start < interval.end) {
5693 interval.start = start;
5694 return true;
5695 }
5696 value = 1.0f;
5697 interval.start = (interval.end = start) + 1;
5698 return false;
5699 }
5700
// Restores the default weight 1.0 and invalidates the base parser state.
5701 virtual void invalidate()
5702 {
5703 value = 1.0f;
5704 parser::invalidate();
5705 }
5706
5707 public:
// Weight; 1.0f by default and after a failed match.
5708 float value;
5709 };
5710
5714 class http_asterisk : public parser
5715 {
5716 public:
5717 virtual bool match(
5718 _In_reads_or_z_(end) const char* text,
5719 _In_ size_t start = 0,
5720 _In_ size_t end = (size_t)-1,
5721 _In_ int flags = match_default)
5722 {
5723 assert(text || end <= start);
5724 if (start < end && text[start] == '*') {
5725 interval.end = (interval.start = start) + 1;
5726 return true;
5727 }
5728 interval.start = (interval.end = start) + 1;
5729 return false;
5730 }
5731 };
5732
5736 template <class T, class T_asterisk = http_asterisk>
5738 {
5739 public:
// Constructs the parser; `locale` is passed to the base parser and to `factor`.
5740 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
5741 parser(locale),
5742 factor(locale)
5743 {}
5744
// Matches either an asterisk or a value at `start`, optionally followed by a
// weight written as ';' 'q' '=' factor, with white space allowed between the
// pieces. Whichever of `asterisk` / `value` did not match is invalidated.
// Returns false only when neither the asterisk nor the value matched.
5745 virtual bool match(
5746 _In_reads_or_z_(end) const char* text,
5747 _In_ size_t start = 0,
5748 _In_ size_t end = (size_t)-1,
5749 _In_ int flags = match_default)
5750 {
5751 assert(text || start >= end);
// Slovenian for "end of value".
5752 size_t konec_vrednosti;
5753 interval.end = start;
5754 if (asterisk.match(text, interval.end, end, flags)) {
5756 value.invalidate();
5757 }
5758 else if (value.match(text, interval.end, end, flags)) {
5759 interval.end = konec_vrednosti = value.interval.end;
5760 asterisk.invalidate();
5761 }
5762 else {
5763 asterisk.invalidate();
5764 value.invalidate();
5765 interval.start = (interval.end = start) + 1;
5766 return false;
5767 }
5768
// Optional "; q = <factor>". Each piece is consumed as soon as it is seen, so
// the position stays advanced even when a later piece is missing.
5769 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5770 if (interval.end < end && text[interval.end] == ';') {
5771 interval.end++;
5772 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5773 if (interval.end < end && (text[interval.end] == 'q' || text[interval.end] == 'Q')) {
5774 interval.end++;
5775 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5776 if (interval.end < end && text[interval.end] == '=') {
5777 interval.end++;
5778 while (interval.end < end && text[interval.end] && isspace(text[interval.end])) interval.end++;
5779 if (factor.match(text, interval.end, end, flags))
5780 interval.end = factor.interval.end;
5781 }
5782 }
5783 }
// NOTE(review): `factor` is not reset at the top of this function. When
// factor.match() is not reached in this call, this test sees the interval left
// by an earlier call — confirm a stale factor cannot leak through.
5784 if (!factor.interval) {
5785 factor.invalidate();
5787 }
5788 interval.start = start;
5789 return true;
5790 }
5791
// Invalidates the asterisk, the value, the weight factor and the base parser state.
5792 virtual void invalidate()
5793 {
5794 asterisk.invalidate();
5795 value.invalidate();
5796 factor.invalidate();
5797 parser::invalidate();
5798 }
5799
5800 public:
5801 T_asterisk asterisk;
5802 T value;
5803 http_weight factor;
5804 };
5805
5810 {
5811 public:
5812 virtual bool match(
5813 _In_reads_or_z_(end) const char* text,
5814 _In_ size_t start = 0,
5815 _In_ size_t end = (size_t)-1,
5816 _In_ int flags = match_default)
5817 {
5818 assert(text || start >= end);
5819 interval.end = start;
5820 if (interval.end < end && text[interval.end] == '$')
5821 interval.end++;
5822 else
5823 goto error;
5824 if (name.match(text, interval.end, end, flags))
5825 interval.end = name.interval.end;
5826 else
5827 goto error;
5828 while (m_space.match(text, interval.end, end, flags))
5829 interval.end = m_space.interval.end;
5830 if (interval.end < end && text[interval.end] == '=')
5831 interval.end++;
5832 else
5833 goto error;
5834 while (m_space.match(text, interval.end, end, flags))
5835 interval.end = m_space.interval.end;
5836 if (value.match(text, interval.end, end, flags))
5837 interval.end = value.interval.end;
5838 else
5839 goto error;
5840 interval.start = start;
5841 return true;
5842
5843 error:
5844 name.invalidate();
5845 value.invalidate();
5846 interval.start = (interval.end = start) + 1;
5847 return false;
5848 }
5849
// Invalidates the name, the value and the base parser state.
5850 virtual void invalidate()
5851 {
5852 name.invalidate();
5853 value.invalidate();
5854 parser::invalidate();
5855 }
5856
5857 public:
5858 http_token name;
5859 http_value value;
5860
5861 protected:
5862 http_space m_space;
5863 };
5864
// Test for HTTP cookie: name '=' value, followed by any number of
// ';'-separated cookie parameters collected in `params`.
// (The declarations of `name` and `value` are not part of this listing.)
5868 class http_cookie : public parser
5869 {
5870 public:
// Matches a cookie at `start`. On failure `name`, `value`, `params` and
// `interval` are invalidated and false is returned.
5871 virtual bool match(
5872 _In_reads_or_z_(end) const char* text,
5873 _In_ size_t start = 0,
5874 _In_ size_t end = (size_t)-1,
5875 _In_ int flags = match_default)
5876 {
5877 assert(text || start >= end);
5878 interval.end = start;
5879 if (name.match(text, interval.end, end, flags))
5881 else
5882 goto error;
// White space is allowed on both sides of the mandatory '='.
5883 while (m_space.match(text, interval.end, end, flags))
5884 interval.end = m_space.interval.end;
5885 if (interval.end < end && text[interval.end] == '=')
5886 interval.end++;
5887 else
5888 goto error;
5889 while (m_space.match(text, interval.end, end, flags))
5890 interval.end = m_space.interval.end;
5891 if (value.match(text, interval.end, end, flags))
5893 else
5894 goto error;
5895 params.clear();
5896 for (;;) {
5897 if (interval.end < end && text[interval.end]) {
5898 if (m_space.match(text, interval.end, end, flags))
5899 interval.end = m_space.interval.end;
5900 else if (text[interval.end] == ';') {
5901 interval.end++;
5902 while (m_space.match(text, interval.end, end, flags))
5903 interval.end = m_space.interval.end;
// (The declaration of `param` is not part of this listing.)
5905 if (param.match(text, interval.end, end, flags)) {
5907 params.push_back(std::move(param));
5908 }
5909 else
5910 break;
5911 }
5912 else
5913 break;
5914 }
5915 else
5916 break;
5917 }
5918 interval.start = start;
// Trim the match back to the last element that really matched, so trailing
// white space or a dangling ';' is not part of the interval.
5919 interval.end = params.empty() ? value.interval.end : params.back().interval.end;
5920 return true;
5921
5922 error:
5923 name.invalidate();
5924 value.invalidate();
5925 params.clear();
5926 interval.start = (interval.end = start) + 1;
5927 return false;
5928 }
5929
// Invalidates the name and the value, drops all parameters and invalidates the base parser state.
5930 virtual void invalidate()
5931 {
5932 name.invalidate();
5933 value.invalidate();
5934 params.clear();
5935 parser::invalidate();
5936 }
5937
5938 public:
// Cookie parameters in the order they appear.
5941 std::list<http_cookie_parameter> params;
5942
5943 protected:
// Recognizes the white space allowed between the pieces.
5944 http_space m_space;
5945 };
5946
5950 class http_agent : public parser
5951 {
5952 public:
5953 virtual bool match(
5954 _In_reads_or_z_(end) const char* text,
5955 _In_ size_t start = 0,
5956 _In_ size_t end = (size_t)-1,
5957 _In_ int flags = match_default)
5958 {
5959 assert(text || start >= end);
5960 interval.end = start;
5961 type.start = interval.end;
5962 for (;;) {
5963 if (interval.end < end && text[interval.end]) {
5964 if (text[interval.end] == '/') {
5965 type.end = interval.end;
5966 interval.end++;
5967 version.start = interval.end;
5968 for (;;) {
5969 if (interval.end < end && text[interval.end]) {
5970 if (isspace(text[interval.end])) {
5971 version.end = interval.end;
5972 break;
5973 }
5974 else
5975 interval.end++;
5976 }
5977 else {
5978 version.end = interval.end;
5979 break;
5980 }
5981 }
5982 break;
5983 }
5984 else if (isspace(text[interval.end])) {
5985 type.end = interval.end;
5986 break;
5987 }
5988 else
5989 interval.end++;
5990 }
5991 else {
5992 type.end = interval.end;
5993 break;
5994 }
5995 }
5996 if (start < interval.end) {
5997 interval.start = start;
5998 return true;
5999 }
6000 type.start = 1;
6001 type.end = 0;
6002 version.start = 1;
6003 version.end = 0;
6004 interval.start = 1;
6005 interval.end = 0;
6006 return false;
6007 }
6008
// Marks the `type` and `version` intervals invalid (start after end) and
// invalidates the base parser state.
6009 virtual void invalidate()
6010 {
6011 type.start = 1;
6012 type.end = 0;
6013 version.start = 1;
6014 version.end = 0;
6015 parser::invalidate();
6016 }
6017
6018 public:
6021 };
6022
6026 class http_protocol : public parser
6027 {
6028 public:
// Constructs the parser with `version` preset to 0x009. In the encoding match()
// uses (major * 0x100 + minor) that is major 0, minor 9.
6029 http_protocol(_In_ const std::locale& locale = std::locale()) :
6030 parser(locale),
6031 version(0x009)
6032 {}
6033
6034 virtual bool match(
6035 _In_reads_or_z_(end) const char* text,
6036 _In_ size_t start = 0,
6037 _In_ size_t end = (size_t)-1,
6038 _In_ int flags = match_default)
6039 {
6040 assert(text || start >= end);
6041 interval.end = start;
6042 type.start = interval.end;
6043 for (;;) {
6044 if (interval.end < end && text[interval.end]) {
6045 if (text[interval.end] == '/') {
6046 type.end = interval.end;
6047 interval.end++;
6048 break;
6049 }
6050 else if (isspace(text[interval.end]))
6051 goto error;
6052 else
6053 interval.end++;
6054 }
6055 else {
6056 type.end = interval.end;
6057 goto error;
6058 }
6059 }
6060 version_maj.start = interval.end;
6061 for (;;) {
6062 if (interval.end < end && text[interval.end]) {
6063 if (text[interval.end] == '.') {
6064 version_maj.end = interval.end;
6065 interval.end++;
6066 version_min.start = interval.end;
6067 for (;;) {
6068 if (interval.end < end && text[interval.end]) {
6069 if (isspace(text[interval.end])) {
6070 version_min.end = interval.end;
6071 version =
6072 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6073 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6074 break;
6075 }
6076 else
6077 interval.end++;
6078 }
6079 else
6080 goto error;
6081 }
6082 break;
6083 }
6084 else if (isspace(text[interval.end])) {
6085 version_maj.end = interval.end;
6086 version_min.start = 1;
6087 version_min.end = 0;
6088 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6089 break;
6090 }
6091 else
6092 interval.end++;
6093 }
6094 else
6095 goto error;
6096 }
6097 interval.start = start;
6098 return true;
6099
6100 error:
6101 type.start = 1;
6102 type.end = 0;
6103 version_maj.start = 1;
6104 version_maj.end = 0;
6105 version_min.start = 1;
6106 version_min.end = 0;
6107 version = 0x009;
6108 interval.start = 1;
6109 interval.end = 0;
6110 return false;
6111 }
6112
6113 virtual void invalidate()
6114 {
6115 type.start = 1;
6116 type.end = 0;
6117 version_maj.start = 1;
6118 version_maj.end = 0;
6119 version_min.start = 1;
6120 version_min.end = 0;
6121 version = 0x009;
6122 parser::invalidate();
6123 }
6124
6125 public:
6127 stdex::interval<size_t> version_maj;
6128 stdex::interval<size_t> version_min;
6130 };
6131
6135 class http_request : public parser
6136 {
6137 public:
6138 http_request(_In_ const std::locale& locale = std::locale()) :
6139 parser(locale),
6140 url(locale),
6141 protocol(locale)
6142 {}
6143
// Matches an HTTP request line: optional leading whitespace, the verb, whitespace, the URL
// and an optional protocol, followed by the rest of the line up to and including its line
// break.
//
// Returns true once verb and URL were found; `protocol` stays invalidated when the line ends
// (or the text ends) before a protocol could be matched. Returns false - with `verb`, `url`,
// `protocol` and `interval` invalidated - when a line break or the end of text is reached
// before the URL, or when the URL does not match.
6144 virtual bool match(
6145 _In_reads_or_z_(end) const char* text,
6146 _In_ size_t start = 0,
6147 _In_ size_t end = (size_t)-1,
6148 _In_ int flags = match_default)
6149 {
6150 assert(text || start >= end);
6151 interval.end = start;
6152
// Skip leading whitespace. The line-break test comes first in every loop of this function;
// presumably because line-break characters would otherwise be consumed as plain whitespace.
6153 for (;;) {
6154 if (m_line_break.match(text, interval.end, end, flags))
6155 goto error;
6156 else if (interval.end < end && text[interval.end]) {
6157 if (isspace(text[interval.end]))
6158 interval.end++;
6159 else
6160 break;
6161 }
6162 else
6163 goto error;
6164 }
// Verb: runs up to the first whitespace character, which is consumed as well.
6165 verb.start = interval.end;
6166 for (;;) {
6167 if (m_line_break.match(text, interval.end, end, flags))
6168 goto error;
6169 else if (interval.end < end && text[interval.end]) {
6170 if (isspace(text[interval.end])) {
6171 verb.end = interval.end;
6172 interval.end++;
6173 break;
6174 }
6175 else
6176 interval.end++;
6177 }
6178 else
6179 goto error;
6180 }
6181
// Skip any further whitespace between verb and URL.
6182 for (;;) {
6183 if (m_line_break.match(text, interval.end, end, flags))
6184 goto error;
6185 else if (interval.end < end && text[interval.end]) {
6186 if (isspace(text[interval.end]))
6187 interval.end++;
6188 else
6189 break;
6190 }
6191 else
6192 goto error;
6193 }
// NOTE(review): the statement of the success branch (listing line 6195) is missing from this
// extract; presumably it advances interval.end past the matched URL - restore it from the
// original source before building.
6194 if (url.match(text, interval.end, end, flags))
6196 else
6197 goto error;
6198
// The protocol is optional: from here on every exit path reports success.
6199 protocol.invalidate();
// Skip whitespace after the URL; a line break (consumed) or the end of text ends the request.
6200 for (;;) {
6201 if (m_line_break.match(text, interval.end, end, flags)) {
6202 interval.end = m_line_break.interval.end;
6203 goto end;
6204 }
6205 else if (interval.end < end && text[interval.end]) {
6206 if (isspace(text[interval.end]))
6207 interval.end++;
6208 else
6209 break;
6210 }
6211 else
6212 goto end;
6213 }
// Every branch below leaves the loop, so this body runs exactly once.
6214 for (;;) {
6215 if (m_line_break.match(text, interval.end, end, flags)) {
6216 interval.end = m_line_break.interval.end;
6217 goto end;
6218 }
6219 else if (protocol.match(text, interval.end, end, flags)) {
6220 interval.end = protocol.interval.end;
6221 break;
6222 }
6223 else
6224 goto end;
6225 }
6226
// Consume whatever follows the protocol, up to and including the line break.
6227 for (;;) {
6228 if (m_line_break.match(text, interval.end, end, flags)) {
6229 interval.end = m_line_break.interval.end;
6230 break;
6231 }
6232 else if (interval.end < end && text[interval.end])
6233 interval.end++;
6234 else
6235 goto end;
6236 }
6237
// Success exit.
6238 end:
6239 interval.start = start;
6240 return true;
6241
// Failure exit: put every reported range back into the "no match" state.
6242 error:
6243 verb.start = 1;
6244 verb.end = 0;
6245 url.invalidate();
6246 protocol.invalidate();
6247 interval.start = 1;
6248 interval.end = 0;
6249 return false;
6250 }
6251
6252 virtual void invalidate()
6253 {
6254 verb.start = 1;
6255 verb.end = 0;
6256 url.invalidate();
6257 protocol.invalidate();
6258 parser::invalidate();
6259 }
6260
6261 public:
6263 http_url url;
6264 http_protocol protocol;
6265
6266 protected:
6267 http_line_break m_line_break;
6268 };
6269
6273 class http_header : public parser
6274 {
6275 public:
// Matches one HTTP header of the form `name [whitespace] : value`.
//
// On success `name` holds the header name and `value` spans from the first to one past the
// last non-whitespace character read after the colon. When no such character was read,
// `value` is left as start == (size_t)-1, end == 0. On failure `name`, `value` and `interval`
// are set to the invalid interval (start 1, end 0) and false is returned.
6276 virtual bool match(
6277 _In_reads_or_z_(end) const char* text,
6278 _In_ size_t start = 0,
6279 _In_ size_t end = (size_t)-1,
6280 _In_ int flags = match_default)
6281 {
6282 assert(text || start >= end);
6283 interval.end = start;
6284
// A header cannot start with a line break.
// NOTE(review): the second operand of this condition (listing line 6286) is missing from this
// extract - restore it from the original source before building.
6285 if (m_line_break.match(text, interval.end, end, flags) ||
6287 goto error;
// Name: runs up to the first whitespace character or ':'.
6288 name.start = interval.end;
6289 for (;;) {
6290 if (m_line_break.match(text, interval.end, end, flags))
6291 goto error;
6292 else if (interval.end < end && text[interval.end]) {
6293 if (isspace(text[interval.end])) {
6294 name.end = interval.end;
6295 interval.end++;
// The name ended in whitespace: skip the rest of it; the next character must be ':'.
6296 for (;;) {
6297 if (m_line_break.match(text, interval.end, end, flags))
6298 goto error;
6299 else if (interval.end < end && text[interval.end]) {
6300 if (isspace(text[interval.end]))
6301 interval.end++;
6302 else
6303 break;
6304 }
6305 else
6306 goto error;
6307 }
6308 if (interval.end < end && text[interval.end] == ':') {
6309 interval.end++;
6310 break;
6311 }
6312 else
6313 goto error;
// Not reachable: both branches above already leave this block.
6314 break;
6315 }
6316 else if (text[interval.end] == ':') {
6317 name.end = interval.end;
6318 interval.end++;
6319 break;
6320 }
6321 else
6322 interval.end++;
6323 }
6324 else
6325 goto error;
6326 }
// (size_t)-1 marks "no value character seen yet"; see the test further down.
6327 value.start = (size_t)-1;
6328 value.end = 0;
6329 for (;;) {
6330 if (m_line_break.match(text, interval.end, end, flags)) {
// The line break is consumed; the condition below decides whether parsing continues on the
// following line or the header ends here.
// NOTE(review): the second operand of that condition (listing line 6333) is missing from
// this extract - restore it from the original source before building.
6331 interval.end = m_line_break.interval.end;
6332 if (!m_line_break.match(text, interval.end, end, flags) &&
6334 interval.end++;
6335 else
6336 break;
6337 }
6338 else if (interval.end < end && text[interval.end]) {
// Whitespace is skipped without touching `value`, so leading and trailing whitespace end up
// outside the reported range.
6339 if (isspace(text[interval.end]))
6340 interval.end++;
6341 else {
6342 if (value.start == (size_t)-1) value.start = interval.end;
6343 value.end = ++interval.end;
6344 }
6345 }
6346 else
6347 break;
6348 }
6349 interval.start = start;
6350 return true;
6351
// Failure exit: put every reported range back into the "no match" state.
6352 error:
6353 name.start = 1;
6354 name.end = 0;
6355 value.start = 1;
6356 value.end = 0;
6357 interval.start = 1;
6358 interval.end = 0;
6359 return false;
6360 }
6361
6362 virtual void invalidate()
6363 {
6364 name.start = 1;
6365 name.end = 0;
6366 value.start = 1;
6367 value.end = 0;
6368 parser::invalidate();
6369 }
6370
6371 public:
6374
6375 protected:
6376 http_line_break m_line_break;
6377 };
6378
6382 template <class T>
6383 class http_value_collection : public T
6384 {
6385 public:
6386 void insert(
6387 _In_reads_or_z_(end) const char* text,
6388 _In_ size_t start = 0,
6389 _In_ size_t end = (size_t)-1,
6390 _In_ int flags = match_default)
6391 {
6392 while (start < end) {
6393 while (start < end && text[start] && isspace(text[start])) start++;
6394 if (start < end && text[start] == ',') {
6395 start++;
6396 while (start < end&& text[start] && isspace(text[start])) start++;
6397 }
6398 T::key_type el;
6399 if (el.match(text, start, end, flags)) {
6400 start = el.interval.end;
6401 T::insert(std::move(el));
6402 }
6403 else
6404 break;
6405 }
6406 }
6407 };
6408
// Comparison functor: its call operator returns true when a.factor.value is greater than
// b.factor.value.
// NOTE(review): the line declaring the functor type itself (listing line 6410) is missing
// from this extract - restore it from the original source before building.
6409 template <class T>
6411 constexpr bool operator()(const T& a, const T& b) const noexcept
6412 {
6413 return a.factor.value > b.factor.value;
6414 }
6415 };
6416
6420 template <class T, class _Alloc = std::allocator<T>>
6422
6426 template <class T>
6428 {
6429 public:
// Constructor parameter list and member initializers of the JSON string parser.
// NOTE(review): the line carrying the constructor name and opening parenthesis (listing line
// 6430) is missing from this extract.
//
// How match() uses each sub-parser:
//   quote  - string delimiter, tested at the opening and the closing position; after `escape`
//            it yields '"'
//   chr    - run of ordinary characters, copied to `value` verbatim
//   escape - escape introducer; two in a row yield '\\'
//   sol, bs, ff, lf, cr, htab - tested right after `escape`; they yield '/', '\b', '\f', '\n',
//            '\r' and '\t' respectively
//   uni    - tested right after `escape`; must be followed by exactly four digits read by `hex`
//   hex    - reads those digits; its `value` becomes the character appended to `value`
//   locale - forwarded to the base parser
6431 _In_ const std::shared_ptr<basic_parser<T>>& quote,
6432 _In_ const std::shared_ptr<basic_parser<T>>& chr,
6433 _In_ const std::shared_ptr<basic_parser<T>>& escape,
6434 _In_ const std::shared_ptr<basic_parser<T>>& sol,
6435 _In_ const std::shared_ptr<basic_parser<T>>& bs,
6436 _In_ const std::shared_ptr<basic_parser<T>>& ff,
6437 _In_ const std::shared_ptr<basic_parser<T>>& lf,
6438 _In_ const std::shared_ptr<basic_parser<T>>& cr,
6439 _In_ const std::shared_ptr<basic_parser<T>>& htab,
6440 _In_ const std::shared_ptr<basic_parser<T>>& uni,
6441 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
6442 _In_ const std::locale& locale = std::locale()) :
6443 basic_parser<T>(locale),
6444 m_quote(quote),
6445 m_chr(chr),
6446 m_escape(escape),
6447 m_sol(sol),
6448 m_bs(bs),
6449 m_ff(ff),
6450 m_lf(lf),
6451 m_cr(cr),
6452 m_htab(htab),
6453 m_uni(uni),
6454 m_hex(hex)
6455 {}
6456
6457 virtual bool match(
6458 _In_reads_or_z_(end) const T* text,
6459 _In_ size_t start = 0,
6460 _In_ size_t end = (size_t)-1,
6461 _In_ int flags = match_default)
6462 {
6463 assert(text || start >= end);
6464 interval.end = start;
6465 if (m_quote->match(text, interval.end, end, flags)) {
6466 interval.end = m_quote->interval.end;
6467 value.clear();
6468 for (;;) {
6469 if (m_quote->match(text, interval.end, end, flags)) {
6470 interval.start = start;
6471 interval.end = m_quote->interval.end;
6472 return true;
6473 }
6474 if (m_escape->match(text, interval.end, end, flags)) {
6475 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
6476 value += '"'; interval.end = m_quote->interval.end;
6477 continue;
6478 }
6479 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
6480 value += '/'; interval.end = m_sol->interval.end;
6481 continue;
6482 }
6483 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
6484 value += '\b'; interval.end = m_bs->interval.end;
6485 continue;
6486 }
6487 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
6488 value += '\f'; interval.end = m_ff->interval.end;
6489 continue;
6490 }
6491 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
6492 value += '\n'; interval.end = m_lf->interval.end;
6493 continue;
6494 }
6495 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
6496 value += '\r'; interval.end = m_cr->interval.end;
6497 continue;
6498 }
6499 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
6500 value += '\t'; interval.end = m_htab->interval.end;
6501 continue;
6502 }
6503 if (
6504 m_uni->match(text, m_escape->interval.end, end, flags) &&
6505 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
6506 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
6507 {
6508 assert(m_hex->value <= 0xffff);
6509 if (sizeof(T) == 1) {
6510 if (m_hex->value > 0x7ff) {
6511 value += (T)(0xe0 | (m_hex->value >> 12) & 0x0f);
6512 value += (T)(0x80 | (m_hex->value >> 6) & 0x3f);
6513 value += (T)(0x80 | m_hex->value & 0x3f);
6514 }
6515 else if (m_hex->value > 0x7f) {
6516 value += (T)(0xc0 | (m_hex->value >> 6) & 0x1f);
6517 value += (T)(0x80 | m_hex->value & 0x3f);
6518 }
6519 else
6520 value += (T)(m_hex->value & 0x7f);
6521 }
6522 else
6523 value += (T)m_hex->value;
6524 interval.end = m_hex->interval.end;
6525 continue;
6526 }
6527 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
6528 value += '\\'; interval.end = m_escape->interval.end;
6529 continue;
6530 }
6531 }
6532 if (m_chr->match(text, interval.end, end, flags)) {
6533 value.Prilepi(text + m_chr->interval.start, m_chr->interval.size());
6534 interval.end = m_chr->interval.end;
6535 continue;
6536 }
6537 break;
6538 }
6539 }
6540 value.clear();
6541 interval.start = (interval.end = start) + 1;
6542 return false;
6543 }
6544
// Discards the decoded string held in `value`.
// NOTE(review): one statement (listing line 6548) is missing from this extract; presumably
// the call to the base class invalidate(), as in the other parsers - confirm against the
// original source.
6545 virtual void invalidate()
6546 {
6547 value.clear();
6549 }
6550
6551 public:
// Decoded string content (escape sequences resolved); filled by match(), cleared on failure
// and by invalidate().
6552 std::basic_string<T> value;
6553
6554 protected:
// Sub-parsers supplied to the constructor; match() consults them as follows.
// String delimiter; after m_escape it yields '"'.
6555 std::shared_ptr<basic_parser<T>> m_quote;
// Run of ordinary characters, copied to `value` verbatim.
6556 std::shared_ptr<basic_parser<T>> m_chr;
// Escape introducer; two in a row yield '\\'.
6557 std::shared_ptr<basic_parser<T>> m_escape;
// After m_escape: yields '/'.
6558 std::shared_ptr<basic_parser<T>> m_sol;
// After m_escape: yields '\b'.
6559 std::shared_ptr<basic_parser<T>> m_bs;
// After m_escape: yields '\f'.
6560 std::shared_ptr<basic_parser<T>> m_ff;
// After m_escape: yields '\n'.
6561 std::shared_ptr<basic_parser<T>> m_lf;
// After m_escape: yields '\r'.
6562 std::shared_ptr<basic_parser<T>> m_cr;
// After m_escape: yields '\t'.
6563 std::shared_ptr<basic_parser<T>> m_htab;
// After m_escape: must be followed by exactly four digits read by m_hex.
6564 std::shared_ptr<basic_parser<T>> m_uni;
// Reads the digits following m_uni; its `value` supplies the character to append.
6565 std::shared_ptr<basic_integer16<T>> m_hex;
6566 };
6567
// tjson_string follows the build's character set: it aliases wjson_string when _UNICODE is
// defined and json_string otherwise.
6570#ifdef _UNICODE
6571 using tjson_string = wjson_string;
6572#else
6573 using tjson_string = json_string;
6574#endif
6575 }
6576}
6577
6578#undef ENUM_FLAG_OPERATOR
6579#undef ENUM_FLAGS
6580
6581#ifdef _MSC_VER
6582#pragma warning(pop)
6583#endif
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4392
Test for any code unit.
Definition parser.hpp:219
Test for beginning of line.
Definition parser.hpp:613
Test for any.
Definition parser.hpp:1055
Test for chemical formula.
Definition parser.hpp:4666
Test for any code unit from a given string of code units.
Definition parser.hpp:718
Test for specific code unit.
Definition parser.hpp:289
Test for date.
Definition parser.hpp:4022
Test for valid DNS domain character.
Definition parser.hpp:2803
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2841
Test for DNS domain/hostname.
Definition parser.hpp:2903
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2967
Test for e-mail address.
Definition parser.hpp:3791
Test for emoticon.
Definition parser.hpp:3899
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3988
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3989
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3991
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3990
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3987
Test for end of line.
Definition parser.hpp:651
Test for fraction.
Definition parser.hpp:1684
Test for decimal integer.
Definition parser.hpp:1293
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1378
bool has_separators
Did integer have any separators?
Definition parser.hpp:1438
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1437
Test for hexadecimal integer.
Definition parser.hpp:1459
Base class for integer testing.
Definition parser.hpp:1271
size_t value
Calculated value of the numeral.
Definition parser.hpp:1285
Test for IPv4 address.
Definition parser.hpp:2343
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2458
struct in_addr value
IPv4 address value.
Definition parser.hpp:2459
Test for IPv6 address.
Definition parser.hpp:2562
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2766
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2764
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2765
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2490
Test for repeating.
Definition parser.hpp:908
bool m_greedy
try to match as long sequence as possible
Definition parser.hpp:947
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:944
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:945
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:946
Test for JSON string.
Definition parser.hpp:6428
Test for mixed numeral.
Definition parser.hpp:1919
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:2025
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2023
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2022
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2021
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2024
Test for monetary numeral.
Definition parser.hpp:2214
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2320
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2325
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2323
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2326
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2324
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2321
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2322
"No-op" match
Definition parser.hpp:187
Base template for all parsers.
Definition parser.hpp:68
interval< size_t > interval
Region of the last match.
Definition parser.hpp:167
Test for permutation.
Definition parser.hpp:1195
Test for phone number.
Definition parser.hpp:4515
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4641
Test for any punctuation code unit.
Definition parser.hpp:461
Test for Roman numeral.
Definition parser.hpp:1568
Test for scientific numeral.
Definition parser.hpp:2045
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2189
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2193
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2187
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2188
double value
Calculated value of the numeral.
Definition parser.hpp:2197
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2195
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2192
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2194
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2196
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2191
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2190
Test for match score.
Definition parser.hpp:1747
Test for sequence.
Definition parser.hpp:1004
Definition parser.hpp:686
Test for signed numeral.
Definition parser.hpp:1833
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1901
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1900
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1899
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1902
Test for any space code unit.
Definition parser.hpp:382
Test for any space or punctuation code unit.
Definition parser.hpp:535
Test for any string.
Definition parser.hpp:1123
Test for given string.
Definition parser.hpp:813
Test for time.
Definition parser.hpp:4289
Test for valid URL password character.
Definition parser.hpp:3085
Test for valid URL path character.
Definition parser.hpp:3185
Test for URL path.
Definition parser.hpp:3293
Test for valid URL username character.
Definition parser.hpp:2986
Test for URL.
Definition parser.hpp:3434
Test for HTTP agent.
Definition parser.hpp:5951
Test for HTTP any type.
Definition parser.hpp:5073
Test for HTTP asterisk.
Definition parser.hpp:5715
Test for HTTP header.
Definition parser.hpp:6274
Test for HTTP language (RFC1766)
Definition parser.hpp:5583
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:4747
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5105
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5160
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5018
http_token name
Parameter name.
Definition parser.hpp:5062
http_value value
Parameter value.
Definition parser.hpp:5063
Test for HTTP protocol.
Definition parser.hpp:6027
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6129
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:4908
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:4964
Test for HTTP request.
Definition parser.hpp:6136
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:4783
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:4820
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:4854
Test for HTTP URL parameter.
Definition parser.hpp:5400
Test for HTTP URL path segment.
Definition parser.hpp:5311
Test for HTTP URL path.
Definition parser.hpp:5344
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:5393
Test for HTTP URL port.
Definition parser.hpp:5255
Test for HTTP URL server.
Definition parser.hpp:5218
Test for HTTP URL.
Definition parser.hpp:5481
Collection of HTTP values.
Definition parser.hpp:6384
Test for HTTP value (RFC2616: value)
Definition parser.hpp:4974
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5010
http_token token
Value when matched as token.
Definition parser.hpp:5011
Test for HTTP weight factor.
Definition parser.hpp:5646
float value
Calculated value of the weight factor.
Definition parser.hpp:5708
Test for HTTP weighted value.
Definition parser.hpp:5738
Base template for collection-holding parsers.
Definition parser.hpp:964
Test for any SGML code point.
Definition parser.hpp:251
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:770
Test for specific SGML code point.
Definition parser.hpp:338
Test for valid DNS domain SGML character.
Definition parser.hpp:2859
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2528
Test for any SGML punctuation code point.
Definition parser.hpp:502
Test for any SGML space code point.
Definition parser.hpp:425
Test for any SGML space or punctuation code point.
Definition parser.hpp:578
Test for SGML given string.
Definition parser.hpp:860
Test for valid URL password SGML character.
Definition parser.hpp:3137
Test for valid URL path SGML character.
Definition parser.hpp:3241
Test for valid URL username SGML character.
Definition parser.hpp:3037
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
T start
interval start
Definition interval.hpp:19
Definition parser.hpp:6410