stdex
Additional custom algorithms and algorithms not covered by Standard C++
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "interval.hpp"
10#include "memory.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include "system.hpp"
14#include <assert.h>
15#include <stdarg.h>
16#include <stdint.h>
17#if defined(_WIN32)
18#ifndef _WINSOCKAPI_
19#include <winsock2.h>
20#include <ws2ipdef.h>
21#endif
22#elif defined(__APPLE__)
23#include <netinet/in.h>
24#else
25#include <inaddr.h>
26#include <in6addr.h>
27#endif
28#include <limits>
29#include <list>
30#include <locale>
31#include <memory>
32#include <set>
33#include <string>
34
35#ifdef _MSC_VER
36#pragma warning(push)
37#pragma warning(disable: 4100)
38#endif
39
/// Generates binary operator X and compound assignment operator X= for scoped enumeration T.
/// One operand of the binary form (and the right operand of X=) may also be of T's
/// underlying integer type. All operators are constexpr, so combined flag values can be
/// used in constant expressions (constexpr variables, static_assert, case labels).
#define ENUM_FLAG_OPERATOR(T,X) \
inline constexpr T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline constexpr T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
inline constexpr T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline constexpr T& operator X= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
inline constexpr T& operator X= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }

/// Forward-declares scoped enumeration T with underlying type `type`, defines the
/// ~, |, ^ and & operators for it, and then reopens the enumeration so the caller
/// supplies the enumerator list right after the macro:
///     ENUM_FLAGS(my_flags, int) { none = 0, a = 1, b = 2 };
#define ENUM_FLAGS(T, type) \
enum class T : type; \
inline constexpr T operator ~ (T t) { return static_cast<T>(~static_cast<std::underlying_type_t<T>>(t)); } \
ENUM_FLAG_OPERATOR(T,|) \
ENUM_FLAG_OPERATOR(T,^) \
ENUM_FLAG_OPERATOR(T,&) \
enum class T : type
53
54namespace stdex
55{
56 namespace parser
57 {
/// No special matching behaviour requested.
constexpr int match_default = 0;
/// Bit flag: compare text case-insensitively.
constexpr int match_case_insensitive = 1 << 0;
/// Bit flag: allow line breaks to be matched by whitespace parsers.
constexpr int match_multiline = 1 << 1;
64
68 template <class T>
70 {
71 public:
72 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
73 virtual ~basic_parser() {}
74
75 bool search(
76 _In_reads_or_z_(end) const T* text,
77 _In_ size_t start = 0,
78 _In_ size_t end = (size_t)-1,
79 _In_ int flags = match_default)
80 {
81 for (size_t i = start; i < end && text[i]; i++)
82 if (match(text, i, end, flags))
83 return true;
84 return false;
85 }
86
/// Attempts to match the parser at position `start` of the text.
/// Pure virtual: every concrete parser supplies its own implementation.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in text at which matching begins
/// \param[in] end    Index in text that bounds the parsing range
/// \param[in] flags  Bitwise combination of match_* flags
///
/// \returns true on match, false otherwise
virtual bool match(
    _In_reads_or_z_(end) const T* text,
    _In_ size_t start = 0,
    _In_ size_t end = (size_t)-1,
    _In_ int flags = match_default) = 0;
92
93 template<class _Traits, class _Ax>
94 inline bool match(
95 const std::basic_string<T, _Traits, _Ax>& text,
96 _In_ size_t start = 0,
97 _In_ size_t end = (size_t)-1,
98 _In_ int flags = match_default)
99 {
100 return match(text.c_str(), start, std::min<size_t>(end, text.size()), flags);
101 }
102
103 virtual void invalidate()
104 {
105 this->interval.start = 1;
106 this->interval.end = 0;
107 }
108
109 protected:
111 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
112 {
113 if (text[start] == '&') {
114 // Potential entity start
115 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
116 for (chr_end = start + 1;; chr_end++) {
117 if (chr_end >= end || text[chr_end] == 0) {
118 // Unterminated entity
119 break;
120 }
121 if (text[chr_end] == ';') {
122 // Entity end
123 size_t n = chr_end - start - 1;
124 if (n >= 2 && text[start + 1] == '#') {
125 // Numerical entity
126 char32_t unicode;
127 if (text[start + 2] == 'x' || text[start + 2] == 'X')
128 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
129 else
130 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
131#ifdef _WIN32
132 if (unicode < 0x10000) {
133 buf[0] = (wchar_t)unicode;
134 buf[1] = 0;
135 }
136 else {
137 ucs4_to_surrogate_pair(buf, unicode);
138 buf[2] = 0;
139 }
140#else
141 buf[0] = (wchar_t)unicode;
142 buf[1] = 0;
143#endif
144 chr_end++;
145 return buf;
146 }
147 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
148 if (entity_w) {
149 chr_end++;
150 return entity_w;
151 }
152 // Unknown entity.
153 break;
154 }
155 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
156 // This char cannot possibly be a part of entity.
157 break;
158 }
159 }
160 }
161 buf[0] = text[start];
162 buf[1] = 0;
163 chr_end = start + 1;
164 return buf;
165 }
167
168 public:
170
171 protected:
172 std::locale m_locale;
173 };
174
177#ifdef _UNICODE
178 using tparser = wparser;
179#else
180 using tparser = parser;
181#endif
183
187 template <class T>
188 class basic_noop : public basic_parser<T>
189 {
190 public:
191 virtual bool match(
192 _In_reads_or_z_(end) const T* text,
193 _In_ size_t start = 0,
194 _In_ size_t end = (size_t)-1,
195 _In_ int flags = match_default)
196 {
197 assert(text || start >= end);
198 if (start < end && text[start]) {
199 this->interval.start = this->interval.end = start;
200 return true;
201 }
202 this->interval.start = (this->interval.end = start) + 1;
203 return false;
204 }
205 };
206
207 using noop = basic_noop<char>;
209#ifdef _UNICODE
210 using tnoop = wnoop;
211#else
212 using tnoop = noop;
213#endif
215
219 template <class T>
220 class basic_any_cu : public basic_parser<T>
221 {
222 public:
223 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
224
225 virtual bool match(
226 _In_reads_or_z_(end) const T* text,
227 _In_ size_t start = 0,
228 _In_ size_t end = (size_t)-1,
229 _In_ int flags = match_default)
230 {
231 assert(text || start >= end);
232 if (start < end && text[start]) {
233 this->interval.end = (this->interval.start = start) + 1;
234 return true;
235 }
236 this->interval.start = (this->interval.end = start) + 1;
237 return false;
238 }
239 };
240
243#ifdef _UNICODE
244 using tany_cu = wany_cu;
245#else
246 using tany_cu = any_cu;
247#endif
248
252 class sgml_any_cp : public basic_any_cu<char>
253 {
254 public:
255 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
256
257 virtual bool match(
258 _In_reads_or_z_(end) const char* text,
259 _In_ size_t start = 0,
260 _In_ size_t end = (size_t)-1,
261 _In_ int flags = match_default)
262 {
263 assert(text || start >= end);
264 if (start < end && text[start]) {
265 if (text[start] == '&') {
266 // SGML entity
267 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
268 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
269 if (text[this->interval.end] == ';') {
270 this->interval.end++;
271 this->interval.start = start;
272 return true;
273 }
274 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
275 break;
276 // Unterminated entity
277 }
278 this->interval.end = (this->interval.start = start) + 1;
279 return true;
280 }
281 this->interval.start = (this->interval.end = start) + 1;
282 return false;
283 }
284 };
285
289 template <class T>
290 class basic_cu : public basic_parser<T>
291 {
292 public:
293 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
294 basic_parser<T>(locale),
295 m_chr(chr),
296 m_invert(invert)
297 {}
298
299 virtual bool match(
300 _In_reads_or_z_(end) const T* text,
301 _In_ size_t start = 0,
302 _In_ size_t end = (size_t)-1,
303 _In_ int flags = match_default)
304 {
305 assert(text || start >= end);
306 if (start < end && text[start]) {
307 bool r;
308 if (flags & match_case_insensitive) {
309 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
310 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
311 }
312 else
313 r = text[start] == m_chr;
314 if ((r && !m_invert) || (!r && m_invert)) {
315 this->interval.end = (this->interval.start = start) + 1;
316 return true;
317 }
318 }
319 this->interval.start = (this->interval.end = start) + 1;
320 return false;
321 }
322
323 protected:
324 T m_chr;
325 bool m_invert;
326 };
327
328 using cu = basic_cu<char>;
329 using wcu = basic_cu<wchar_t>;
330#ifdef _UNICODE
331 using tcu = wcu;
332#else
333 using tcu = cu;
334#endif
335
339 class sgml_cp : public sgml_parser
340 {
341 public:
342 sgml_cp(const char* chr, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
343 sgml_parser(locale),
344 m_invert(invert)
345 {
346 assert(chr || !count);
347 wchar_t buf[3];
348 size_t chr_end;
349 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
350 }
351
352 virtual bool match(
353 _In_reads_or_z_(end) const char* text,
354 _In_ size_t start = 0,
355 _In_ size_t end = (size_t)-1,
356 _In_ int flags = match_default)
357 {
358 assert(text || start >= end);
359 if (start < end && text[start]) {
360 wchar_t buf[3];
361 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
362 bool r = ((flags & match_case_insensitive) ?
363 stdex::strnicmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size(), m_locale) :
364 stdex::strncmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size())) == 0;
365 if ((r && !m_invert) || (!r && m_invert)) {
366 this->interval.start = start;
367 return true;
368 }
369 }
370 this->interval.start = (this->interval.end = start) + 1;
371 return false;
372 }
373
374 protected:
375 std::wstring m_chr;
376 bool m_invert;
377 };
378
382 template <class T>
383 class basic_space_cu : public basic_parser<T>
384 {
385 public:
386 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
387 basic_parser<T>(locale),
388 m_invert(invert)
389 {}
390
391 virtual bool match(
392 _In_reads_or_z_(end) const T* text,
393 _In_ size_t start = 0,
394 _In_ size_t end = (size_t)-1,
395 _In_ int flags = match_default)
396 {
397 assert(text || start >= end);
398 if (start < end && text[start]) {
399 bool r =
400 ((flags & match_multiline) || !islbreak(text[start])) &&
401 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
402 if ((r && !m_invert) || (!r && m_invert)) {
403 this->interval.end = (this->interval.start = start) + 1;
404 return true;
405 }
406 }
407 this->interval.start = (this->interval.end = start) + 1;
408 return false;
409 }
410
411 protected:
412 bool m_invert;
413 };
414
417#ifdef _UNICODE
418 using tspace_cu = wspace_cu;
419#else
420 using tspace_cu = space_cu;
421#endif
422
426 class sgml_space_cp : public basic_space_cu<char>
427 {
428 public:
429 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
431 {}
432
433 virtual bool match(
434 _In_reads_or_z_(end) const char* text,
435 _In_ size_t start = 0,
436 _In_ size_t end = (size_t)-1,
437 _In_ int flags = match_default)
438 {
439 assert(text || start >= end);
440 if (start < end && text[start]) {
441 wchar_t buf[3];
442 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
443 const wchar_t* chr_end = chr + stdex::strlen(chr);
444 bool r =
445 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
446 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
447 if ((r && !m_invert) || (!r && m_invert)) {
448 this->interval.start = start;
449 return true;
450 }
451 }
452
453 this->interval.start = (this->interval.end = start) + 1;
454 return false;
455 }
456 };
457
461 template <class T>
462 class basic_punct_cu : public basic_parser<T>
463 {
464 public:
465 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
466 basic_parser<T>(locale),
467 m_invert(invert)
468 {}
469
470 virtual bool match(
471 _In_reads_or_z_(end) const T* text,
472 _In_ size_t start = 0,
473 _In_ size_t end = (size_t)-1,
474 _In_ int flags = match_default)
475 {
476 assert(text || start >= end);
477 if (start < end && text[start]) {
478 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
479 if ((r && !m_invert) || (!r && m_invert)) {
480 this->interval.end = (this->interval.start = start) + 1;
481 return true;
482 }
483 }
484 this->interval.start = (this->interval.end = start) + 1;
485 return false;
486 }
487
488 protected:
489 bool m_invert;
490 };
491
494#ifdef _UNICODE
495 using tpunct_cu = wpunct_cu;
496#else
497 using tpunct_cu = punct_cu;
498#endif
499
503 class sgml_punct_cp : public basic_punct_cu<char>
504 {
505 public:
506 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
508 {}
509
510 virtual bool match(
511 _In_reads_or_z_(end) const char* text,
512 _In_ size_t start = 0,
513 _In_ size_t end = (size_t)-1,
514 _In_ int flags = match_default)
515 {
516 assert(text || start >= end);
517 if (start < end && text[start]) {
518 wchar_t buf[3];
519 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
520 const wchar_t* chr_end = chr + stdex::strlen(chr);
521 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
522 if ((r && !m_invert) || (!r && m_invert)) {
523 this->interval.start = start;
524 return true;
525 }
526 }
527 this->interval.start = (this->interval.end = start) + 1;
528 return false;
529 }
530 };
531
535 template <class T>
537 {
538 public:
539 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
540 basic_parser<T>(locale),
541 m_invert(invert)
542 {}
543
544 virtual bool match(
545 _In_reads_or_z_(end) const T* text,
546 _In_ size_t start = 0,
547 _In_ size_t end = (size_t)-1,
548 _In_ int flags = match_default)
549 {
550 assert(text || start >= end);
551 if (start < end && text[start]) {
552 bool r =
553 ((flags & match_multiline) || !islbreak(text[start])) &&
554 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
555 if ((r && !m_invert) || (!r && m_invert)) {
556 this->interval.end = (this->interval.start = start) + 1;
557 return true;
558 }
559 }
560 this->interval.start = (this->interval.end = start) + 1;
561 return false;
562 }
563
564 protected:
565 bool m_invert;
566 };
567
570#ifdef _UNICODE
572#else
574#endif
575
580 {
581 public:
582 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
584 {}
585
586 virtual bool match(
587 _In_reads_or_z_(end) const char* text,
588 _In_ size_t start = 0,
589 _In_ size_t end = (size_t)-1,
590 _In_ int flags = match_default)
591 {
592 assert(text || start >= end);
593 if (start < end && text[start]) {
594 wchar_t buf[3];
595 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
596 const wchar_t* chr_end = chr + stdex::strlen(chr);
597 bool r =
598 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
599 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
600 if ((r && !m_invert) || (!r && m_invert)) {
601 this->interval.start = start;
602 return true;
603 }
604 }
605 this->interval.start = (this->interval.end = start) + 1;
606 return false;
607 }
608 };
609
613 template <class T>
614 class basic_bol : public basic_parser<T>
615 {
616 public:
617 basic_bol(bool invert = false) : m_invert(invert) {}
618
619 virtual bool match(
620 _In_reads_or_z_(end) const T* text,
621 _In_ size_t start = 0,
622 _In_ size_t end = (size_t)-1,
623 _In_ int flags = match_default)
624 {
625 assert(text || start >= end);
626 bool r = start == 0 || (start <= end && islbreak(text[start - 1]));
627 if ((r && !m_invert) || (!r && m_invert)) {
628 this->interval.end = this->interval.start = start;
629 return true;
630 }
631 this->interval.start = (this->interval.end = start) + 1;
632 return false;
633 }
634
635 protected:
636 bool m_invert;
637 };
638
639 using bol = basic_bol<char>;
640 using wbol = basic_bol<wchar_t>;
641#ifdef _UNICODE
642 using tbol = wbol;
643#else
644 using tbol = bol;
645#endif
647
651 template <class T>
652 class basic_eol : public basic_parser<T>
653 {
654 public:
655 basic_eol(bool invert = false) : m_invert(invert) {}
656
657 virtual bool match(
658 _In_reads_or_z_(end) const T* text,
659 _In_ size_t start = 0,
660 _In_ size_t end = (size_t)-1,
661 _In_ int flags = match_default)
662 {
663 assert(text || start >= end);
664 bool r = islbreak(text[start]);
665 if ((r && !m_invert) || (!r && m_invert)) {
666 this->interval.end = this->interval.start = start;
667 return true;
668 }
669 this->interval.start = (this->interval.end = start) + 1;
670 return false;
671 }
672
673 protected:
674 bool m_invert;
675 };
676
677 using eol = basic_eol<char>;
678 using weol = basic_eol<wchar_t>;
679#ifdef _UNICODE
680 using teol = weol;
681#else
682 using teol = eol;
683#endif
685
686 template <class T>
687 class basic_set : public basic_parser<T>
688 {
689 public:
690 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
691 basic_parser<T>(locale),
692 hit_offset((size_t)-1),
693 m_invert(invert)
694 {}
695
696 virtual bool match(
697 _In_reads_or_z_(end) const T* text,
698 _In_ size_t start = 0,
699 _In_ size_t end = (size_t)-1,
700 _In_ int flags = match_default) = 0;
701
702 virtual void invalidate()
703 {
704 hit_offset = (size_t)-1;
706 }
707
708 public:
709 size_t hit_offset;
710
711 protected:
712 bool m_invert;
713 };
714
718 template <class T>
719 class basic_cu_set : public basic_set<T>
720 {
721 public:
723 _In_reads_or_z_(count) const T* set,
724 _In_ size_t count = (size_t)-1,
725 _In_ bool invert = false,
726 _In_ const std::locale& locale = std::locale()) :
727 basic_set<T>(invert, locale)
728 {
729 if (set)
730 m_set.assign(set, set + stdex::strnlen(set, count));
731 }
732
733 virtual bool match(
734 _In_reads_or_z_(end) const T* text,
735 _In_ size_t start = 0,
736 _In_ size_t end = (size_t)-1,
737 _In_ int flags = match_default)
738 {
739 assert(text || start >= end);
740 if (start < end && text[start]) {
741 const T* set = m_set.c_str();
742 size_t r = (flags & match_case_insensitive) ?
743 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
744 stdex::strnchr(set, m_set.size(), text[start]);
745 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
746 this->hit_offset = r;
747 this->interval.end = (this->interval.start = start) + 1;
748 return true;
749 }
750 }
751 this->hit_offset = (size_t)-1;
752 this->interval.start = (this->interval.end = start) + 1;
753 return false;
754 }
755
756 protected:
757 std::basic_string<T> m_set;
758 };
759
762#ifdef _UNICODE
763 using tcu_set = wcu_set;
764#else
765 using tcu_set = cu_set;
766#endif
767
771 class sgml_cp_set : public basic_set<char>
772 {
773 public:
774 sgml_cp_set(const char* set, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
775 basic_set<char>(invert, locale)
776 {
777 if (set)
778 m_set = sgml2wstr(set, count);
779 }
780
781 virtual bool match(
782 _In_reads_or_z_(end) const char* text,
783 _In_ size_t start = 0,
784 _In_ size_t end = (size_t)-1,
785 _In_ int flags = match_default)
786 {
787 assert(text || start >= end);
788 if (start < end && text[start]) {
789 wchar_t buf[3];
790 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
791 const wchar_t* set = m_set.c_str();
792 size_t r = (flags & match_case_insensitive) ?
793 stdex::strnistr(set, m_set.size(), chr, m_locale) :
794 stdex::strnstr(set, m_set.size(), chr);
795 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
796 hit_offset = r;
797 this->interval.start = start;
798 return true;
799 }
800 }
801 hit_offset = (size_t)-1;
802 this->interval.start = (this->interval.end = start) + 1;
803 return false;
804 }
805
806 protected:
807 std::wstring m_set;
808 };
809
813 template <class T>
814 class basic_string : public basic_parser<T>
815 {
816 public:
818 _In_reads_or_z_(count) const T* str,
819 _In_ size_t count = (size_t)-1,
820 _In_ const std::locale& locale = std::locale()) :
821 basic_parser<T>(locale),
822 m_str(str, str + stdex::strnlen(str, count))
823 {}
824
825 virtual bool match(
826 _In_reads_or_z_(end) const T* text,
827 _In_ size_t start = 0,
828 _In_ size_t end = (size_t)-1,
829 _In_ int flags = match_default)
830 {
831 assert(text || start >= end);
832 size_t
833 m = m_str.size(),
834 n = std::min<size_t>(end - start, m);
835 bool r = ((flags & match_case_insensitive) ?
836 stdex::strnicmp(text + start, n, m_str.c_str(), m, this->m_locale) :
837 stdex::strncmp(text + start, n, m_str.c_str(), m)) == 0;
838 if (r) {
839 this->interval.end = (this->interval.start = start) + n;
840 return true;
841 }
842 this->interval.start = (this->interval.end = start) + 1;
843 return false;
844 }
845
846 protected:
847 std::basic_string<T> m_str;
848 };
849
852#ifdef _UNICODE
853 using tstring = wstring;
854#else
855 using tstring = string;
856#endif
857
862 {
863 public:
864 sgml_string(const char* str, size_t count = (size_t)-1, _In_ const std::locale& locale = std::locale()) :
865 sgml_parser(locale),
866 m_str(sgml2wstr(str, count))
867 {}
868
869 virtual bool match(
870 _In_reads_or_z_(end) const char* text,
871 _In_ size_t start = 0,
872 _In_ size_t end = (size_t)-1,
873 _In_ int flags = match_default)
874 {
875 assert(text || start >= end);
876 const wchar_t* str = m_str.c_str();
877 const bool case_insensitive = flags & match_case_insensitive ? true : false;
878 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
879 for (this->interval.end = start;;) {
880 if (!*str) {
881 this->interval.start = start;
882 return true;
883 }
884 if (this->interval.end >= end || !text[this->interval.end]) {
885 this->interval.start = (this->interval.end = start) + 1;
886 return false;
887 }
888 wchar_t buf[3];
889 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
890 for (; *chr; ++str, ++chr) {
891 if (!*str ||
892 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
893 {
894 this->interval.start = (this->interval.end = start) + 1;
895 return false;
896 }
897 }
898 }
899 }
900
901 protected:
902 std::wstring m_str;
903 };
904
908 template <class T>
910 {
911 public:
912 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = (size_t)-1, bool greedy = true) :
913 m_el(el),
917 {}
918
919 virtual bool match(
920 _In_reads_or_z_(end) const T* text,
921 _In_ size_t start = 0,
922 _In_ size_t end = (size_t)-1,
923 _In_ int flags = match_default)
924 {
925 assert(text || start >= end);
926 this->interval.start = this->interval.end = start;
927 for (size_t i = 0; ; i++) {
928 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
929 return true;
930 if (!m_el->match(text, this->interval.end, end, flags)) {
931 if (i >= m_min_iterations)
932 return true;
933 break;
934 }
935 if (m_el->interval.end == this->interval.end) {
936 // Element did match, but the matching interval was empty. Quit instead of spinning.
937 return true;
938 }
939 this->interval.end = m_el->interval.end;
940 }
941 this->interval.start = (this->interval.end = start) + 1;
942 return false;
943 }
944
945 protected:
946 std::shared_ptr<basic_parser<T>> m_el;
949 bool m_greedy;
950 };
951
954#ifdef _UNICODE
955 using titerations = witerations;
956#else
957 using titerations = iterations;
958#endif
960
964 template <class T>
966 {
967 protected:
968 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
969
970 public:
972 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
973 _In_ size_t count,
974 _In_ const std::locale& locale = std::locale()) :
975 basic_parser<T>(locale)
976 {
977 assert(el || !count);
978 m_collection.reserve(count);
979 for (size_t i = 0; i < count; i++)
980 m_collection.push_back(el[i]);
981 }
982
984 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
985 _In_ const std::locale& locale = std::locale()) :
986 basic_parser<T>(locale),
987 m_collection(std::move(collection))
988 {}
989
990 virtual void invalidate()
991 {
992 for (auto& el: m_collection)
993 el->invalidate();
995 }
996
997 protected:
998 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
999 };
1000
1004 template <class T>
1006 {
1007 public:
1009 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1010 _In_ size_t count = 0,
1011 _In_ const std::locale& locale = std::locale()) :
1012 parser_collection<T>(el, count, locale)
1013 {}
1014
1016 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1017 _In_ const std::locale& locale = std::locale()) :
1018 parser_collection<T>(std::move(collection), locale)
1019 {}
1020
1021 virtual bool match(
1022 _In_reads_or_z_(end) const T* text,
1023 _In_ size_t start = 0,
1024 _In_ size_t end = (size_t)-1,
1025 _In_ int flags = match_default)
1026 {
1027 assert(text || start >= end);
1028 this->interval.end = start;
1029 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1030 if (!(*i)->match(text, this->interval.end, end, flags)) {
1031 for (++i; i != this->m_collection.end(); ++i)
1032 (*i)->invalidate();
1033 this->interval.start = (this->interval.end = start) + 1;
1034 return false;
1035 }
1036 this->interval.end = (*i)->interval.end;
1037 }
1038 this->interval.start = start;
1039 return true;
1040 }
1041 };
1042
1045#ifdef _UNICODE
1046 using tsequence = wsequence;
1047#else
1048 using tsequence = sequence;
1049#endif
1051
1055 template <class T>
1057 {
1058 protected:
1059 basic_branch(_In_ const std::locale& locale) :
1060 parser_collection<T>(locale),
1061 hit_offset((size_t)-1)
1062 {}
1063
1064 public:
1066 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1067 _In_ size_t count = 0,
1068 _In_ const std::locale& locale = std::locale()) :
1069 parser_collection<T>(el, count, locale),
1070 hit_offset((size_t)-1)
1071 {}
1072
1074 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1075 _In_ const std::locale& locale = std::locale()) :
1076 parser_collection<T>(std::move(collection), locale),
1077 hit_offset((size_t)-1)
1078 {}
1079
1080 virtual bool match(
1081 _In_reads_or_z_(end) const T* text,
1082 _In_ size_t start = 0,
1083 _In_ size_t end = (size_t)-1,
1084 _In_ int flags = match_default)
1085 {
1086 assert(text || start >= end);
1087 hit_offset = 0;
1088 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1089 if ((*i)->match(text, start, end, flags)) {
1090 this->interval = (*i)->interval;
1091 for (++i; i != this->m_collection.end(); ++i)
1092 (*i)->invalidate();
1093 return true;
1094 }
1095 }
1096 hit_offset = (size_t)-1;
1097 this->interval.start = (this->interval.end = start) + 1;
1098 return false;
1099 }
1100
1101 virtual void invalidate()
1102 {
1103 hit_offset = (size_t)-1;
1105 }
1106
1107 public:
1108 size_t hit_offset;
1109 };
1110
1111 using branch = basic_branch<char>;
1113#ifdef _UNICODE
1114 using tbranch = wbranch;
1115#else
1116 using tbranch = branch;
1117#endif
1119
1123 template <class T, class T_parser = basic_string<T>>
1125 {
1126 public:
1127 inline basic_string_branch(
1128 _In_reads_(count) const T* str_z = nullptr,
1129 _In_ size_t count = 0,
1130 _In_ const std::locale& locale = std::locale()) :
1131 basic_branch<T>(locale)
1132 {
1133 build(str_z, count);
1134 }
1135
1136 inline basic_string_branch(_In_z_ const T* str, ...) :
1137 basic_branch<T>(std::locale())
1138 {
1139 va_list params;
1140 va_start(params, str);
1141 build(str, params);
1142 va_end(params);
1143 }
1144
1145 inline basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1146 basic_branch<T>(locale)
1147 {
1148 va_list params;
1149 va_start(params, str);
1150 build(str, params);
1151 va_end(params);
1152 }
1153
1154 protected:
1155 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1156 {
1157 assert(str_z || !count);
1158 if (count) {
1159 size_t offset, n;
1160 for (
1161 offset = n = 0;
1162 offset < count && str_z[offset];
1163 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1164 this->m_collection.reserve(n);
1165 for (
1166 offset = 0;
1167 offset < count && str_z[offset];
1168 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1169 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1170 }
1171 }
1172
1173 void build(_In_z_ const T* str, _In_ va_list params)
1174 {
1175 const T* p;
1176 for (
1177 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, (size_t)-1, this->m_locale)));
1178 (p = va_arg(params, const T*)) != nullptr;
1179 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, (size_t)-1, this->m_locale))));
1180 }
1181 };
1182
1185#ifdef _UNICODE
1187#else
1189#endif
1191
1195 template <class T>
1197 {
1198 public:
1200 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1201 _In_ size_t count = 0,
1202 _In_ const std::locale& locale = std::locale()) :
1203 parser_collection<T>(el, count, locale)
1204 {}
1205
1207 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1208 _In_ const std::locale& locale = std::locale()) :
1209 parser_collection<T>(std::move(collection), locale)
1210 {}
1211
1212 virtual bool match(
1213 _In_reads_or_z_(end) const T* text,
1214 _In_ size_t start = 0,
1215 _In_ size_t end = (size_t)-1,
1216 _In_ int flags = match_default)
1217 {
1218 assert(text || start >= end);
1219 for (auto& el: this->m_collection)
1220 el->invalidate();
1221 if (match_recursively(text, start, end, flags)) {
1222 this->interval.start = start;
1223 return true;
1224 }
1225 this->interval.start = (this->interval.end = start) + 1;
1226 return false;
1227 }
1228
1229 protected:
1230 bool match_recursively(
1231 _In_reads_or_z_(end) const T* text,
1232 _In_ size_t start = 0,
1233 _In_ size_t end = (size_t)-1,
1234 _In_ int flags = match_default)
1235 {
1236 bool all_matched = true;
1237 for (auto& el: this->m_collection) {
1238 if (!el->interval) {
1239 // Element was not matched in permutatuion yet.
1240 all_matched = false;
1241 if (el->match(text, start, end, flags)) {
1242 // Element matched for the first time.
1243 if (match_recursively(text, el->interval.end, end, flags)) {
1244 // Rest of the elements matched too.
1245 return true;
1246 }
1247 el->invalidate();
1248 }
1249 }
1250 }
1251 if (all_matched) {
1252 this->interval.end = start;
1253 return true;
1254 }
1255 return false;
1256 }
1257 };
1258
1261#ifdef _UNICODE
1262 using tpermutation = wpermutation;
1263#else
1264 using tpermutation = permutation;
1265#endif
1267
1271 template <class T>
1272 class basic_integer : public basic_parser<T>
1273 {
1274 public:
1275 basic_integer(_In_ const std::locale& locale = std::locale()) :
1276 basic_parser<T>(locale),
1277 value(0)
1278 {}
1279
1280 virtual void invalidate()
1281 {
1282 value = 0;
1284 }
1285
1286 public:
1287 size_t value;
1288 };
1289
1293 template <class T>
1295 {
1296 public:
1298 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1299 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1300 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1301 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1302 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1303 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1304 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1305 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1306 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1307 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1308 _In_ const std::locale& locale = std::locale()) :
1309 basic_integer<T>(locale),
1310 m_digit_0(digit_0),
1311 m_digit_1(digit_1),
1312 m_digit_2(digit_2),
1313 m_digit_3(digit_3),
1314 m_digit_4(digit_4),
1315 m_digit_5(digit_5),
1316 m_digit_6(digit_6),
1317 m_digit_7(digit_7),
1318 m_digit_8(digit_8),
1319 m_digit_9(digit_9)
1320 {}
1321
1322 virtual bool match(
1323 _In_reads_or_z_(end) const T* text,
1324 _In_ size_t start = 0,
1325 _In_ size_t end = (size_t)-1,
1326 _In_ int flags = match_default)
1327 {
1328 assert(text || start >= end);
1329 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1330 size_t dig;
1331 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1332 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1333 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1334 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1335 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1336 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1337 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1338 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1339 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1340 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1341 else break;
1342 this->value = this->value * 10 + dig;
1343 }
1345 this->interval.start = start;
1346 return true;
1347 }
1348 this->interval.start = (this->interval.end = start) + 1;
1349 return false;
1350 }
1351
1352 protected:
1353 std::shared_ptr<basic_parser<T>>
1354 m_digit_0,
1355 m_digit_1,
1356 m_digit_2,
1357 m_digit_3,
1358 m_digit_4,
1359 m_digit_5,
1360 m_digit_6,
1361 m_digit_7,
1362 m_digit_8,
1363 m_digit_9;
1364 };
1365
1368#ifdef _UNICODE
1369 using tinteger10 = winteger10;
1370#else
1371 using tinteger10 = integer10;
1372#endif
1374
1378 template <class T>
1380 {
1381 public:
1383 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1384 _In_ const std::shared_ptr<basic_set<T>>& separator,
1385 _In_ const std::locale& locale = std::locale()) :
1386 basic_integer<T>(locale),
1387 digit_count(0),
1388 has_separators(false),
1389 m_digits(digits),
1390 m_separator(separator)
1391 {}
1392
1393 virtual bool match(
1394 _In_reads_or_z_(end) const T* text,
1395 _In_ size_t start = 0,
1396 _In_ size_t end = (size_t)-1,
1397 _In_ int flags = match_default)
1398 {
1399 assert(text || start >= end);
1400 if (m_digits->match(text, start, end, flags)) {
1401 // Leading part match.
1402 this->value = m_digits->value;
1403 digit_count = m_digits->interval.size();
1404 has_separators = false;
1405 this->interval.start = start;
1406 this->interval.end = m_digits->interval.end;
1407 if (m_digits->interval.size() <= 3) {
1408 // Maybe separated with thousand separators?
1409 size_t hit_offset = (size_t)-1;
1410 while (m_separator->match(text, this->interval.end, end, flags) &&
1411 (hit_offset == (size_t)-1 || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1412 m_digits->match(text, m_separator->interval.end, end, flags) &&
1413 m_digits->interval.size() == 3)
1414 {
1415 // Thousand separator and three-digit integer followed.
1416 this->value = this->value * 1000 + m_digits->value;
1417 digit_count += 3;
1418 has_separators = true;
1419 this->interval.end = m_digits->interval.end;
1420 hit_offset = m_separator->hit_offset;
1421 }
1422 }
1423
1424 return true;
1425 }
1426 this->value = 0;
1427 this->interval.start = (this->interval.end = start) + 1;
1428 return false;
1429 }
1430
1431 virtual void invalidate()
1432 {
1433 digit_count = 0;
1434 has_separators = false;
1436 }
1437
1438 public:
1441
1442 protected:
1443 std::shared_ptr<basic_integer10<T>> m_digits;
1444 std::shared_ptr<basic_set<T>> m_separator;
1445 };
1446
1449#ifdef _UNICODE
1450 using tinteger10ts = winteger10ts;
1451#else
1452 using tinteger10ts = integer10ts;
1453#endif
1455
1459 template <class T>
1461 {
1462 public:
1464 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1465 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1466 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1467 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1468 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1469 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1470 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1471 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1472 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1473 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1474 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1475 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1476 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1477 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1478 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1479 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1480 _In_ const std::locale& locale = std::locale()) :
1481 basic_integer<T>(locale),
1482 m_digit_0(digit_0),
1483 m_digit_1(digit_1),
1484 m_digit_2(digit_2),
1485 m_digit_3(digit_3),
1486 m_digit_4(digit_4),
1487 m_digit_5(digit_5),
1488 m_digit_6(digit_6),
1489 m_digit_7(digit_7),
1490 m_digit_8(digit_8),
1491 m_digit_9(digit_9),
1492 m_digit_10(digit_10),
1493 m_digit_11(digit_11),
1494 m_digit_12(digit_12),
1495 m_digit_13(digit_13),
1496 m_digit_14(digit_14),
1497 m_digit_15(digit_15)
1498 {}
1499
1500 virtual bool match(
1501 _In_reads_or_z_(end) const T* text,
1502 _In_ size_t start = 0,
1503 _In_ size_t end = (size_t)-1,
1504 _In_ int flags = match_default)
1505 {
1506 assert(text || start >= end);
1507 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1508 size_t dig;
1509 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1510 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1511 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1512 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1513 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1514 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1515 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1516 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1517 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1518 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1519 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1520 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1521 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1522 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1523 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1524 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
1525 else break;
1526 this->value = this->value * 16 + dig;
1527 }
1529 this->interval.start = start;
1530 return true;
1531 }
1532 this->interval.start = (this->interval.end = start) + 1;
1533 return false;
1534 }
1535
1536 protected:
1537 std::shared_ptr<basic_parser<T>>
1538 m_digit_0,
1539 m_digit_1,
1540 m_digit_2,
1541 m_digit_3,
1542 m_digit_4,
1543 m_digit_5,
1544 m_digit_6,
1545 m_digit_7,
1546 m_digit_8,
1547 m_digit_9,
1548 m_digit_10,
1549 m_digit_11,
1550 m_digit_12,
1551 m_digit_13,
1552 m_digit_14,
1553 m_digit_15;
1554 };
1555
1558#ifdef _UNICODE
1559 using tinteger16 = winteger16;
1560#else
1561 using tinteger16 = integer16;
1562#endif
1564
1568 template <class T>
1570 {
1571 public:
1573 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1574 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1575 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1576 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1577 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1578 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1579 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1580 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1581 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1582 _In_ const std::locale& locale = std::locale()) :
1583 basic_integer<T>(locale),
1584 m_digit_1(digit_1),
1585 m_digit_5(digit_5),
1586 m_digit_10(digit_10),
1587 m_digit_50(digit_50),
1588 m_digit_100(digit_100),
1589 m_digit_500(digit_500),
1590 m_digit_1000(digit_1000),
1591 m_digit_5000(digit_5000),
1592 m_digit_10000(digit_10000)
1593 {}
1594
1595 virtual bool match(
1596 _In_reads_or_z_(end) const T* text,
1597 _In_ size_t start = 0,
1598 _In_ size_t end = (size_t)-1,
1599 _In_ int flags = match_default)
1600 {
1601 assert(text || start >= end);
1602 size_t
1603 dig[5] = { (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1 },
1604 end2;
1605
1606 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
1607 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1608 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1609 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1610 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1611 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1612 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1613 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1614 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1615 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1616 else break;
1617
1618 // Store first digit.
1619 if (dig[4] == (size_t)-1) dig[4] = dig[0];
1620
1621 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1622 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1623 break;
1624 }
1625 if (dig[0] <= dig[1]) {
1626 // Digit is less or equal previous one: add.
1627 this->value += dig[0];
1628 }
1629 else if (
1630 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1631 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1632 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1633 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1634 {
1635 // Digit is up to two orders bigger than previous one: subtract. But...
1636 if (dig[2] < dig[0]) {
1637 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1638 break;
1639 }
1640 this->value -= dig[1]; // Cancel addition in the previous step.
1641 dig[0] -= dig[1]; // Combine last two digits.
1642 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1643 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1644 this->value += dig[0]; // Add combined value.
1645 }
1646 else {
1647 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1648 break;
1649 }
1650 }
1651 if (this->value) {
1652 this->interval.start = start;
1653 return true;
1654 }
1655 this->interval.start = (this->interval.end = start) + 1;
1656 return false;
1657 }
1658
1659 protected:
1660 std::shared_ptr<basic_parser<T>>
1661 m_digit_1,
1662 m_digit_5,
1663 m_digit_10,
1664 m_digit_50,
1665 m_digit_100,
1666 m_digit_500,
1667 m_digit_1000,
1668 m_digit_5000,
1669 m_digit_10000;
1670 };
1671
1674#ifdef _UNICODE
1676#else
1678#endif
1680
1684 template <class T>
1686 {
1687 public:
1689 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1690 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1691 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1692 _In_ const std::locale& locale = std::locale()) :
1693 basic_parser<T>(locale),
1694 numerator(_numerator),
1695 fraction_line(_fraction_line),
1696 denominator(_denominator)
1697 {}
1698
1699 virtual bool match(
1700 _In_reads_or_z_(end) const T* text,
1701 _In_ size_t start = 0,
1702 _In_ size_t end = (size_t)-1,
1703 _In_ int flags = match_default)
1704 {
1705 assert(text || start >= end);
1706 if (numerator->match(text, start, end, flags) &&
1707 fraction_line->match(text, numerator->interval.end, end, flags) &&
1708 denominator->match(text, fraction_line->interval.end, end, flags))
1709 {
1710 this->interval.start = start;
1711 this->interval.end = denominator->interval.end;
1712 return true;
1713 }
1714 numerator->invalidate();
1715 fraction_line->invalidate();
1716 denominator->invalidate();
1717 this->interval.start = (this->interval.end = start) + 1;
1718 return false;
1719 }
1720
1721 virtual void invalidate()
1722 {
1723 numerator->invalidate();
1724 fraction_line->invalidate();
1725 denominator->invalidate();
1727 }
1728
1729 public:
1730 std::shared_ptr<basic_parser<T>> numerator;
1731 std::shared_ptr<basic_parser<T>> fraction_line;
1732 std::shared_ptr<basic_parser<T>> denominator;
1733 };
1734
1737#ifdef _UNICODE
1738 using tfraction = wfraction;
1739#else
1740 using tfraction = fraction;
1741#endif
1743
1747 template <class T>
1748 class basic_score : public basic_parser<T>
1749 {
1750 public:
1752 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1753 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1754 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1755 _In_ const std::shared_ptr<basic_parser<T>>& space,
1756 _In_ const std::locale& locale = std::locale()) :
1757 basic_parser<T>(locale),
1758 home(_home),
1759 separator(_separator),
1760 guest(_guest),
1761 m_space(space)
1762 {}
1763
1764 virtual bool match(
1765 _In_reads_or_z_(end) const T* text,
1766 _In_ size_t start = 0,
1767 _In_ size_t end = (size_t)-1,
1768 _In_ int flags = match_default)
1769 {
1770 assert(text || start >= end);
1771 this->interval.end = start;
1772
1773 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1774
1775 if (home->match(text, this->interval.end, end, flags))
1776 this->interval.end = home->interval.end;
1777 else
1778 goto end;
1779
1780 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1781
1782 if (separator->match(text, this->interval.end, end, flags))
1783 this->interval.end = separator->interval.end;
1784 else
1785 goto end;
1786
1787 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1788
1789 if (guest->match(text, this->interval.end, end, flags))
1790 this->interval.end = guest->interval.end;
1791 else
1792 goto end;
1793
1794 this->interval.start = start;
1795 return true;
1796
1797 end:
1798 home->invalidate();
1799 separator->invalidate();
1800 guest->invalidate();
1801 this->interval.start = (this->interval.end = start) + 1;
1802 return false;
1803 }
1804
1805 virtual void invalidate()
1806 {
1807 home->invalidate();
1808 separator->invalidate();
1809 guest->invalidate();
1811 }
1812
1813 public:
1814 std::shared_ptr<basic_parser<T>> home;
1815 std::shared_ptr<basic_parser<T>> separator;
1816 std::shared_ptr<basic_parser<T>> guest;
1817
1818 protected:
1819 std::shared_ptr<basic_parser<T>> m_space;
1820 };
1821
1822 using score = basic_score<char>;
1824#ifdef _UNICODE
1825 using tscore = wscore;
1826#else
1827 using tscore = score;
1828#endif
1830
1834 template <class T>
1836 {
1837 public:
1839 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1840 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1841 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1842 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1843 _In_ const std::locale& locale = std::locale()) :
1844 basic_parser<T>(locale),
1849 {}
1850
1851 virtual bool match(
1852 _In_reads_or_z_(end) const T* text,
1853 _In_ size_t start = 0,
1854 _In_ size_t end = (size_t)-1,
1855 _In_ int flags = match_default)
1856 {
1857 assert(text || start >= end);
1858 this->interval.end = start;
1859 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1860 this->interval.end = positive_sign->interval.end;
1861 if (negative_sign) negative_sign->invalidate();
1862 if (special_sign) special_sign->invalidate();
1863 }
1864 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1865 this->interval.end = negative_sign->interval.end;
1866 if (positive_sign) positive_sign->invalidate();
1867 if (special_sign) special_sign->invalidate();
1868 }
1869 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1870 this->interval.end = special_sign->interval.end;
1871 if (positive_sign) positive_sign->invalidate();
1872 if (negative_sign) negative_sign->invalidate();
1873 }
1874 else {
1875 if (positive_sign) positive_sign->invalidate();
1876 if (negative_sign) negative_sign->invalidate();
1877 if (special_sign) special_sign->invalidate();
1878 }
1879 if (number->match(text, this->interval.end, end, flags)) {
1880 this->interval.start = start;
1881 this->interval.end = number->interval.end;
1882 return true;
1883 }
1884 if (positive_sign) positive_sign->invalidate();
1885 if (negative_sign) negative_sign->invalidate();
1886 if (special_sign) special_sign->invalidate();
1887 number->invalidate();
1888 this->interval.start = (this->interval.end = start) + 1;
1889 return false;
1890 }
1891
1892 virtual void invalidate()
1893 {
1894 if (positive_sign) positive_sign->invalidate();
1895 if (negative_sign) negative_sign->invalidate();
1896 if (special_sign) special_sign->invalidate();
1897 number->invalidate();
1899 }
1900
1901 public:
1902 std::shared_ptr<basic_parser<T>> positive_sign;
1903 std::shared_ptr<basic_parser<T>> negative_sign;
1904 std::shared_ptr<basic_parser<T>> special_sign;
1905 std::shared_ptr<basic_parser<T>> number;
1906 };
1907
1910#ifdef _UNICODE
1912#else
1914#endif
1916
1920 template <class T>
1922 {
1923 public:
1925 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1926 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1927 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1928 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1929 _In_ const std::shared_ptr<basic_parser<T>>& space,
1930 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1931 _In_ const std::locale& locale = std::locale()) :
1932 basic_parser<T>(locale),
1938 m_space(space)
1939 {}
1940
1941 virtual bool match(
1942 _In_reads_or_z_(end) const T* text,
1943 _In_ size_t start = 0,
1944 _In_ size_t end = (size_t)-1,
1945 _In_ int flags = match_default)
1946 {
1947 assert(text || start >= end);
1948 this->interval.end = start;
1949
1950 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1951 this->interval.end = positive_sign->interval.end;
1952 if (negative_sign) negative_sign->invalidate();
1953 if (special_sign) special_sign->invalidate();
1954 }
1955 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1956 this->interval.end = negative_sign->interval.end;
1957 if (positive_sign) positive_sign->invalidate();
1958 if (special_sign) special_sign->invalidate();
1959 }
1960 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1961 this->interval.end = special_sign->interval.end;
1962 if (positive_sign) positive_sign->invalidate();
1963 if (negative_sign) negative_sign->invalidate();
1964 }
1965 else {
1966 if (positive_sign) positive_sign->invalidate();
1967 if (negative_sign) negative_sign->invalidate();
1968 if (special_sign) special_sign->invalidate();
1969 }
1970
1971 // Check for <integer> <fraction>
1972 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1973 if (integer->match(text, this->interval.end, end, flags) &&
1974 m_space->match(text, integer->interval.end, end, space_match_flags))
1975 {
1976 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1977 if (fraction->match(text, this->interval.end, end, flags)) {
1978 this->interval.start = start;
1979 this->interval.end = fraction->interval.end;
1980 return true;
1981 }
1982 fraction->invalidate();
1983 this->interval.start = start;
1984 this->interval.end = integer->interval.end;
1985 return true;
1986 }
1987
1988 // Check for <fraction>
1989 if (fraction->match(text, this->interval.end, end, flags)) {
1990 integer->invalidate();
1991 this->interval.start = start;
1992 this->interval.end = fraction->interval.end;
1993 return true;
1994 }
1995
1996 // Check for <integer>
1997 if (integer->match(text, this->interval.end, end, flags)) {
1998 fraction->invalidate();
1999 this->interval.start = start;
2000 this->interval.end = integer->interval.end;
2001 return true;
2002 }
2003
2004 if (positive_sign) positive_sign->invalidate();
2005 if (negative_sign) negative_sign->invalidate();
2006 if (special_sign) special_sign->invalidate();
2007 integer->invalidate();
2008 fraction->invalidate();
2009 this->interval.start = (this->interval.end = start) + 1;
2010 return false;
2011 }
2012
2013 virtual void invalidate()
2014 {
2015 if (positive_sign) positive_sign->invalidate();
2016 if (negative_sign) negative_sign->invalidate();
2017 if (special_sign) special_sign->invalidate();
2018 integer->invalidate();
2019 fraction->invalidate();
2021 }
2022
2023 public:
2024 std::shared_ptr<basic_parser<T>> positive_sign;
2025 std::shared_ptr<basic_parser<T>> negative_sign;
2026 std::shared_ptr<basic_parser<T>> special_sign;
2027 std::shared_ptr<basic_parser<T>> integer;
2028 std::shared_ptr<basic_parser<T>> fraction;
2029
2030 protected:
2031 std::shared_ptr<basic_parser<T>> m_space;
2032 };
2033
2036#ifdef _UNICODE
2038#else
2040#endif
2042
2046 template <class T>
2048 {
2049 public:
2051 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2052 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2053 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2054 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2055 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2056 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2057 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2058 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2059 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2060 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2061 _In_ const std::locale& locale = std::locale()) :
2062 basic_parser<T>(locale),
2073 value(std::numeric_limits<double>::quiet_NaN())
2074 {}
2075
2076 virtual bool match(
2077 _In_reads_or_z_(end) const T* text,
2078 _In_ size_t start = 0,
2079 _In_ size_t end = (size_t)-1,
2080 _In_ int flags = match_default)
2081 {
2082 assert(text || start >= end);
2083 this->interval.end = start;
2084
2085 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2086 this->interval.end = positive_sign->interval.end;
2087 if (negative_sign) negative_sign->invalidate();
2088 if (special_sign) special_sign->invalidate();
2089 }
2090 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2091 this->interval.end = negative_sign->interval.end;
2092 if (positive_sign) positive_sign->invalidate();
2093 if (special_sign) special_sign->invalidate();
2094 }
2095 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2096 this->interval.end = special_sign->interval.end;
2097 if (positive_sign) positive_sign->invalidate();
2098 if (negative_sign) negative_sign->invalidate();
2099 }
2100 else {
2101 if (positive_sign) positive_sign->invalidate();
2102 if (negative_sign) negative_sign->invalidate();
2103 if (special_sign) special_sign->invalidate();
2104 }
2105
2106 if (integer->match(text, this->interval.end, end, flags))
2107 this->interval.end = integer->interval.end;
2108
2109 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2111 this->interval.end = decimal->interval.end;
2112 else {
2113 decimal_separator->invalidate();
2114 decimal->invalidate();
2115 }
2116
2117 if (integer->interval.empty() &&
2118 decimal->interval.empty())
2119 {
2120 // No integer part, no decimal part.
2121 if (positive_sign) positive_sign->invalidate();
2122 if (negative_sign) negative_sign->invalidate();
2123 if (special_sign) special_sign->invalidate();
2124 integer->invalidate();
2125 decimal_separator->invalidate();
2126 decimal->invalidate();
2127 if (exponent_symbol) exponent_symbol->invalidate();
2128 if (positive_exp_sign) positive_exp_sign->invalidate();
2129 if (negative_exp_sign) negative_exp_sign->invalidate();
2130 if (exponent) exponent->invalidate();
2131 this->interval.start = (this->interval.end = start) + 1;
2132 return false;
2133 }
2134
2135 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2138 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2139 {
2140 this->interval.end = exponent->interval.end;
2141 if (negative_exp_sign) negative_exp_sign->invalidate();
2142 }
2143 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2146 {
2147 this->interval.end = exponent->interval.end;
2148 if (positive_exp_sign) positive_exp_sign->invalidate();
2149 }
2150 else {
2151 if (exponent_symbol) exponent_symbol->invalidate();
2152 if (positive_exp_sign) positive_exp_sign->invalidate();
2153 if (negative_exp_sign) negative_exp_sign->invalidate();
2154 if (exponent) exponent->invalidate();
2155 }
2156
2157 value = (double)integer->value;
2158 if (decimal->interval)
2159 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2160 if (negative_sign && negative_sign->interval)
2161 value = -value;
2162 if (exponent && exponent->interval) {
2163 double e = (double)exponent->value;
2164 if (negative_exp_sign && negative_exp_sign->interval)
2165 e = -e;
2166 value *= pow(10.0, e);
2167 }
2168
2169 this->interval.start = start;
2170 return true;
2171 }
2172
2173 virtual void invalidate()
2174 {
2175 if (positive_sign) positive_sign->invalidate();
2176 if (negative_sign) negative_sign->invalidate();
2177 if (special_sign) special_sign->invalidate();
2178 integer->invalidate();
2179 decimal_separator->invalidate();
2180 decimal->invalidate();
2181 if (exponent_symbol) exponent_symbol->invalidate();
2182 if (positive_exp_sign) positive_exp_sign->invalidate();
2183 if (negative_exp_sign) negative_exp_sign->invalidate();
2184 if (exponent) exponent->invalidate();
2185 value = std::numeric_limits<double>::quiet_NaN();
2187 }
2188
2189 public:
2190 std::shared_ptr<basic_parser<T>> positive_sign;
2191 std::shared_ptr<basic_parser<T>> negative_sign;
2192 std::shared_ptr<basic_parser<T>> special_sign;
2193 std::shared_ptr<basic_integer<T>> integer;
2194 std::shared_ptr<basic_parser<T>> decimal_separator;
2195 std::shared_ptr<basic_integer<T>> decimal;
2196 std::shared_ptr<basic_parser<T>> exponent_symbol;
2197 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2198 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2199 std::shared_ptr<basic_integer<T>> exponent;
2200 double value;
2201 };
2202
2205#ifdef _UNICODE
2207#else
2209#endif
2211
2215 template <class T>
2217 {
2218 public:
2220 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2221 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2222 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2223 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2224 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2225 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2226 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2227 _In_ const std::locale& locale = std::locale()) :
2228 basic_parser<T>(locale),
2236 {}
2237
2238 virtual bool match(
2239 _In_reads_or_z_(end) const T* text,
2240 _In_ size_t start = 0,
2241 _In_ size_t end = (size_t)-1,
2242 _In_ int flags = match_default)
2243 {
2244 assert(text || start >= end);
2245 this->interval.end = start;
2246
2247 if (positive_sign->match(text, this->interval.end, end, flags)) {
2248 this->interval.end = positive_sign->interval.end;
2249 if (negative_sign) negative_sign->invalidate();
2250 if (special_sign) special_sign->invalidate();
2251 }
2252 else if (negative_sign->match(text, this->interval.end, end, flags)) {
2253 this->interval.end = negative_sign->interval.end;
2254 if (positive_sign) positive_sign->invalidate();
2255 if (special_sign) special_sign->invalidate();
2256 }
2257 else if (special_sign->match(text, this->interval.end, end, flags)) {
2258 this->interval.end = special_sign->interval.end;
2259 if (positive_sign) positive_sign->invalidate();
2260 if (negative_sign) negative_sign->invalidate();
2261 }
2262 else {
2263 if (positive_sign) positive_sign->invalidate();
2264 if (negative_sign) negative_sign->invalidate();
2265 if (special_sign) special_sign->invalidate();
2266 }
2267
2268 if (currency->match(text, this->interval.end, end, flags))
2269 this->interval.end = currency->interval.end;
2270 else {
2271 if (positive_sign) positive_sign->invalidate();
2272 if (negative_sign) negative_sign->invalidate();
2273 if (special_sign) special_sign->invalidate();
2274 integer->invalidate();
2275 decimal_separator->invalidate();
2276 decimal->invalidate();
2277 this->interval.start = (this->interval.end = start) + 1;
2278 return false;
2279 }
2280
2281 if (integer->match(text, this->interval.end, end, flags))
2282 this->interval.end = integer->interval.end;
2283 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2285 this->interval.end = decimal->interval.end;
2286 else {
2287 decimal_separator->invalidate();
2288 decimal->invalidate();
2289 }
2290
2291 if (integer->interval.empty() &&
2292 decimal->interval.empty())
2293 {
2294 // No integer part, no decimal part.
2295 if (positive_sign) positive_sign->invalidate();
2296 if (negative_sign) negative_sign->invalidate();
2297 if (special_sign) special_sign->invalidate();
2298 currency->invalidate();
2299 integer->invalidate();
2300 decimal_separator->invalidate();
2301 decimal->invalidate();
2302 this->interval.start = (this->interval.end = start) + 1;
2303 return false;
2304 }
2305
2306 this->interval.start = start;
2307 return true;
2308 }
2309
/// Invalidates the sub-parsers held by this parser.
///
/// The three sign parsers are optional and are only invalidated when set;
/// currency, integer, decimal_separator and decimal are invalidated unconditionally.
2310 virtual void invalidate()
2311 {
2312 if (positive_sign) positive_sign->invalidate();
2313 if (negative_sign) negative_sign->invalidate();
2314 if (special_sign) special_sign->invalidate();
2315 currency->invalidate();
2316 integer->invalidate();
2317 decimal_separator->invalidate();
2318 decimal->invalidate();
2320 }
2321
2322 public:
2323 std::shared_ptr<basic_parser<T>> positive_sign;
2324 std::shared_ptr<basic_parser<T>> negative_sign;
2325 std::shared_ptr<basic_parser<T>> special_sign;
2326 std::shared_ptr<basic_parser<T>> currency;
2327 std::shared_ptr<basic_parser<T>> integer;
2328 std::shared_ptr<basic_parser<T>> decimal_separator;
2329 std::shared_ptr<basic_parser<T>> decimal;
2330 };
2331
2334#ifdef _UNICODE
2336#else
2338#endif
2340
2344 template <class T>
2346 {
2347 public:
2349 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2350 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2351 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2352 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2353 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2354 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2355 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2356 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2357 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2358 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2359 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2360 _In_ const std::locale& locale = std::locale()) :
2361 basic_parser<T>(locale),
2362 m_digit_0(digit_0),
2363 m_digit_1(digit_1),
2364 m_digit_2(digit_2),
2365 m_digit_3(digit_3),
2366 m_digit_4(digit_4),
2367 m_digit_5(digit_5),
2368 m_digit_6(digit_6),
2369 m_digit_7(digit_7),
2370 m_digit_8(digit_8),
2371 m_digit_9(digit_9),
2372 m_separator(separator)
2373 {
2374 value.s_addr = 0;
2375 }
2376
2377 virtual bool match(
2378 _In_reads_or_z_(end) const T* text,
2379 _In_ size_t start = 0,
2380 _In_ size_t end = (size_t)-1,
2381 _In_ int flags = match_default)
2382 {
2383 assert(text || start >= end);
2384 this->interval.end = start;
2385 value.s_addr = 0;
2386
2387 size_t i;
2388 for (i = 0; i < 4; i++) {
2389 if (i) {
2390 if (m_separator->match(text, this->interval.end, end, flags))
2391 this->interval.end = m_separator->interval.end;
2392 else
2393 goto error;
2394 }
2395
2396 components[i].start = this->interval.end;
2397 bool is_empty = true;
2398 size_t x;
2399 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2400 size_t dig, digit_end;
2401 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2402 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2403 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2404 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2405 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2406 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2407 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2408 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2409 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2410 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2411 else break;
2412 size_t x_n = x * 10 + dig;
2413 if (x_n <= 255) {
2414 x = x_n;
2415 this->interval.end = digit_end;
2416 is_empty = false;
2417 }
2418 else
2419 break;
2420 }
2421 if (is_empty)
2422 goto error;
2423 components[i].end = this->interval.end;
2424 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2425 }
2426 if (i < 4)
2427 goto error;
2428
2429 this->interval.start = start;
2430 return true;
2431
2432 error:
2433 components[0].start = 1;
2434 components[0].end = 0;
2435 components[1].start = 1;
2436 components[1].end = 0;
2437 components[2].start = 1;
2438 components[2].end = 0;
2439 components[3].start = 1;
2440 components[3].end = 0;
2441 value.s_addr = 0;
2442 this->interval.start = (this->interval.end = start) + 1;
2443 return false;
2444 }
2445
/// Resets the parsed state: each of the four component intervals is set to start 1, end 0
/// (start after end) and the accumulated address value is cleared.
2446 virtual void invalidate()
2447 {
2448 components[0].start = 1;
2449 components[0].end = 0;
2450 components[1].start = 1;
2451 components[1].end = 0;
2452 components[2].start = 1;
2453 components[2].end = 0;
2454 components[3].start = 1;
2455 components[3].end = 0;
2456 value.s_addr = 0;
2458 }
2459
2460 public:
2463
2464 protected:
2465 std::shared_ptr<basic_parser<T>>
2466 m_digit_0,
2467 m_digit_1,
2468 m_digit_2,
2469 m_digit_3,
2470 m_digit_4,
2471 m_digit_5,
2472 m_digit_6,
2473 m_digit_7,
2474 m_digit_8,
2475 m_digit_9;
2476 std::shared_ptr<basic_parser<T>> m_separator;
2477 };
2478
2481#ifdef _UNICODE
2483#else
2485#endif
2487
2491 template <class T>
2493 {
2494 public:
2495 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2496
2497 virtual bool match(
2498 _In_reads_or_z_(end) const T* text,
2499 _In_ size_t start = 0,
2500 _In_ size_t end = (size_t)-1,
2501 _In_ int flags = match_default)
2502 {
2503 assert(text || start >= end);
2504 if (start < end && text[start]) {
2505 if (text[start] == '-' ||
2506 text[start] == '_' ||
2507 text[start] == ':' ||
2508 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2509 {
2510 this->interval.end = (this->interval.start = start) + 1;
2511 return true;
2512 }
2513 }
2514 this->interval.start = (this->interval.end = start) + 1;
2515 return false;
2516 }
2517 };
2518
2521#ifdef _UNICODE
2523#else
2525#endif
2526
2531 {
2532 public:
2533 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2534
2535 virtual bool match(
2536 _In_reads_or_z_(end) const char* text,
2537 _In_ size_t start = 0,
2538 _In_ size_t end = (size_t)-1,
2539 _In_ int flags = match_default)
2540 {
2541 assert(text || start >= end);
2542 if (start < end && text[start]) {
2543 wchar_t buf[3];
2544 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2545 const wchar_t* chr_end = chr + stdex::strlen(chr);
2546 if (((chr[0] == L'-' ||
2547 chr[0] == L'_' ||
2548 chr[0] == L':') && chr[1] == 0) ||
2549 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2550 {
2551 this->interval.start = start;
2552 return true;
2553 }
2554 }
2555 this->interval.start = (this->interval.end = start) + 1;
2556 return false;
2557 }
2558 };
2559
2563 template <class T>
2565 {
2566 public:
2568 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2569 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2570 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2571 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2572 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2573 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2574 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2575 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2576 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2577 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2578 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2579 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2580 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2581 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2582 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2583 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2584 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2585 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2586 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2587 _In_ const std::locale& locale = std::locale()) :
2588 basic_parser<T>(locale),
2589 m_digit_0(digit_0),
2590 m_digit_1(digit_1),
2591 m_digit_2(digit_2),
2592 m_digit_3(digit_3),
2593 m_digit_4(digit_4),
2594 m_digit_5(digit_5),
2595 m_digit_6(digit_6),
2596 m_digit_7(digit_7),
2597 m_digit_8(digit_8),
2598 m_digit_9(digit_9),
2599 m_digit_10(digit_10),
2600 m_digit_11(digit_11),
2601 m_digit_12(digit_12),
2602 m_digit_13(digit_13),
2603 m_digit_14(digit_14),
2604 m_digit_15(digit_15),
2605 m_separator(separator),
2606 m_scope_id_separator(scope_id_separator),
2608 {
2609 memset(&value, 0, sizeof(value));
2610 }
2611
/// Matches an IPv6 address: up to eight hexadecimal components, each in the range 0..0xffff,
/// joined by the separator parser. One doubled separator ("zero compaction") is accepted;
/// a second one ends the address. An optional scope ID may follow.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop at (matching also stops at a zero terminator)
/// \param[in] flags  Match flags; forwarded unchanged to the sub-parsers
///
/// \returns
/// - true on match; interval spans the address (and scope ID when one was matched)
/// - false otherwise; components, value and scope_id are reset and interval is set to
///   start = start + 1, end = start
2612 virtual bool match(
2613 _In_reads_or_z_(end) const T* text,
2614 _In_ size_t start = 0,
2615 _In_ size_t end = (size_t)-1,
2616 _In_ int flags = match_default)
2617 {
2618 assert(text || start >= end);
2619 this->interval.end = start;
2620 memset(&value, 0, sizeof(value));
2621
// compaction_i stays (size_t)-1 until a doubled separator has been seen; afterwards it holds
// the index of the component at which the doubled separator occurred.
2622 size_t i, compaction_i = (size_t)-1, compaction_start = start;
2623 for (i = 0; i < 8; i++) {
2624 bool is_empty = true;
2625
2626 if (m_separator->match(text, this->interval.end, end, flags)) {
// Probe for a second separator directly behind the first one.
2627 if (m_separator->match(text, m_separator->interval.end, end, flags)) {
2628 // :: found
2629 if (compaction_i == (size_t)-1) {
2630 // Zero compaction start
2631 compaction_i = i;
2632 compaction_start = m_separator->interval.start;
2633 this->interval.end = m_separator->interval.end;
2634 }
2635 else {
2636 // More than one zero compaction
2637 break;
2638 }
2639 }
2640 else if (i) {
2641 // Inner : found
2642 this->interval.end = m_separator->interval.end;
2643 }
2644 else {
2645 // Leading : found
2646 goto error;
2647 }
2648 }
2649 else if (i) {
2650 // : missing
2651 break;
2652 }
2653
2654 components[i].start = this->interval.end;
2655 size_t x;
// Accumulate hexadecimal digits into x. The sixteen digit parsers are tried in ascending
// order of the value they stand for.
2656 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2657 size_t dig, digit_end;
2658 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2659 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2660 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2661 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2662 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2663 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2664 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2665 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2666 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2667 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2668 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2669 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2670 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2671 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2672 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2673 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2674 else break;
2675 size_t x_n = x * 16 + dig;
// A digit that would push the component beyond 16 bits is left unconsumed.
2676 if (x_n <= 0xffff) {
2677 x = x_n;
2678 this->interval.end = digit_end;
2679 is_empty = false;
2680 }
2681 else
2682 break;
2683 }
2684 if (is_empty) {
2685 if (compaction_i != (size_t)-1) {
2686 // Zero compaction active: no sweat.
2687 break;
2688 }
2689 goto error;
2690 }
2691 components[i].end = this->interval.end;
// NOTE(review): the word is stored as a plain host integer; if value is a socket in6_addr its
// words are conventionally kept in network byte order - confirm what callers expect.
2692 this->value.s6_words[i] = (uint16_t)x;
2693 }
2694
2695 if (compaction_i != (size_t)-1) {
2696 // Align components right due to zero compaction.
2697 size_t j, k;
2698 for (j = 8, k = i; k > compaction_i;) {
2699 this->value.s6_words[--j] = this->value.s6_words[--k];
2701 }
2702 for (; j > compaction_i;) {
2703 this->value.s6_words[--j] = 0;
2704 components[j].start =
2706 }
2707 }
// Without zero compaction all eight components are required.
2708 else if (i < 8)
2709 goto error;
2710
// The scope ID is optional: it is consumed only when both the scope ID separator parser and the
// scope ID parser are set and both match; otherwise a set scope ID parser is invalidated.
2711 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2712 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2713 this->interval.end = scope_id->interval.end;
2714 else if (scope_id)
2715 scope_id->invalidate();
2716
2717 this->interval.start = start;
2718 return true;
2719
2720 error:
2721 components[0].start = 1;
2722 components[0].end = 0;
2723 components[1].start = 1;
2724 components[1].end = 0;
2725 components[2].start = 1;
2726 components[2].end = 0;
2727 components[3].start = 1;
2728 components[3].end = 0;
2729 components[4].start = 1;
2730 components[4].end = 0;
2731 components[5].start = 1;
2732 components[5].end = 0;
2733 components[6].start = 1;
2734 components[6].end = 0;
2735 components[7].start = 1;
2736 components[7].end = 0;
2737 memset(&value, 0, sizeof(value));
2738 if (scope_id) scope_id->invalidate();
2739 this->interval.start = (this->interval.end = start) + 1;
2740 return false;
2741 }
2742
/// Resets the parsed state: each of the eight component intervals is set to start 1, end 0
/// (start after end), the address value is zeroed and the scope ID parser, when set, is invalidated.
2743 virtual void invalidate()
2744 {
2745 components[0].start = 1;
2746 components[0].end = 0;
2747 components[1].start = 1;
2748 components[1].end = 0;
2749 components[2].start = 1;
2750 components[2].end = 0;
2751 components[3].start = 1;
2752 components[3].end = 0;
2753 components[4].start = 1;
2754 components[4].end = 0;
2755 components[5].start = 1;
2756 components[5].end = 0;
2757 components[6].start = 1;
2758 components[6].end = 0;
2759 components[7].start = 1;
2760 components[7].end = 0;
2761 memset(&value, 0, sizeof(value));
2762 if (scope_id) scope_id->invalidate();
2764 }
2765
2766 public:
2769 std::shared_ptr<basic_parser<T>> scope_id;
2770
2771 protected:
2772 std::shared_ptr<basic_parser<T>>
2773 m_digit_0,
2774 m_digit_1,
2775 m_digit_2,
2776 m_digit_3,
2777 m_digit_4,
2778 m_digit_5,
2779 m_digit_6,
2780 m_digit_7,
2781 m_digit_8,
2782 m_digit_9,
2783 m_digit_10,
2784 m_digit_11,
2785 m_digit_12,
2786 m_digit_13,
2787 m_digit_14,
2788 m_digit_15;
2789 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2790 };
2791
2794#ifdef _UNICODE
2796#else
2798#endif
2800
2804 template <class T>
2806 {
2807 public:
2809 _In_ bool allow_idn,
2810 _In_ const std::locale& locale = std::locale()) :
2811 basic_parser<T>(locale),
2812 m_allow_idn(allow_idn),
2813 allow_on_edge(true)
2814 {}
2815
2816 virtual bool match(
2817 _In_reads_or_z_(end) const T* text,
2818 _In_ size_t start = 0,
2819 _In_ size_t end = (size_t)-1,
2820 _In_ int flags = match_default)
2821 {
2822 assert(text || start >= end);
2823 if (start < end && text[start]) {
2824 if (('A' <= text[start] && text[start] <= 'Z') ||
2825 ('a' <= text[start] && text[start] <= 'z') ||
2826 ('0' <= text[start] && text[start] <= '9'))
2827 allow_on_edge = true;
2828 else if (text[start] == '-')
2829 allow_on_edge = false;
2830 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2831 allow_on_edge = true;
2832 else {
2833 this->interval.start = (this->interval.end = start) + 1;
2834 return false;
2835 }
2836 this->interval.end = (this->interval.start = start) + 1;
2837 return true;
2838 }
2839 this->interval.start = (this->interval.end = start) + 1;
2840 return false;
2841 }
2842
2843 public:
2845
2846 protected:
2847 bool m_allow_idn;
2848 };
2849
2852#ifdef _UNICODE
2854#else
2856#endif
2857
2862 {
2863 public:
2865 _In_ bool allow_idn,
2866 _In_ const std::locale& locale = std::locale()) :
2868 {}
2869
2870 virtual bool match(
2871 _In_reads_or_z_(end) const char* text,
2872 _In_ size_t start = 0,
2873 _In_ size_t end = (size_t)-1,
2874 _In_ int flags = match_default)
2875 {
2876 assert(text || start >= end);
2877 if (start < end && text[start]) {
2878 wchar_t buf[3];
2879 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2880 const wchar_t* chr_end = chr + stdex::strlen(chr);
2881 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2882 ('a' <= chr[0] && chr[0] <= 'z') ||
2883 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2884 allow_on_edge = true;
2885 else if (chr[0] == '-' && chr[1] == 0)
2886 allow_on_edge = false;
2887 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2888 allow_on_edge = true;
2889 else {
2890 this->interval.start = (this->interval.end = start) + 1;
2891 return false;
2892 }
2893 this->interval.start = start;
2894 return true;
2895 }
2896 this->interval.start = (this->interval.end = start) + 1;
2897 return false;
2898 }
2899 };
2900
2904 template <class T>
2906 {
2907 public:
2909 _In_ bool allow_absolute,
2910 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2911 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2912 _In_ const std::locale& locale = std::locale()) :
2913 basic_parser<T>(locale),
2915 m_domain_char(domain_char),
2916 m_separator(separator)
2917 {}
2918
/// Matches a DNS name: a run of domain labels joined by the separator parser.
///
/// Each pass of the outer loop handles one label; at most 127 passes are made. A label has to
/// start with a character that is allowed on an edge. Characters that are not allowed on an
/// edge are scanned over, but the reported match end only advances up to the last character
/// that is allowed on an edge.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop at (matching also stops at a zero terminator)
/// \param[in] flags  Match flags; forwarded unchanged to the sub-parsers
///
/// \returns
/// - true when at least one label was matched; interval spans the match
/// - false otherwise; interval is set to start = start + 1, end = start
2919 virtual bool match(
2920 _In_reads_or_z_(end) const T* text,
2921 _In_ size_t start = 0,
2922 _In_ size_t end = (size_t)-1,
2923 _In_ int flags = match_default)
2924 {
2925 assert(text || start >= end);
// i is the scan position; this->interval.end trails it and marks the end of the accepted match.
2926 size_t i = start, count;
2927 for (count = 0; i < end && text[i] && count < 127; count++) {
2928 if (m_domain_char->match(text, i, end, flags) &&
2929 m_domain_char->allow_on_edge)
2930 {
2931 // Domain start
2932 this->interval.end = i = m_domain_char->interval.end;
2933 while (i < end && text[i]) {
// A separator is only tried when the most recently matched character is allowed on an edge.
2934 if (m_domain_char->allow_on_edge &&
2935 m_separator->match(text, i, end, flags))
2936 {
2937 // Domain end
// With m_allow_absolute the separator becomes part of the match; otherwise the match end
// stays in front of it while scanning continues behind it.
2938 if (m_allow_absolute)
2939 this->interval.end = i = m_separator->interval.end;
2940 else {
2941 this->interval.end = i;
2942 i = m_separator->interval.end;
2943 }
2944 break;
2945 }
2946 if (m_domain_char->match(text, i, end, flags)) {
// Only characters allowed on an edge extend the match end; others just advance the scan.
2947 if (m_domain_char->allow_on_edge)
2948 this->interval.end = i = m_domain_char->interval.end;
2949 else
2950 i = m_domain_char->interval.end;
2951 }
2952 else {
// Neither a separator nor a domain character: the name ends at the current match end.
2953 this->interval.start = start;
2954 return true;
2955 }
2956 }
2957 }
2958 else
2959 break;
2960 }
2961 if (count) {
2962 this->interval.start = start;
2963 return true;
2964 }
2965 this->interval.start = (this->interval.end = start) + 1;
2966 return false;
2967 }
2968
2969 protected:
2971 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2972 std::shared_ptr<basic_parser<T>> m_separator;
2973 };
2974
// tdns_name follows the build's character set: wdns_name when _UNICODE is defined, dns_name otherwise.
2977#ifdef _UNICODE
2978 using tdns_name = wdns_name;
2979#else
2980 using tdns_name = dns_name;
2981#endif
2983
2987 template <class T>
2989 {
2990 public:
2991 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2992
2993 virtual bool match(
2994 _In_reads_or_z_(end) const T* text,
2995 _In_ size_t start = 0,
2996 _In_ size_t end = (size_t)-1,
2997 _In_ int flags = match_default)
2998 {
2999 assert(text || start >= end);
3000 if (start < end && text[start]) {
3001 if (text[start] == '-' ||
3002 text[start] == '.' ||
3003 text[start] == '_' ||
3004 text[start] == '~' ||
3005 text[start] == '%' ||
3006 text[start] == '!' ||
3007 text[start] == '$' ||
3008 text[start] == '&' ||
3009 text[start] == '\'' ||
3010 //text[start] == '(' ||
3011 //text[start] == ')' ||
3012 text[start] == '*' ||
3013 text[start] == '+' ||
3014 text[start] == ',' ||
3015 text[start] == ';' ||
3016 text[start] == '=' ||
3017 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3018 {
3019 this->interval.end = (this->interval.start = start) + 1;
3020 return true;
3021 }
3022 }
3023 this->interval.start = (this->interval.end = start) + 1;
3024 return false;
3025 }
3026 };
3027
3030#ifdef _UNICODE
3032#else
3034#endif
3035
3040 {
3041 public:
3042 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3043
3044 virtual bool match(
3045 _In_reads_or_z_(end) const char* text,
3046 _In_ size_t start = 0,
3047 _In_ size_t end = (size_t)-1,
3048 _In_ int flags = match_default)
3049 {
3050 assert(text || start >= end);
3051 if (start < end && text[start]) {
3052 wchar_t buf[3];
3053 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3054 const wchar_t* chr_end = chr + stdex::strlen(chr);
3055 if (((chr[0] == L'-' ||
3056 chr[0] == L'.' ||
3057 chr[0] == L'_' ||
3058 chr[0] == L'~' ||
3059 chr[0] == L'%' ||
3060 chr[0] == L'!' ||
3061 chr[0] == L'$' ||
3062 chr[0] == L'&' ||
3063 chr[0] == L'\'' ||
3064 //chr[0] == L'(' ||
3065 //chr[0] == L')' ||
3066 chr[0] == L'*' ||
3067 chr[0] == L'+' ||
3068 chr[0] == L',' ||
3069 chr[0] == L';' ||
3070 chr[0] == L'=') && chr[1] == 0) ||
3071 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3072 {
3073 this->interval.start = start;
3074 return true;
3075 }
3076 }
3077
3078 this->interval.start = (this->interval.end = start) + 1;
3079 return false;
3080 }
3081 };
3082
3086 template <class T>
3088 {
3089 public:
3090 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3091
3092 virtual bool match(
3093 _In_reads_or_z_(end) const T* text,
3094 _In_ size_t start = 0,
3095 _In_ size_t end = (size_t)-1,
3096 _In_ int flags = match_default)
3097 {
3098 assert(text || start >= end);
3099 if (start < end && text[start]) {
3100 if (text[start] == '-' ||
3101 text[start] == '.' ||
3102 text[start] == '_' ||
3103 text[start] == '~' ||
3104 text[start] == '%' ||
3105 text[start] == '!' ||
3106 text[start] == '$' ||
3107 text[start] == '&' ||
3108 text[start] == '\'' ||
3109 text[start] == '(' ||
3110 text[start] == ')' ||
3111 text[start] == '*' ||
3112 text[start] == '+' ||
3113 text[start] == ',' ||
3114 text[start] == ';' ||
3115 text[start] == '=' ||
3116 text[start] == ':' ||
3117 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3118 {
3119 this->interval.end = (this->interval.start = start) + 1;
3120 return true;
3121 }
3122 }
3123 this->interval.start = (this->interval.end = start) + 1;
3124 return false;
3125 }
3126 };
3127
3130#ifdef _UNICODE
3132#else
3134#endif
3135
3140 {
3141 public:
3142 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3143
3144 virtual bool match(
3145 _In_reads_or_z_(end) const char* text,
3146 _In_ size_t start = 0,
3147 _In_ size_t end = (size_t)-1,
3148 _In_ int flags = match_default)
3149 {
3150 assert(text || start >= end);
3151 if (start < end && text[start]) {
3152 wchar_t buf[3];
3153 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3154 const wchar_t* chr_end = chr + stdex::strlen(chr);
3155 if (((chr[0] == L'-' ||
3156 chr[0] == L'.' ||
3157 chr[0] == L'_' ||
3158 chr[0] == L'~' ||
3159 chr[0] == L'%' ||
3160 chr[0] == L'!' ||
3161 chr[0] == L'$' ||
3162 chr[0] == L'&' ||
3163 chr[0] == L'\'' ||
3164 chr[0] == L'(' ||
3165 chr[0] == L')' ||
3166 chr[0] == L'*' ||
3167 chr[0] == L'+' ||
3168 chr[0] == L',' ||
3169 chr[0] == L';' ||
3170 chr[0] == L'=' ||
3171 chr[0] == L':') && chr[1] == 0) ||
3172 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3173 {
3174 this->interval.start = start;
3175 return true;
3176 }
3177 }
3178 this->interval.start = (this->interval.end = start) + 1;
3179 return false;
3180 }
3181 };
3182
3186 template <class T>
3188 {
3189 public:
3190 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3191
3192 virtual bool match(
3193 _In_reads_or_z_(end) const T* text,
3194 _In_ size_t start = 0,
3195 _In_ size_t end = (size_t)-1,
3196 _In_ int flags = match_default)
3197 {
3198 assert(text || start >= end);
3199 if (start < end && text[start]) {
3200 if (text[start] == '/' ||
3201 text[start] == '-' ||
3202 text[start] == '.' ||
3203 text[start] == '_' ||
3204 text[start] == '~' ||
3205 text[start] == '%' ||
3206 text[start] == '!' ||
3207 text[start] == '$' ||
3208 text[start] == '&' ||
3209 text[start] == '\'' ||
3210 text[start] == '(' ||
3211 text[start] == ')' ||
3212 text[start] == '*' ||
3213 text[start] == '+' ||
3214 text[start] == ',' ||
3215 text[start] == ';' ||
3216 text[start] == '=' ||
3217 text[start] == ':' ||
3218 text[start] == '@' ||
3219 text[start] == '?' ||
3220 text[start] == '#' ||
3221 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3222 {
3223 this->interval.end = (this->interval.start = start) + 1;
3224 return true;
3225 }
3226 }
3227 this->interval.start = (this->interval.end = start) + 1;
3228 return false;
3229 }
3230 };
3231
3234#ifdef _UNICODE
3236#else
3238#endif
3239
3244 {
3245 public:
3246 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3247
3248 virtual bool match(
3249 _In_reads_or_z_(end) const char* text,
3250 _In_ size_t start = 0,
3251 _In_ size_t end = (size_t)-1,
3252 _In_ int flags = match_default)
3253 {
3254 assert(text || start >= end);
3255 if (start < end && text[start]) {
3256 wchar_t buf[3];
3257 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3258 const wchar_t* chr_end = chr + stdex::strlen(chr);
3259 if (((chr[0] == L'/' ||
3260 chr[0] == L'-' ||
3261 chr[0] == L'.' ||
3262 chr[0] == L'_' ||
3263 chr[0] == L'~' ||
3264 chr[0] == L'%' ||
3265 chr[0] == L'!' ||
3266 chr[0] == L'$' ||
3267 chr[0] == L'&' ||
3268 chr[0] == L'\'' ||
3269 chr[0] == L'(' ||
3270 chr[0] == L')' ||
3271 chr[0] == L'*' ||
3272 chr[0] == L'+' ||
3273 chr[0] == L',' ||
3274 chr[0] == L';' ||
3275 chr[0] == L'=' ||
3276 chr[0] == L':' ||
3277 chr[0] == L'@' ||
3278 chr[0] == L'?' ||
3279 chr[0] == L'#') && chr[1] == 0) ||
3280 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3281 {
3282 this->interval.start = start;
3283 return true;
3284 }
3285 }
3286 this->interval.start = (this->interval.end = start) + 1;
3287 return false;
3288 }
3289 };
3290
3294 template <class T>
3296 {
3297 public:
3299 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3300 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3301 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3302 _In_ const std::locale& locale = std::locale()) :
3303 basic_parser<T>(locale),
3304 m_path_char(path_char),
3305 m_query_start(query_start),
3306 m_bookmark_start(bookmark_start)
3307 {}
3308
/// Matches a URL path and splits it into path, query and bookmark intervals.
///
/// Path characters are consumed until the query start or the bookmark start parser matches,
/// or until a character that is not a path character is reached. After a query start, path
/// characters are consumed as the query until a bookmark start matches. After a bookmark
/// start, path characters are consumed as the bookmark.
///
/// \param[in] text   Text to parse
/// \param[in] start  Index in text to start matching at
/// \param[in] end    Index in text to stop at (matching also stops at a zero terminator)
/// \param[in] flags  Match flags; forwarded unchanged to the sub-parsers
///
/// \returns true on match with interval, path, query and bookmark set; false otherwise
3309 virtual bool match(
3310 _In_reads_or_z_(end) const T* text,
3311 _In_ size_t start = 0,
3312 _In_ size_t end = (size_t)-1,
3313 _In_ int flags = match_default)
3314 {
3315 assert(text || start >= end);
3316
3317 this->interval.end = start;
3318 path.start = start;
// query and bookmark start out as start 1, end 0 and are only filled in when their start
// marker is found.
3319 query.start = 1;
3320 query.end = 0;
3321 bookmark.start = 1;
3322 bookmark.end = 0;
3323
3324 for (;;) {
3325 if (this->interval.end >= end || !text[this->interval.end])
3326 break;
3327 if (m_query_start->match(text, this->interval.end, end, flags)) {
// The path ends in front of the query start marker; the query begins behind it.
3328 path.end = this->interval.end;
3329 query.start = this->interval.end = m_query_start->interval.end;
3330 for (;;) {
3331 if (this->interval.end >= end || !text[this->interval.end]) {
3332 query.end = this->interval.end;
3333 break;
3334 }
3335 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
// The query ends in front of the bookmark start marker; the bookmark begins behind it.
3336 query.end = this->interval.end;
3337 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3338 for (;;) {
3339 if (this->interval.end >= end || !text[this->interval.end]) {
3340 bookmark.end = this->interval.end;
3341 break;
3342 }
3343 if (m_path_char->match(text, this->interval.end, end, flags))
3344 this->interval.end = m_path_char->interval.end;
3345 else {
3346 bookmark.end = this->interval.end;
3347 break;
3348 }
3349 }
3350 this->interval.start = start;
3351 return true;
3352 }
3353 if (m_path_char->match(text, this->interval.end, end, flags))
3354 this->interval.end = m_path_char->interval.end;
3355 else {
3356 query.end = this->interval.end;
3357 break;
3358 }
3359 }
3360 this->interval.start = start;
3361 return true;
3362 }
3363 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
// Bookmark directly after the path, without a query.
3364 path.end = this->interval.end;
3365 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3366 for (;;) {
3367 if (this->interval.end >= end || !text[this->interval.end]) {
3368 bookmark.end = this->interval.end;
3369 break;
3370 }
3371 if (m_path_char->match(text, this->interval.end, end, flags))
3372 this->interval.end = m_path_char->interval.end;
3373 else {
3374 bookmark.end = this->interval.end;
3375 break;
3376 }
3377 }
3378 this->interval.start = start;
3379 return true;
3380 }
3381 if (m_path_char->match(text, this->interval.end, end, flags))
3382 this->interval.end = m_path_char->interval.end;
3383 else
3384 break;
3385 }
3386
3388 path.end = this->interval.end;
3389 this->interval.start = start;
3390 return true;
3391 }
3392
// Failure: query was already reset at the top and is never touched on the way here.
3393 path.start = 1;
3394 path.end = 0;
3395 bookmark.start = 1;
3396 bookmark.end = 0;
3397 this->interval.start = (this->interval.end = start) + 1;
3398 return false;
3399 }
3400
/// Resets the path, query and bookmark intervals to start 1, end 0 (start after end).
3401 virtual void invalidate()
3402 {
3403 path.start = 1;
3404 path.end = 0;
3405 query.start = 1;
3406 query.end = 0;
3407 bookmark.start = 1;
3408 bookmark.end = 0;
3410 }
3411
3412 public:
3415 stdex::interval<size_t> bookmark;
3416
3417 protected:
3418 std::shared_ptr<basic_parser<T>> m_path_char;
3419 std::shared_ptr<basic_parser<T>> m_query_start;
3420 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3421 };
3422
// turl_path follows the build's character set: wurl_path when _UNICODE is defined, url_path otherwise.
3425#ifdef _UNICODE
3426 using turl_path = wurl_path;
3427#else
3428 using turl_path = url_path;
3429#endif
3431
3435 template <class T>
3436 class basic_url : public basic_parser<T>
3437 {
3438 public:
3439 basic_url(
3440 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3441 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3442 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3443 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3444 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3445 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3446 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3447 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3448 _In_ const std::shared_ptr<basic_parser<T>>& at,
3449 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3450 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3451 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3452 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3453 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3454 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3455 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3456 _In_ const std::locale& locale = std::locale()) :
3457 basic_parser<T>(locale),
3458 http_scheme(_http_scheme),
3459 ftp_scheme(_ftp_scheme),
3460 mailto_scheme(_mailto_scheme),
3461 file_scheme(_file_scheme),
3462 m_colon(colon),
3463 m_slash(slash),
3464 username(_username),
3465 password(_password),
3466 m_at(at),
3467 m_ip_lbracket(ip_lbracket),
3468 m_ip_rbracket(ip_rbracket),
3469 ipv4_host(_ipv4_host),
3470 ipv6_host(_ipv6_host),
3471 dns_host(_dns_host),
3472 port(_port),
3473 path(_path)
3474 {}
3475
3476 virtual bool match(
3477 _In_reads_or_z_(end) const T* text,
3478 _In_ size_t start = 0,
3479 _In_ size_t end = (size_t)-1,
3480 _In_ int flags = match_default)
3481 {
3482 assert(text || start >= end);
3483
3484 this->interval.end = start;
3485
3486 if (http_scheme->match(text, this->interval.end, end, flags) &&
3487 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3488 m_slash->match(text, m_colon->interval.end, end, flags) &&
3489 m_slash->match(text, m_slash->interval.end, end, flags))
3490 {
3491 // http://
3492 this->interval.end = m_slash->interval.end;
3493 ftp_scheme->invalidate();
3494 mailto_scheme->invalidate();
3495 file_scheme->invalidate();
3496 }
3497 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3498 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3499 m_slash->match(text, m_colon->interval.end, end, flags) &&
3500 m_slash->match(text, m_slash->interval.end, end, flags))
3501 {
3502 // ftp://
3503 this->interval.end = m_slash->interval.end;
3504 http_scheme->invalidate();
3505 mailto_scheme->invalidate();
3506 file_scheme->invalidate();
3507 }
3508 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3509 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3510 {
3511 // mailto:
3512 this->interval.end = m_colon->interval.end;
3513 http_scheme->invalidate();
3514 ftp_scheme->invalidate();
3515 file_scheme->invalidate();
3516 }
3517 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3518 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3519 m_slash->match(text, m_colon->interval.end, end, flags) &&
3520 m_slash->match(text, m_slash->interval.end, end, flags))
3521 {
3522 // file://
3523 this->interval.end = m_slash->interval.end;
3524 http_scheme->invalidate();
3525 ftp_scheme->invalidate();
3526 mailto_scheme->invalidate();
3527 }
3528 else {
3529 // Default to http:
3530 http_scheme->invalidate();
3531 ftp_scheme->invalidate();
3532 mailto_scheme->invalidate();
3533 file_scheme->invalidate();
3534 }
3535
3536 if (ftp_scheme->interval) {
3537 if (username->match(text, this->interval.end, end, flags)) {
3538 if (m_colon->match(text, username->interval.end, end, flags) &&
3539 password->match(text, m_colon->interval.end, end, flags) &&
3540 m_at->match(text, password->interval.end, end, flags))
3541 {
3542 // Username and password
3543 this->interval.end = m_at->interval.end;
3544 }
3545 else if (m_at->match(text, this->interval.end, end, flags)) {
3546 // Username only
3547 this->interval.end = m_at->interval.end;
3548 password->invalidate();
3549 }
3550 else {
3551 username->invalidate();
3552 password->invalidate();
3553 }
3554 }
3555 else {
3556 username->invalidate();
3557 password->invalidate();
3558 }
3559
3560 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3561 // Host is IPv4
3562 this->interval.end = ipv4_host->interval.end;
3563 ipv6_host->invalidate();
3564 dns_host->invalidate();
3565 }
3566 else if (
3567 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3568 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3569 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3570 {
3571 // Host is IPv6
3572 this->interval.end = m_ip_rbracket->interval.end;
3573 ipv4_host->invalidate();
3574 dns_host->invalidate();
3575 }
3576 else if (dns_host->match(text, this->interval.end, end, flags)) {
3577 // Host is hostname
3578 this->interval.end = dns_host->interval.end;
3579 ipv4_host->invalidate();
3580 ipv6_host->invalidate();
3581 }
3582 else {
3583 invalidate();
3584 return false;
3585 }
3586
3587 if (m_colon->match(text, this->interval.end, end, flags) &&
3588 port->match(text, m_colon->interval.end, end, flags))
3589 {
3590 // Port
3591 this->interval.end = port->interval.end;
3592 }
3593 else
3594 port->invalidate();
3595
3596 if (path->match(text, this->interval.end, end, flags)) {
3597 // Path
3598 this->interval.end = path->interval.end;
3599 }
3600
3601 this->interval.start = start;
3602 return true;
3603 }
3604
3605 if (mailto_scheme->interval) {
3606 if (username->match(text, this->interval.end, end, flags) &&
3607 m_at->match(text, username->interval.end, end, flags))
3608 {
3609 // Username
3610 this->interval.end = m_at->interval.end;
3611 }
3612 else {
3613 invalidate();
3614 return false;
3615 }
3616
3617 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3618 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3619 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3620 {
3621 // Host is IPv4
3622 this->interval.end = m_ip_rbracket->interval.end;
3623 ipv6_host->invalidate();
3624 dns_host->invalidate();
3625 }
3626 else if (
3627 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3628 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3629 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3630 {
3631 // Host is IPv6
3632 this->interval.end = m_ip_rbracket->interval.end;
3633 ipv4_host->invalidate();
3634 dns_host->invalidate();
3635 }
3636 else if (dns_host->match(text, this->interval.end, end, flags)) {
3637 // Host is hostname
3638 this->interval.end = dns_host->interval.end;
3639 ipv4_host->invalidate();
3640 ipv6_host->invalidate();
3641 }
3642 else {
3643 invalidate();
3644 return false;
3645 }
3646
3647 password->invalidate();
3648 port->invalidate();
3649 path->invalidate();
3650 this->interval.start = start;
3651 return true;
3652 }
3653
3654 if (file_scheme->interval) {
3655 if (path->match(text, this->interval.end, end, flags)) {
3656 // Path
3657 this->interval.end = path->interval.end;
3658 }
3659
3660 username->invalidate();
3661 password->invalidate();
3662 ipv4_host->invalidate();
3663 ipv6_host->invalidate();
3664 dns_host->invalidate();
3665 port->invalidate();
3666 this->interval.start = start;
3667 return true;
3668 }
3669
3670 // "http://" found or defaulted to
3671
3672 // If "http://" explicit, test for username&password.
3673 if (http_scheme->interval &&
3674 username->match(text, this->interval.end, end, flags))
3675 {
3676 if (m_colon->match(text, username->interval.end, end, flags) &&
3677 password->match(text, m_colon->interval.end, end, flags) &&
3678 m_at->match(text, password->interval.end, end, flags))
3679 {
3680 // Username and password
3681 this->interval.end = m_at->interval.end;
3682 }
3683 else if (m_at->match(text, username->interval.end, end, flags)) {
3684 // Username only
3685 this->interval.end = m_at->interval.end;
3686 password->invalidate();
3687 }
3688 else {
3689 username->invalidate();
3690 password->invalidate();
3691 }
3692 }
3693 else {
3694 username->invalidate();
3695 password->invalidate();
3696 }
3697
3698 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3699 // Host is IPv4
3700 this->interval.end = ipv4_host->interval.end;
3701 ipv6_host->invalidate();
3702 dns_host->invalidate();
3703 }
3704 else if (
3705 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3706 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3707 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3708 {
3709 // Host is IPv6
3710 this->interval.end = m_ip_rbracket->interval.end;
3711 ipv4_host->invalidate();
3712 dns_host->invalidate();
3713 }
3714 else if (dns_host->match(text, this->interval.end, end, flags)) {
3715 // Host is hostname
3716 this->interval.end = dns_host->interval.end;
3717 ipv4_host->invalidate();
3718 ipv6_host->invalidate();
3719 }
3720 else {
3721 invalidate();
3722 return false;
3723 }
3724
3725 if (m_colon->match(text, this->interval.end, end, flags) &&
3726 port->match(text, m_colon->interval.end, end, flags))
3727 {
3728 // Port
3729 this->interval.end = port->interval.end;
3730 }
3731 else
3732 port->invalidate();
3733
3734 if (path->match(text, this->interval.end, end, flags)) {
3735 // Path
3736 this->interval.end = path->interval.end;
3737 }
3738
3739 this->interval.start = start;
3740 return true;
3741 }
3742
3743 virtual void invalidate()
3744 {
3745 http_scheme->invalidate();
3746 ftp_scheme->invalidate();
3747 mailto_scheme->invalidate();
3748 file_scheme->invalidate();
3749 username->invalidate();
3750 password->invalidate();
3751 ipv4_host->invalidate();
3752 ipv6_host->invalidate();
3753 dns_host->invalidate();
3754 port->invalidate();
3755 path->invalidate();
3757 }
3758
3759 public:
3760 std::shared_ptr<basic_parser<T>> http_scheme;
3761 std::shared_ptr<basic_parser<T>> ftp_scheme;
3762 std::shared_ptr<basic_parser<T>> mailto_scheme;
3763 std::shared_ptr<basic_parser<T>> file_scheme;
3764 std::shared_ptr<basic_parser<T>> username;
3765 std::shared_ptr<basic_parser<T>> password;
3766 std::shared_ptr<basic_parser<T>> ipv4_host;
3767 std::shared_ptr<basic_parser<T>> ipv6_host;
3768 std::shared_ptr<basic_parser<T>> dns_host;
3769 std::shared_ptr<basic_parser<T>> port;
3770 std::shared_ptr<basic_parser<T>> path;
3771
3772 protected:
3773 std::shared_ptr<basic_parser<T>> m_colon;
3774 std::shared_ptr<basic_parser<T>> m_slash;
3775 std::shared_ptr<basic_parser<T>> m_at;
3776 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3777 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3778 };
3779
3780 using url = basic_url<char>;
3781 using wurl = basic_url<wchar_t>;
3782#ifdef _UNICODE
3783 using turl = wurl;
3784#else
3785 using turl = url;
3786#endif
3787 using sgml_url = basic_url<char>;
3788
3792 template <class T>
3794 {
3795 public:
3797 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3798 _In_ const std::shared_ptr<basic_parser<T>>& at,
3799 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3800 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3801 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3802 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3803 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3804 _In_ const std::locale& locale = std::locale()) :
3805 basic_parser<T>(locale),
3806 username(_username),
3807 m_at(at),
3808 m_ip_lbracket(ip_lbracket),
3809 m_ip_rbracket(ip_rbracket),
3810 ipv4_host(_ipv4_host),
3811 ipv6_host(_ipv6_host),
3812 dns_host(_dns_host)
3813 {}
3814
3815 virtual bool match(
3816 _In_reads_or_z_(end) const T* text,
3817 _In_ size_t start = 0,
3818 _In_ size_t end = (size_t)-1,
3819 _In_ int flags = match_default)
3820 {
3821 assert(text || start >= end);
3822
3823 if (username->match(text, start, end, flags) &&
3824 m_at->match(text, username->interval.end, end, flags))
3825 {
3826 // Username@
3827 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3828 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3829 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3830 {
3831 // Host is IPv4
3832 this->interval.end = m_ip_rbracket->interval.end;
3833 ipv6_host->invalidate();
3834 dns_host->invalidate();
3835 }
3836 else if (
3837 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3838 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3839 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3840 {
3841 // Host is IPv6
3842 this->interval.end = m_ip_rbracket->interval.end;
3843 ipv4_host->invalidate();
3844 dns_host->invalidate();
3845 }
3846 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3847 // Host is hostname
3848 this->interval.end = dns_host->interval.end;
3849 ipv4_host->invalidate();
3850 ipv6_host->invalidate();
3851 }
3852 else
3853 goto error;
3854 this->interval.start = start;
3855 return true;
3856 }
3857
3858 error:
3859 username->invalidate();
3860 ipv4_host->invalidate();
3861 ipv6_host->invalidate();
3862 dns_host->invalidate();
3863 this->interval.start = (this->interval.end = start) + 1;
3864 return false;
3865 }
3866
3867 virtual void invalidate()
3868 {
3869 username->invalidate();
3870 ipv4_host->invalidate();
3871 ipv6_host->invalidate();
3872 dns_host->invalidate();
3874 }
3875
3876 public:
3877 std::shared_ptr<basic_parser<T>> username;
3878 std::shared_ptr<basic_parser<T>> ipv4_host;
3879 std::shared_ptr<basic_parser<T>> ipv6_host;
3880 std::shared_ptr<basic_parser<T>> dns_host;
3881
3882 protected:
3883 std::shared_ptr<basic_parser<T>> m_at;
3884 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3885 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3886 };
3887
3890#ifdef _UNICODE
3892#else
3894#endif
3896
3900 template <class T>
3902 {
3903 public:
3905 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3906 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3907 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3908 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3909 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3910 _In_ const std::locale& locale = std::locale()) :
3911 basic_parser<T>(locale),
3913 apex(_apex),
3914 eyes(_eyes),
3915 nose(_nose),
3916 mouth(_mouth)
3917 {}
3918
3919 virtual bool match(
3920 _In_reads_or_z_(end) const T* text,
3921 _In_ size_t start = 0,
3922 _In_ size_t end = (size_t)-1,
3923 _In_ int flags = match_default)
3924 {
3925 assert(text || start >= end);
3926
3927 if (emoticon && emoticon->match(text, start, end, flags)) {
3928 if (apex) apex->invalidate();
3929 eyes->invalidate();
3930 if (nose) nose->invalidate();
3931 mouth->invalidate();
3932 this->interval.start = start;
3933 this->interval.end = emoticon->interval.end;
3934 return true;
3935 }
3936
3937 this->interval.end = start;
3938
3939 if (apex && apex->match(text, this->interval.end, end, flags))
3940 this->interval.end = apex->interval.end;
3941
3942 if (eyes->match(text, this->interval.end, end, flags)) {
3943 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3944 mouth->match(text, nose->interval.end, end, flags))
3945 {
3946 size_t
3948 hit_offset = mouth->hit_offset;
3949 // Mouth may repeat :-)))))))
3950 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3951 mouth->interval.start = start_mouth;
3952 mouth->interval.end = this->interval.end;
3953 this->interval.start = start;
3954 return true;
3955 }
3956 if (mouth->match(text, eyes->interval.end, end, flags)) {
3957 size_t
3959 hit_offset = mouth->hit_offset;
3960 // Mouth may repeat :-)))))))
3961 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3962 if (nose) nose->invalidate();
3963 mouth->interval.start = start_mouth;
3964 mouth->interval.end = this->interval.end;
3965 this->interval.start = start;
3966 return true;
3967 }
3968 }
3969
3970 if (emoticon) emoticon->invalidate();
3971 if (apex) apex->invalidate();
3972 eyes->invalidate();
3973 if (nose) nose->invalidate();
3974 mouth->invalidate();
3975 this->interval.start = (this->interval.end = start) + 1;
3976 return false;
3977 }
3978
3979 virtual void invalidate()
3980 {
3981 if (emoticon) emoticon->invalidate();
3982 if (apex) apex->invalidate();
3983 eyes->invalidate();
3984 if (nose) nose->invalidate();
3985 mouth->invalidate();
3987 }
3988
3989 public:
3990 std::shared_ptr<basic_parser<T>> emoticon;
3991 std::shared_ptr<basic_parser<T>> apex;
3992 std::shared_ptr<basic_parser<T>> eyes;
3993 std::shared_ptr<basic_parser<T>> nose;
3994 std::shared_ptr<basic_set<T>> mouth;
3995 };
3996
3999#ifdef _UNICODE
4000 using temoticon = wemoticon;
4001#else
4002 using temoticon = emoticon;
4003#endif
4005
/// Date layouts. Every layout occupies its own bit, so several of them can be
/// OR-ed together into a mask of accepted layouts.
enum date_format_t {
    date_format_none = 0,      ///< No layout
    date_format_dmy  = 1 << 0, ///< day, month, year
    date_format_mdy  = 1 << 1, ///< month, day, year
    date_format_ymd  = 1 << 2, ///< year, month, day
    date_format_ym   = 1 << 3, ///< year, month
    date_format_my   = 1 << 4, ///< month, year
    date_format_dm   = 1 << 5, ///< day, month
    date_format_md   = 1 << 6, ///< month, day
};
4019
4023 template <class T>
4024 class basic_date : public basic_parser<T>
4025 {
4026 public:
4027 basic_date(
4028 _In_ int format_mask,
4029 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4030 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4031 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4032 _In_ const std::shared_ptr<basic_set<T>>& separator,
4033 _In_ const std::shared_ptr<basic_parser<T>>& space,
4034 _In_ const std::locale& locale = std::locale()) :
4035 basic_parser<T>(locale),
4036 format(date_format_none),
4037 m_format_mask(format_mask),
4038 day(_day),
4039 month(_month),
4040 year(_year),
4041 m_separator(separator),
4042 m_space(space)
4043 {}
4044
4045 virtual bool match(
4046 _In_reads_or_z_(end) const T* text,
4047 _In_ size_t start = 0,
4048 _In_ size_t end = (size_t)-1,
4049 _In_ int flags = match_default)
4050 {
4051 assert(text || start >= end);
4052
4053 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4054 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4055 if (day->match(text, start, end, flags)) {
4056 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4057 if (m_separator->match(text, this->interval.end, end, flags)) {
4058 size_t hit_offset = m_separator->hit_offset;
4059 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4060 if (month->match(text, this->interval.end, end, flags)) {
4061 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4062 if (m_separator->match(text, this->interval.end, end, flags) &&
4063 m_separator->hit_offset == hit_offset) // Both separators must match.
4064 {
4065 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4066 if (year->match(text, this->interval.end, end, flags) &&
4067 is_valid(day->value, month->value))
4068 {
4069 this->interval.start = start;
4070 this->interval.end = year->interval.end;
4071 format = date_format_dmy;
4072 return true;
4073 }
4074 }
4075 }
4076 }
4077 }
4078 }
4079
4080 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4081 if (month->match(text, start, end, flags)) {
4082 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4083 if (m_separator->match(text, this->interval.end, end, flags)) {
4084 size_t hit_offset = m_separator->hit_offset;
4085 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4086 if (day->match(text, this->interval.end, end, flags)) {
4087 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4088 if (m_separator->match(text, this->interval.end, end, flags) &&
4089 m_separator->hit_offset == hit_offset) // Both separators must match.
4090 {
4091 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4092 if (year->match(text, this->interval.end, end, flags) &&
4093 is_valid(day->value, month->value))
4094 {
4095 this->interval.start = start;
4096 this->interval.end = year->interval.end;
4097 format = date_format_mdy;
4098 return true;
4099 }
4100 }
4101 }
4102 }
4103 }
4104 }
4105
4106 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4107 if (year->match(text, start, end, flags)) {
4108 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4109 if (m_separator->match(text, this->interval.end, end, flags)) {
4110 size_t hit_offset = m_separator->hit_offset;
4111 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4112 if (month->match(text, this->interval.end, end, flags)) {
4113 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4114 if (m_separator->match(text, this->interval.end, end, flags) &&
4115 m_separator->hit_offset == hit_offset) // Both separators must match.
4116 {
4117 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4118 if (day->match(text, this->interval.end, end, flags) &&
4119 is_valid(day->value, month->value))
4120 {
4121 this->interval.start = start;
4122 this->interval.end = day->interval.end;
4123 format = date_format_ymd;
4124 return true;
4125 }
4126 }
4127 }
4128 }
4129 }
4130 }
4131
4132 if ((m_format_mask & date_format_ym) == date_format_ym) {
4133 if (year->match(text, start, end, flags)) {
4134 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4135 if (m_separator->match(text, this->interval.end, end, flags)) {
4136 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4137 if (month->match(text, this->interval.end, end, flags) &&
4138 is_valid((size_t)-1, month->value))
4139 {
4140 if (day) day->invalidate();
4141 this->interval.start = start;
4142 this->interval.end = month->interval.end;
4143 format = date_format_ym;
4144 return true;
4145 }
4146 }
4147 }
4148 }
4149
4150 if ((m_format_mask & date_format_my) == date_format_my) {
4151 if (month->match(text, start, end, flags)) {
4152 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4153 if (m_separator->match(text, this->interval.end, end, flags)) {
4154 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4155 if (year->match(text, this->interval.end, end, flags) &&
4156 is_valid((size_t)-1, month->value))
4157 {
4158 if (day) day->invalidate();
4159 this->interval.start = start;
4160 this->interval.end = year->interval.end;
4161 format = date_format_my;
4162 return true;
4163 }
4164 }
4165 }
4166 }
4167
4168 if ((m_format_mask & date_format_dm) == date_format_dm) {
4169 if (day->match(text, start, end, flags)) {
4170 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4171 if (m_separator->match(text, this->interval.end, end, flags)) {
4172 size_t hit_offset = m_separator->hit_offset;
4173 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4174 if (month->match(text, this->interval.end, end, flags) &&
4175 is_valid(day->value, month->value))
4176 {
4177 if (year) year->invalidate();
4178 this->interval.start = start;
4179 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4180 if (m_separator->match(text, this->interval.end, end, flags) &&
4181 m_separator->hit_offset == hit_offset) // Both separators must match.
4182 this->interval.end = m_separator->interval.end;
4183 else
4184 this->interval.end = month->interval.end;
4185 format = date_format_dm;
4186 return true;
4187 }
4188 }
4189 }
4190 }
4191
4192 if ((m_format_mask & date_format_md) == date_format_md) {
4193 if (month->match(text, start, end, flags)) {
4194 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4195 if (m_separator->match(text, this->interval.end, end, flags)) {
4196 size_t hit_offset = m_separator->hit_offset;
4197 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4198 if (day->match(text, this->interval.end, end, flags) &&
4199 is_valid(day->value, month->value))
4200 {
4201 if (year) year->invalidate();
4202 this->interval.start = start;
4203 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4204 if (m_separator->match(text, this->interval.end, end, flags) &&
4205 m_separator->hit_offset == hit_offset) // Both separators must match.
4206 this->interval.end = m_separator->interval.end;
4207 else
4208 this->interval.end = day->interval.end;
4209 format = date_format_md;
4210 return true;
4211 }
4212 }
4213 }
4214 }
4215
4216 if (day) day->invalidate();
4217 if (month) month->invalidate();
4218 if (year) year->invalidate();
4219 format = date_format_none;
4220 this->interval.start = (this->interval.end = start) + 1;
4221 return false;
4222 }
4223
4224 virtual void invalidate()
4225 {
4226 if (day) day->invalidate();
4227 if (month) month->invalidate();
4228 if (year) year->invalidate();
4229 format = date_format_none;
4231 }
4232
4233 protected:
4234 static inline bool is_valid(size_t day, size_t month)
4235 {
4236 if (month == (size_t)-1) {
4237 // Default to January. This allows validating day only, as January has all 31 days.
4238 month = 1;
4239 }
4240 if (day == (size_t)-1) {
4241 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4242 day = 1;
4243 }
4244
4245 switch (month) {
4246 case 1:
4247 case 3:
4248 case 5:
4249 case 7:
4250 case 8:
4251 case 10:
4252 case 12:
4253 return 1 <= day && day <= 31;
4254 case 2:
4255 return 1 <= day && day <= 29;
4256 case 4:
4257 case 6:
4258 case 9:
4259 case 11:
4260 return 1 <= day && day <= 30;
4261 default:
4262 return false;
4263 }
4264 }
4265
4266 public:
4267 date_format_t format;
4268 std::shared_ptr<basic_integer<T>> day;
4269 std::shared_ptr<basic_integer<T>> month;
4270 std::shared_ptr<basic_integer<T>> year;
4271
4272 protected:
4273 int m_format_mask;
4274 std::shared_ptr<basic_set<T>> m_separator;
4275 std::shared_ptr<basic_parser<T>> m_space;
4276 };
4277
4278 using date = basic_date<char>;
4279 using wdate = basic_date<wchar_t>;
4280#ifdef _UNICODE
4281 using tdate = wdate;
4282#else
4283 using tdate = date;
4284#endif
4286
4290 template <class T>
4291 class basic_time : public basic_parser<T>
4292 {
4293 public:
4294 basic_time(
4295 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4296 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4297 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4298 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4299 _In_ const std::shared_ptr<basic_set<T>>& separator,
4300 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4301 _In_ const std::locale& locale = std::locale()) :
4302 basic_parser<T>(locale),
4303 hour(_hour),
4304 minute(_minute),
4305 second(_second),
4306 millisecond(_millisecond),
4307 m_separator(separator),
4308 m_millisecond_separator(millisecond_separator)
4309 {}
4310
4311 virtual bool match(
4312 _In_reads_or_z_(end) const T* text,
4313 _In_ size_t start = 0,
4314 _In_ size_t end = (size_t)-1,
4315 _In_ int flags = match_default)
4316 {
4317 assert(text || start >= end);
4318
4319 if (hour->match(text, start, end, flags) &&
4320 m_separator->match(text, hour->interval.end, end, flags) &&
4321 minute->match(text, m_separator->interval.end, end, flags) &&
4322 minute->value < 60)
4323 {
4324 // hh::mm
4325 size_t hit_offset = m_separator->hit_offset;
4326 if (m_separator->match(text, minute->interval.end, end, flags) &&
4327 m_separator->hit_offset == hit_offset && // Both separators must match.
4328 second && second->match(text, m_separator->interval.end, end, flags) &&
4329 second->value < 60)
4330 {
4331 // hh::mm:ss
4332 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4333 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4334 millisecond->value < 1000)
4335 {
4336 // hh::mm:ss.mmmm
4337 this->interval.end = millisecond->interval.end;
4338 }
4339 else {
4340 if (millisecond) millisecond->invalidate();
4341 this->interval.end = second->interval.end;
4342 }
4343 }
4344 else {
4345 if (second) second->invalidate();
4346 if (millisecond) millisecond->invalidate();
4347 this->interval.end = minute->interval.end;
4348 }
4349 this->interval.start = start;
4350 return true;
4351 }
4352
4353 hour->invalidate();
4354 minute->invalidate();
4355 if (second) second->invalidate();
4356 if (millisecond) millisecond->invalidate();
4357 this->interval.start = (this->interval.end = start) + 1;
4358 return false;
4359 }
4360
4361 virtual void invalidate()
4362 {
4363 hour->invalidate();
4364 minute->invalidate();
4365 if (second) second->invalidate();
4366 if (millisecond) millisecond->invalidate();
4368 }
4369
4370 public:
4371 std::shared_ptr<basic_integer10<T>> hour;
4372 std::shared_ptr<basic_integer10<T>> minute;
4373 std::shared_ptr<basic_integer10<T>> second;
4374 std::shared_ptr<basic_integer10<T>> millisecond;
4375
4376 protected:
4377 std::shared_ptr<basic_set<T>> m_separator;
4378 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4379 };
4380
4381 using time = basic_time<char>;
4382 using wtime = basic_time<wchar_t>;
4383#ifdef _UNICODE
4384 using ttime = wtime;
4385#else
4386 using ttime = time;
4387#endif
4389
4393 template <class T>
4394 class basic_angle : public basic_parser<T>
4395 {
4396 public:
4398 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4399 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4400 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4401 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4402 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4403 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4404 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4405 _In_ const std::locale& locale = std::locale()) :
4406 basic_parser<T>(locale),
4407 degree(_degree),
4408 degree_separator(_degree_separator),
4409 minute(_minute),
4410 minute_separator(_minute_separator),
4411 second(_second),
4412 second_separator(_second_separator),
4413 decimal(_decimal)
4414 {}
4415
4416 virtual bool match(
4417 _In_reads_or_z_(end) const T* text,
4418 _In_ size_t start = 0,
4419 _In_ size_t end = (size_t)-1,
4420 _In_ int flags = match_default)
4421 {
4422 assert(text || start >= end);
4423
4424 this->interval.end = start;
4425
4426 if (degree->match(text, this->interval.end, end, flags) &&
4427 degree_separator->match(text, degree->interval.end, end, flags))
4428 {
4429 // Degrees
4430 this->interval.end = degree_separator->interval.end;
4431 }
4432 else {
4433 degree->invalidate();
4434 degree_separator->invalidate();
4435 }
4436
4437 if (minute->match(text, this->interval.end, end, flags) &&
4438 minute->value < 60 &&
4439 minute_separator->match(text, minute->interval.end, end, flags))
4440 {
4441 // Minutes
4442 this->interval.end = minute_separator->interval.end;
4443 }
4444 else {
4445 minute->invalidate();
4446 minute_separator->invalidate();
4447 }
4448
4449 if (second && second->match(text, this->interval.end, end, flags) &&
4450 second->value < 60)
4451 {
4452 // Seconds
4453 this->interval.end = second->interval.end;
4454 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4455 this->interval.end = second_separator->interval.end;
4456 else
4457 if (second_separator) second_separator->invalidate();
4458 }
4459 else {
4460 if (second) second->invalidate();
4461 if (second_separator) second_separator->invalidate();
4462 }
4463
4464 if (degree->interval.start < degree->interval.end ||
4465 minute->interval.start < minute->interval.end ||
4466 (second && second->interval.start < second->interval.end))
4467 {
4468 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4469 // Decimals
4470 this->interval.end = decimal->interval.end;
4471 }
4472 else if (decimal)
4473 decimal->invalidate();
4474 this->interval.start = start;
4475 return true;
4476 }
4477 if (decimal) decimal->invalidate();
4478 this->interval.start = (this->interval.end = start) + 1;
4479 return false;
4480 }
4481
// Invalidates every sub-parser. degree, degree_separator, minute and
// minute_separator are dereferenced unconditionally; second, second_separator
// and decimal are optional and only invalidated when set.
4482 virtual void invalidate()
4483 {
4484 degree->invalidate();
4485 degree_separator->invalidate();
4486 minute->invalidate();
4487 minute_separator->invalidate();
4488 if (second) second->invalidate();
4489 if (second_separator) second_separator->invalidate();
4490 if (decimal) decimal->invalidate();
// NOTE(review): listing line 4491 is absent from this extract; presumably the
// base-class invalidate() call - confirm against the real header.
4492 }
4493
4494 public:
4495 std::shared_ptr<basic_integer10<T>> degree;
4496 std::shared_ptr<basic_parser<T>> degree_separator;
4497 std::shared_ptr<basic_integer10<T>> minute;
4498 std::shared_ptr<basic_parser<T>> minute_separator;
4499 std::shared_ptr<basic_integer10<T>> second;
4500 std::shared_ptr<basic_parser<T>> second_separator;
4501 std::shared_ptr<basic_parser<T>> decimal;
4502 };
4503
// basic_angle instantiated for char text.
4504 using angle = basic_angle<char>;
// NOTE(review): listing line 4505 (presumably the wangle alias used below) is
// absent from this extract.
// RRegElKot resolves to wangle when _UNICODE is defined and to angle otherwise.
4506#ifdef _UNICODE
4507 using RRegElKot = wangle;
4508#else
4509 using RRegElKot = angle;
4510#endif
4512
4516 template <class T>
4518 {
4519 public:
4521 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4522 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4523 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4524 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4525 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4526 _In_ const std::shared_ptr<basic_parser<T>>& space,
4527 _In_ const std::locale& locale = std::locale()) :
4528 basic_parser<T>(locale),
4529 m_digit(digit),
4530 m_plus_sign(plus_sign),
4531 m_lparenthesis(lparenthesis),
4532 m_rparenthesis(rparenthesis),
4533 m_separator(separator),
4534 m_space(space)
4535 {}
4536
// Matches a phone number at text[start] and collects its significant
// characters in `value`.
//
// An optional plus sign is followed by a run of digits, at most one pair of
// parentheses, separators and spaces. Digits, the plus sign and parentheses
// are appended to `value`; separators and spaces are consumed but not stored.
// The match succeeds when at least one digit outside parentheses was seen;
// the reported interval and `value` are then cut back to the last "safe"
// point (after the last such digit or after a closing parenthesis).
// On failure `value` is cleared and interval.start > interval.end.
4537 virtual bool match(
4538 _In_reads_or_z_(end) const T* text,
4539 _In_ size_t start = 0,
4540 _In_ size_t end = (size_t)-1,
4541 _In_ int flags = match_default)
4542 {
4543 assert(text || start >= end);
4544
// safe_digit_end / safe_value_size remember the text position and `value`
// length to roll back to when the scan ends on trailing junk.
4545 size_t safe_digit_end = start, safe_value_size = 0;
4546 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4547 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4548
4549 this->interval.end = start;
4550 value.clear();
// NOTE(review): both parenthesis parsers are dereferenced unconditionally
// here, yet are null-checked further down - confirm whether null is allowed.
4551 m_lparenthesis->invalidate();
4552 m_rparenthesis->invalidate();
4553
4554 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4555 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4556 safe_value_size = value.size();
4557 this->interval.end = m_plus_sign->interval.end;
4558 }
4559
4560 for (;;) {
4561 assert(text || this->interval.end >= end);
4562 if (this->interval.end >= end || !text[this->interval.end])
4563 break;
4564 if (m_digit->match(text, this->interval.end, end, flags)) {
4565 // Digit
4566 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4567 this->interval.end = m_digit->interval.end;
// Digits inside parentheses do not move the safe point; it is moved once the
// closing parenthesis is accepted.
4568 if (!in_parentheses) {
4569 safe_digit_end = this->interval.end;
4570 safe_value_size = value.size();
4571 has_digits = true;
4572 }
4573 after_digit = true;
4574 after_parentheses = false;
4575 }
4576 else if (
4577 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4578 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4579 m_lparenthesis->match(text, this->interval.end, end, flags))
4580 {
4581 // Left parenthesis
4582 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4583 this->interval.end = m_lparenthesis->interval.end;
4584 in_parentheses = true;
4585 after_digit = false;
4586 after_parentheses = false;
4587 }
4588 else if (
4589 in_parentheses && // After left parenthesis
4590 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4591 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4592 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4593 {
4594 // Right parenthesis
4595 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4596 this->interval.end = m_rparenthesis->interval.end;
4597 safe_digit_end = this->interval.end;
4598 safe_value_size = value.size();
4599 in_parentheses = false;
4600 after_digit = false;
4601 after_parentheses = true;
4602 }
4603 else if (
4604 after_digit &&
4605 !in_parentheses && // No separators inside parentheses
4606 !after_parentheses && // No separators following right parenthesis
4607 m_separator && m_separator->match(text, this->interval.end, end, flags))
4608 {
4609 // Separator
4610 this->interval.end = m_separator->interval.end;
4611 after_digit = false;
4612 after_parentheses = false;
4613 }
4614 else if (
// NOTE(review): listing line 4615 (first operand of this condition) is absent
// from this extract.
4616 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4617 {
4618 // Space
4619 this->interval.end = m_space->interval.end;
4620 after_digit = false;
4621 after_parentheses = false;
4622 }
4623 else
4624 break;
4625 }
4626 if (has_digits) {
// Drop whatever was appended after the last safe point.
4627 value.erase(safe_value_size);
4628 this->interval.start = start;
4629 this->interval.end = safe_digit_end;
4630 return true;
4631 }
4632 value.clear();
4633 this->interval.start = (this->interval.end = start) + 1;
4634 return false;
4635 }
4636
// Clears the collected phone number.
4637 virtual void invalidate()
4638 {
4639 value.clear();
// NOTE(review): listing line 4640 is absent from this extract; presumably the
// base-class invalidate() call - confirm against the real header.
4641 }
4642
4643 public:
4644 std::basic_string<T> value;
4645
4646 protected:
4647 std::shared_ptr<basic_parser<T>> m_digit;
4648 std::shared_ptr<basic_parser<T>> m_plus_sign;
4649 std::shared_ptr<basic_set<T>> m_lparenthesis;
4650 std::shared_ptr<basic_set<T>> m_rparenthesis;
4651 std::shared_ptr<basic_parser<T>> m_separator;
4652 std::shared_ptr<basic_parser<T>> m_space;
4653 };
4654
4657#ifdef _UNICODE
4659#else
4661#endif
4663
4667 template <class T>
4669 {
4670 public:
4672 _In_ const std::shared_ptr<basic_parser<T>>& element,
4673 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4674 _In_ const std::shared_ptr<basic_parser<T>>& sign,
4675 _In_ const std::locale& locale = std::locale()) :
4676 basic_parser<T>(locale),
4677 m_element(element),
4678 m_digit(digit),
4679 m_sign(sign),
4680 has_digits(false),
4681 has_charge(false)
4682 {}
4683
4684 virtual bool match(
4685 _In_reads_or_z_(end) const T* text,
4686 _In_ size_t start = 0,
4687 _In_ size_t end = (size_t)-1,
4688 _In_ int flags = match_default)
4689 {
4690 assert(text || start >= end);
4691
4692 has_digits = false;
4693 has_charge = false;
4694 this->interval.end = start;
4695
4696 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
4697 for (;;) {
4698 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
4699 this->interval.end = m_element->interval.end;
4700 while (m_digit->match(text, this->interval.end, end, flags)) {
4701 this->interval.end = m_digit->interval.end;
4702 has_digits = true;
4703 }
4704 }
4705 else if (start < this->interval.end) {
4706 if (m_sign->match(text, this->interval.end, end, flags)) {
4707 this->interval.end = m_sign->interval.end;
4708 has_charge = true;
4709 }
4710 this->interval.start = start;
4711 return true;
4712 }
4713 else {
4714 this->interval.start = (this->interval.end = start) + 1;
4715 return false;
4716 }
4717 }
4718 }
4719
// Resets the has_digits and has_charge result flags.
4720 virtual void invalidate()
4721 {
4722 has_digits = false;
4723 has_charge = false;
// NOTE(review): listing line 4724 is absent from this extract; presumably the
// base-class invalidate() call - confirm against the real header.
4725 }
4726
4727 public:
4728 bool has_digits;
4729 bool has_charge;
4730
4731 protected:
4732 std::shared_ptr<basic_parser<T>> m_element;
4733 std::shared_ptr<basic_parser<T>> m_digit;
4734 std::shared_ptr<basic_parser<T>> m_sign;
4735 };
4736
4739#ifdef _UNICODE
4741#else
4743#endif
4745
4750 {
4751 public:
4752 virtual bool match(
4753 _In_reads_or_z_(end) const char* text,
4754 _In_ size_t start = 0,
4755 _In_ size_t end = (size_t)-1,
4756 _In_ int flags = match_default)
4757 {
4758 assert(text || start >= end);
4759 this->interval.end = start;
4760
4761 assert(text || this->interval.end >= end);
4762 if (this->interval.end < end && text[this->interval.end]) {
4763 if (text[this->interval.end] == '\r') {
4764 this->interval.end++;
4765 if (this->interval.end < end && text[this->interval.end] == '\n') {
4766 this->interval.start = start;
4767 this->interval.end++;
4768 return true;
4769 }
4770 }
4771 else if (text[this->interval.end] == '\n') {
4772 this->interval.start = start;
4773 this->interval.end++;
4774 return true;
4775 }
4776 }
4777 this->interval.start = (this->interval.end = start) + 1;
4778 return false;
4779 }
4780 };
4781
4785 class http_space : public parser
4786 {
4787 public:
4788 virtual bool match(
4789 _In_reads_or_z_(end) const char* text,
4790 _In_ size_t start = 0,
4791 _In_ size_t end = (size_t)-1,
4792 _In_ int flags = match_default)
4793 {
4794 assert(text || start >= end);
4795 this->interval.end = start;
4796 if (m_line_break.match(text, this->interval.end, end, flags)) {
4797 this->interval.end = m_line_break.interval.end;
4798 if (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) {
4799 this->interval.start = start;
4800 this->interval.end++;
4801 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
4802 return true;
4803 }
4804 }
4805 else if (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) {
4806 this->interval.start = start;
4807 this->interval.end++;
4808 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
4809 return true;
4810 }
4811 this->interval.start = (this->interval.end = start) + 1;
4812 return false;
4813 }
4814
4815 protected:
4816 http_line_break m_line_break;
4817 };
4818
4822 class http_text_char : public parser
4823 {
4824 public:
4825 virtual bool match(
4826 _In_reads_or_z_(end) const char* text,
4827 _In_ size_t start = 0,
4828 _In_ size_t end = (size_t)-1,
4829 _In_ int flags = match_default)
4830 {
4831 assert(text || start >= end);
4832 this->interval.end = start;
4833
4834 assert(text || this->interval.end >= end);
4835 if (m_space.match(text, this->interval.end, end, flags)) {
4836 this->interval.start = start;
4837 this->interval.end = m_space.interval.end;
4838 return true;
4839 }
4840 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
4841 this->interval.start = start;
4842 this->interval.end++;
4843 return true;
4844 }
4845 this->interval.start = (this->interval.end = start) + 1;
4846 return false;
4847 }
4848
4849 protected:
4850 http_space m_space;
4851 };
4852
// Parser for a token: a run of characters that stops at the first control
// character (< 0x20), 0x7f, one of the separator characters listed below, or
// whitespace.
4856 class http_token : public parser
4857 {
4858 public:
// Advances interval.end over token characters starting at text[start].
4859 virtual bool match(
4860 _In_reads_or_z_(end) const char* text,
4861 _In_ size_t start = 0,
4862 _In_ size_t end = (size_t)-1,
4863 _In_ int flags = match_default)
4864 {
4865 assert(text || start >= end);
4866 this->interval.end = start;
4867 for (;;) {
4868 if (this->interval.end < end && text[this->interval.end]) {
4869 if ((unsigned int)text[this->interval.end] < 0x20 ||
4870 (unsigned int)text[this->interval.end] == 0x7f ||
4871 text[this->interval.end] == '(' ||
4872 text[this->interval.end] == ')' ||
4873 text[this->interval.end] == '<' ||
4874 text[this->interval.end] == '>' ||
4875 text[this->interval.end] == '@' ||
4876 text[this->interval.end] == ',' ||
4877 text[this->interval.end] == ';' ||
4878 text[this->interval.end] == ':' ||
4879 text[this->interval.end] == '\\' ||
4880 text[this->interval.end] == '\"' ||
4881 text[this->interval.end] == '/' ||
4882 text[this->interval.end] == '[' ||
4883 text[this->interval.end] == ']' ||
4884 text[this->interval.end] == '?' ||
4885 text[this->interval.end] == '=' ||
4886 text[this->interval.end] == '{' ||
4887 text[this->interval.end] == '}' ||
// NOTE(review): isspace() receives a plain char; for negative values that is
// undefined behaviour - consider converting to unsigned char first.
4888 isspace(text[this->interval.end]))
4889 break;
4890 else
4891 this->interval.end++;
4892 }
4893 else
4894 break;
4895 }
// NOTE(review): listing line 4896 (the condition opening the success branch
// below) is absent from this extract.
4897 this->interval.start = start;
4898 return true;
4899 }
4900 else {
4901 this->interval.start = (this->interval.end = start) + 1;
4902 return false;
4903 }
4904 }
4905 };
4906
4911 {
4912 public:
4913 virtual bool match(
4914 _In_reads_or_z_(end) const char* text,
4915 _In_ size_t start = 0,
4916 _In_ size_t end = (size_t)-1,
4917 _In_ int flags = match_default)
4918 {
4919 assert(text || start >= end);
4920 this->interval.end = start;
4921 if (this->interval.end < end && text[this->interval.end] != '"')
4922 goto error;
4923 this->interval.end++;
4924 content.start = this->interval.end;
4925 for (;;) {
4926 assert(text || this->interval.end >= end);
4927 if (this->interval.end < end && text[this->interval.end]) {
4928 if (text[this->interval.end] == '"') {
4929 content.end = this->interval.end;
4930 this->interval.end++;
4931 break;
4932 }
4933 else if (text[this->interval.end] == '\\') {
4934 this->interval.end++;
4935 if (this->interval.end < end && text[this->interval.end]) {
4936 this->interval.end++;
4937 }
4938 else
4939 goto error;
4940 }
4941 else if (m_chr.match(text, this->interval.end, end, flags))
4942 this->interval.end++;
4943 else
4944 goto error;
4945 }
4946 else
4947 goto error;
4948 }
4949 this->interval.start = start;
4950 return true;
4951
4952 error:
4953 content.start = 1;
4954 content.end = 0;
4955 this->interval.start = (this->interval.end = start) + 1;
4956 return false;
4957 }
4958
4959 virtual void invalidate()
4960 {
4961 content.start = 1;
4962 content.end = 0;
4963 parser::invalidate();
4964 }
4965
4966 public:
4968
4969 protected:
4970 http_text_char m_chr;
4971 };
4972
4976 class http_value : public parser
4977 {
4978 public:
4979 virtual bool match(
4980 _In_reads_or_z_(end) const char* text,
4981 _In_ size_t start = 0,
4982 _In_ size_t end = (size_t)-1,
4983 _In_ int flags = match_default)
4984 {
4985 assert(text || start >= end);
4986 this->interval.end = start;
4987 if (string.match(text, this->interval.end, end, flags)) {
4988 token.invalidate();
4989 this->interval.end = string.interval.end;
4990 this->interval.start = start;
4991 return true;
4992 }
4993 else if (token.match(text, this->interval.end, end, flags)) {
4994 string.invalidate();
4995 this->interval.end = token.interval.end;
4996 this->interval.start = start;
4997 return true;
4998 }
4999 else {
5000 this->interval.start = (this->interval.end = start) + 1;
5001 return false;
5002 }
5003 }
5004
5005 virtual void invalidate()
5006 {
5007 string.invalidate();
5008 token.invalidate();
5009 parser::invalidate();
5010 }
5011
5012 public:
5015 };
5016
5020 class http_parameter : public parser
5021 {
5022 public:
5023 virtual bool match(
5024 _In_reads_or_z_(end) const char* text,
5025 _In_ size_t start = 0,
5026 _In_ size_t end = (size_t)-1,
5027 _In_ int flags = match_default)
5028 {
5029 assert(text || start >= end);
5030 this->interval.end = start;
5031 if (name.match(text, this->interval.end, end, flags))
5032 this->interval.end = name.interval.end;
5033 else
5034 goto error;
5035 while (m_space.match(text, this->interval.end, end, flags))
5036 this->interval.end = m_space.interval.end;
5037 assert(text || this->interval.end >= end);
5038 if (this->interval.end < end && text[this->interval.end] == '=')
5039 this->interval.end++;
5040 else
5041 while (m_space.match(text, this->interval.end, end, flags))
5042 this->interval.end = m_space.interval.end;
5043 if (value.match(text, this->interval.end, end, flags))
5044 this->interval.end = value.interval.end;
5045 else
5046 goto error;
5047 this->interval.start = start;
5048 return true;
5049
5050 error:
5051 name.invalidate();
5052 value.invalidate();
5053 this->interval.start = (this->interval.end = start) + 1;
5054 return false;
5055 }
5056
5057 virtual void invalidate()
5058 {
5059 name.invalidate();
5060 value.invalidate();
5061 parser::invalidate();
5062 }
5063
5064 public:
5067
5068 protected:
5069 http_space m_space;
5070 };
5071
5075 class http_any_type : public parser
5076 {
5077 public:
5078 virtual bool match(
5079 _In_reads_or_z_(end) const char* text,
5080 _In_ size_t start = 0,
5081 _In_ size_t end = (size_t)-1,
5082 _In_ int flags = match_default)
5083 {
5084 assert(text || start >= end);
5085 if (start + 2 < end &&
5086 text[start] == '*' &&
5087 text[start + 1] == '/' &&
5088 text[start + 2] == '*')
5089 {
5090 this->interval.end = (this->interval.start = start) + 3;
5091 return true;
5092 }
5093 else if (start < end && text[start] == '*') {
5094 this->interval.end = (this->interval.start = start) + 1;
5095 return true;
5096 }
5097 else {
5098 this->interval.start = (this->interval.end = start) + 1;
5099 return false;
5100 }
5101 }
5102 };
5103
5108 {
5109 public:
5110 virtual bool match(
5111 _In_reads_or_z_(end) const char* text,
5112 _In_ size_t start = 0,
5113 _In_ size_t end = (size_t)-1,
5114 _In_ int flags = match_default)
5115 {
5116 assert(text || start >= end);
5117 this->interval.end = start;
5118 if (type.match(text, this->interval.end, end, flags))
5119 this->interval.end = type.interval.end;
5120 else
5121 goto error;
5122 while (m_space.match(text, this->interval.end, end, flags))
5123 this->interval.end = m_space.interval.end;
5124 if (this->interval.end < end && text[this->interval.end] == '/')
5125 this->interval.end++;
5126 else
5127 goto error;
5128 while (m_space.match(text, this->interval.end, end, flags))
5129 this->interval.end = m_space.interval.end;
5130 if (subtype.match(text, this->interval.end, end, flags))
5131 this->interval.end = subtype.interval.end;
5132 else
5133 goto error;
5134 this->interval.start = start;
5135 return true;
5136
5137 error:
5138 type.invalidate();
5139 subtype.invalidate();
5140 this->interval.start = (this->interval.end = start) + 1;
5141 return false;
5142 }
5143
5144 virtual void invalidate()
5145 {
5146 type.invalidate();
5147 subtype.invalidate();
5148 parser::invalidate();
5149 }
5150
5151 public:
5152 http_token type;
5153 http_token subtype;
5154
5155 protected:
5156 http_space m_space;
5157 };
5158
5163 {
5164 public:
// Matches a media range (via http_media_range::match) followed by any number
// of ';'-introduced parameters, which are collected in `params`.
// Scanning stops at the first thing that is neither whitespace nor a ';'
// followed by a parameter. On success interval.end is set to the end of the
// last accepted parameter, or of the subtype when there is none.
5165 virtual bool match(
5166 _In_reads_or_z_(end) const char* text,
5167 _In_ size_t start = 0,
5168 _In_ size_t end = (size_t)-1,
5169 _In_ int flags = match_default)
5170 {
5171 assert(text || start >= end);
5172 if (!http_media_range::match(text, start, end, flags))
5173 goto error;
5174 params.clear();
5175 for (;;) {
5176 if (this->interval.end < end && text[this->interval.end]) {
5177 if (m_space.match(text, this->interval.end, end, flags))
5178 this->interval.end = m_space.interval.end;
5179 else if (text[this->interval.end] == ';') {
5180 this->interval.end++;
5181 while (m_space.match(text, this->interval.end, end, flags))
5182 this->interval.end = m_space.interval.end;
// NOTE(review): listing line 5183 is absent from this extract; presumably it
// declares the local `param` used below.
5184 if (param.match(text, this->interval.end, end, flags)) {
5185 this->interval.end = param.interval.end;
5186 params.push_back(std::move(param));
5187 }
5188 else
5189 break;
5190 }
5191 else
5192 break;
5193 }
5194 else
5195 break;
5196 }
// Rewind over trailing whitespace or a dangling ';' consumed by the loop.
5197 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
5198 return true;
5199
5200 error:
5201 http_media_range::invalidate();
5202 params.clear();
5203 this->interval.start = (this->interval.end = start) + 1;
5204 return false;
5205 }
5206
5207 virtual void invalidate()
5208 {
5209 params.clear();
5210 http_media_range::invalidate();
5211 }
5212
5213 public:
5214 std::list<http_parameter> params;
5215 };
5216
5221 {
5222 public:
// Advances interval.end from text[start] up to the first control character
// (< 0x20), 0x7f, ':', '/' or whitespace.
5223 virtual bool match(
5224 _In_reads_or_z_(end) const char* text,
5225 _In_ size_t start = 0,
5226 _In_ size_t end = (size_t)-1,
5227 _In_ int flags = match_default)
5228 {
5229 assert(text || start >= end);
5230 this->interval.end = start;
5231 for (;;) {
5232 if (this->interval.end < end && text[this->interval.end]) {
5233 if ((unsigned int)text[this->interval.end] < 0x20 ||
5234 (unsigned int)text[this->interval.end] == 0x7f ||
5235 text[this->interval.end] == ':' ||
5236 text[this->interval.end] == '/' ||
5237 isspace(text[this->interval.end]))
5238 break;
5239 else
5240 this->interval.end++;
5241 }
5242 else
5243 break;
5244 }
// NOTE(review): listing line 5245 (the condition opening the success branch
// below) is absent from this extract.
5246 this->interval.start = start;
5247 return true;
5248 }
5249 this->interval.start = (this->interval.end = start) + 1;
5250 return false;
5251 }
5252 };
5253
// Parser for a decimal port number; the parsed number is kept in `value`.
5257 class http_url_port : public parser
5258 {
5259 public:
// Constructs the parser with `value` initialised to 0.
5260 http_url_port(_In_ const std::locale& locale = std::locale()) :
5261 parser(locale),
5262 value(0)
5263 {}
5264
// Accumulates consecutive digits '0'..'9' starting at text[start] into
// `value`. Fails immediately, with `value` reset to 0, as soon as the
// accumulated number no longer fits in uint16_t.
5265 virtual bool match(
5266 _In_reads_or_z_(end) const char* text,
5267 _In_ size_t start = 0,
5268 _In_ size_t end = (size_t)-1,
5269 _In_ int flags = match_default)
5270 {
5271 assert(text || start >= end);
5272 value = 0;
5273 this->interval.end = start;
5274 for (;;) {
5275 if (this->interval.end < end && text[this->interval.end]) {
5276 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
// Computed in size_t so the range test happens before narrowing.
5277 size_t _value = (size_t)value * 10 + text[this->interval.end] - '0';
5278 if (_value > (uint16_t)-1) {
5279 value = 0;
5280 this->interval.start = (this->interval.end = start) + 1;
5281 return false;
5282 }
5283 value = (uint16_t)_value;
5284 this->interval.end++;
5285 }
5286 else
5287 break;
5288 }
5289 else
5290 break;
5291 }
// NOTE(review): listing line 5292 (the condition opening the success branch
// below) is absent from this extract.
5293 this->interval.start = start;
5294 return true;
5295 }
5296 this->interval.start = (this->interval.end = start) + 1;
5297 return false;
5298 }
5299
// Resets `value` to 0 and invalidates the base parser state.
5300 virtual void invalidate()
5301 {
5302 value = 0;
5303 parser::invalidate();
5304 }
5305
5306 public:
// Port number produced by the last match(); 0 after a failure or invalidate().
5307 uint16_t value;
5308 };
5309
5314 {
5315 public:
5316 virtual bool match(
5317 _In_reads_or_z_(end) const char* text,
5318 _In_ size_t start = 0,
5319 _In_ size_t end = (size_t)-1,
5320 _In_ int flags = match_default)
5321 {
5322 assert(text || start >= end);
5323 this->interval.end = start;
5324 for (;;) {
5325 if (this->interval.end < end && text[this->interval.end]) {
5326 if ((unsigned int)text[this->interval.end] < 0x20 ||
5327 (unsigned int)text[this->interval.end] == 0x7f ||
5328 text[this->interval.end] == '?' ||
5329 text[this->interval.end] == '/' ||
5330 isspace(text[this->interval.end]))
5331 break;
5332 else
5333 this->interval.end++;
5334 }
5335 else
5336 break;
5337 }
5338 this->interval.start = start;
5339 return true;
5340 }
5341 };
5342
// Parser for a URL path: a '/' followed by a segment, repeated. Every
// segment parsed is stored in `segments`.
5346 class http_url_path : public parser
5347 {
5348 public:
// Matches "/segment" one or more times starting at text[start]. A segment may
// be empty, so "/" alone and "//" are accepted.
5349 virtual bool match(
5350 _In_reads_or_z_(end) const char* text,
5351 _In_ size_t start = 0,
5352 _In_ size_t end = (size_t)-1,
5353 _In_ int flags = match_default)
5354 {
5355 assert(text || start >= end);
// NOTE(review): listing line 5356 is absent from this extract; presumably it
// declares the local segment parser `s` used below.
5357 this->interval.end = start;
5358 segments.clear();
5359 assert(text || this->interval.end >= end);
// NOTE(review): when interval.end >= end this guard does not fire, so the
// code below advances past `end` and reports a match on an exhausted range.
// Looks unintended - confirm whether the test should be
// `interval.end >= end || text[...] != '/'`.
5360 if (this->interval.end < end && text[this->interval.end] != '/')
5361 goto error;
5362 this->interval.end++;
5363 s.match(text, this->interval.end, end, flags);
5364 segments.push_back(s);
5365 this->interval.end = s.interval.end;
5366 for (;;) {
5367 if (this->interval.end < end && text[this->interval.end]) {
5368 if (text[this->interval.end] == '/') {
5369 this->interval.end++;
5370 s.match(text, this->interval.end, end, flags);
5371 segments.push_back(s);
5372 this->interval.end = s.interval.end;
5373 }
5374 else
5375 break;
5376 }
5377 else
5378 break;
5379 }
5380 this->interval.start = start;
5381 return true;
5382
5383 error:
5384 segments.clear();
5385 this->interval.start = (this->interval.end = start) + 1;
5386 return false;
5387 }
5388
// Drops all stored segments and invalidates the base parser state.
5389 virtual void invalidate()
5390 {
5391 segments.clear();
5392 parser::invalidate();
5393 }
5394
5395 public:
// Segments found by the last successful match(), in order of appearance.
5396 std::vector<http_url_path_segment> segments;
5397 };
5398
5403 {
5404 public:
// Matches one query parameter "name[=value]" at text[start].
// The name runs up to the first control character (< 0x20), 0x7f, '&', '='
// or whitespace; the value, when an '=' follows, runs up to the first control
// character, 0x7f, '&' or whitespace. Without '=' the `value` interval is set
// to start 1 / end 0.
5405 virtual bool match(
5406 _In_reads_or_z_(end) const char* text,
5407 _In_ size_t start = 0,
5408 _In_ size_t end = (size_t)-1,
5409 _In_ int flags = match_default)
5410 {
5411 assert(text || start >= end);
5412 this->interval.end = start;
5413 name.start = this->interval.end;
5414 for (;;) {
5415 if (this->interval.end < end && text[this->interval.end]) {
5416 if ((unsigned int)text[this->interval.end] < 0x20 ||
5417 (unsigned int)text[this->interval.end] == 0x7f ||
5418 text[this->interval.end] == '&' ||
5419 text[this->interval.end] == '=' ||
5420 isspace(text[this->interval.end]))
5421 break;
5422 else
5423 this->interval.end++;
5424 }
5425 else
5426 break;
5427 }
// NOTE(review): listing line 5428 (the condition guarding the assignment
// below) is absent from this extract.
5429 name.end = this->interval.end;
5430 else
5431 goto error;
// NOTE(review): text[interval.end] is read here without first checking
// interval.end < end; when the name runs up to `end` this reads one element
// past the permitted range - confirm and add the bound check.
5432 if (text[this->interval.end] == '=') {
5433 this->interval.end++;
5434 value.start = this->interval.end;
5435 for (;;) {
5436 if (this->interval.end < end && text[this->interval.end]) {
5437 if ((unsigned int)text[this->interval.end] < 0x20 ||
5438 (unsigned int)text[this->interval.end] == 0x7f ||
5439 text[this->interval.end] == '&' ||
5440 isspace(text[this->interval.end]))
5441 break;
5442 else
5443 this->interval.end++;
5444 }
5445 else
5446 break;
5447 }
5448 value.end = this->interval.end;
5449 }
5450 else {
5451 value.start = 1;
5452 value.end = 0;
5453 }
5454 this->interval.start = start;
5455 return true;
5456
5457 error:
5458 name.start = 1;
5459 name.end = 0;
5460 value.start = 1;
5461 value.end = 0;
5462 this->interval.start = (this->interval.end = start) + 1;
5463 return false;
5464 }
5465
5466 virtual void invalidate()
5467 {
5468 name.start = 1;
5469 name.end = 0;
5470 value.start = 1;
5471 value.end = 0;
5472 parser::invalidate();
5473 }
5474
5475 public:
5478 };
5479
// Parser for a URL made of an optional "http://server[:port]" prefix, a
// mandatory path and an optional '?'-introduced list of '&'-separated
// parameters.
5483 class http_url : public parser
5484 {
5485 public:
// Constructs the parser; the locale is passed on to the base and to `port`.
5486 http_url(_In_ const std::locale& locale = std::locale()) :
5487 parser(locale),
5488 port(locale)
5489 {}
5490
// Matches a URL at text[start]. When no port is given (or there is no
// "http://" prefix at all) port.value is set to 80.
// On failure every sub-parser is invalidated, `params` is cleared and
// interval.start > interval.end.
5491 virtual bool match(
5492 _In_reads_or_z_(end) const char* text,
5493 _In_ size_t start = 0,
5494 _In_ size_t end = (size_t)-1,
5495 _In_ int flags = match_default)
5496 {
5497 assert(text || start >= end);
5498 this->interval.end = start;
5499
// The scheme prefix is compared with strnicmp, i.e. ignoring case.
5500 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", (size_t)-1, m_locale) == 0) {
5501 this->interval.end += 7;
5502 if (server.match(text, this->interval.end, end, flags))
5503 this->interval.end = server.interval.end;
5504 else
5505 goto error;
5506 if (this->interval.end < end && text[this->interval.end] == ':') {
5507 this->interval.end++;
// NOTE(review): a failed port.match() is not treated as an error here; the
// ':' stays consumed and matching continues with the path - confirm intent.
5508 if (port.match(text, this->interval.end, end, flags))
5509 this->interval.end = port.interval.end;
5510 }
5511 else {
5512 port.invalidate();
5513 port.value = 80;
5514 }
5515 }
5516 else {
5517 server.invalidate();
5518 port.invalidate();
5519 port.value = 80;
5520 }
5521
5522 if (path.match(text, this->interval.end, end, flags))
5523 this->interval.end = path.interval.end;
5524 else
5525 goto error;
5526
5527 params.clear();
5528
5529 if (this->interval.end < end && text[this->interval.end] == '?') {
5530 this->interval.end++;
5531 for (;;) {
5532 if (this->interval.end < end && text[this->interval.end]) {
5533 if ((unsigned int)text[this->interval.end] < 0x20 ||
5534 (unsigned int)text[this->interval.end] == 0x7f ||
5535 isspace(text[this->interval.end]))
5536 break;
5537 else if (text[this->interval.end] == '&')
5538 this->interval.end++;
5539 else {
// NOTE(review): listing line 5540 is absent from this extract; presumably it
// declares the local `param` used below.
5541 if (param.match(text, this->interval.end, end, flags)) {
5542 this->interval.end = param.interval.end;
5543 params.push_back(std::move(param));
5544 }
5545 else
5546 break;
5547 }
5548 }
5549 else
5550 break;
5551 }
5552 }
5553
5554 this->interval.start = start;
5555 return true;
5556
5557 error:
5558 server.invalidate();
5559 port.invalidate();
5560 path.invalidate();
5561 params.clear();
5562 this->interval.start = (this->interval.end = start) + 1;
5563 return false;
5564 }
5565
// Invalidates every sub-parser, clears `params` and invalidates the base
// parser state.
5566 virtual void invalidate()
5567 {
5568 server.invalidate();
5569 port.invalidate();
5570 path.invalidate();
5571 params.clear();
5572 parser::invalidate();
5573 }
5574
5575 public:
// Results of the last match(): server name, port, path and query parameters.
5576 http_url_server server;
5577 http_url_port port;
5578 http_url_path path;
5579 std::list<http_url_parameter> params;
5580 };
5581
// Parser for a language tag: runs of alphabetic characters separated by '-'.
// The interval of every run is stored in `components`.
5585 class http_language : public parser
5586 {
5587 public:
// Matches a language tag at text[start]. Succeeds when at least one
// alphabetic component was found; interval.end is then the end of the last
// component, so a trailing '-' is not part of the match.
5588 virtual bool match(
5589 _In_reads_or_z_(end) const char* text,
5590 _In_ size_t start = 0,
5591 _In_ size_t end = (size_t)-1,
5592 _In_ int flags = match_default)
5593 {
5594 assert(text || start >= end);
5595 this->interval.end = start;
5596 components.clear();
5597 for (;;) {
5598 if (this->interval.end < end && text[this->interval.end]) {
// NOTE(review): listing line 5599 is absent from this extract; presumably it
// declares the local interval `k` used below.
5600 k.end = this->interval.end;
5601 for (;;) {
5602 if (k.end < end && text[k.end]) {
// NOTE(review): isalpha() receives a plain char; for negative values that is
// undefined behaviour - consider converting to unsigned char first.
5603 if (isalpha(text[k.end]))
5604 k.end++;
5605 else
5606 break;
5607 }
5608 else
5609 break;
5610 }
5611 if (this->interval.end < k.end) {
5612 k.start = this->interval.end;
5613 this->interval.end = k.end;
5614 components.push_back(k);
5615 }
5616 else
5617 break;
5618 if (this->interval.end < end && text[this->interval.end] == '-')
5619 this->interval.end++;
5620 else
5621 break;
5622 }
5623 else
5624 break;
5625 }
5626 if (!components.empty()) {
5627 this->interval.start = start;
5628 this->interval.end = components.back().end;
5629 return true;
5630 }
5631 this->interval.start = (this->interval.end = start) + 1;
5632 return false;
5633 }
5634
// Drops all stored components and invalidates the base parser state.
5635 virtual void invalidate()
5636 {
5637 components.clear();
5638 parser::invalidate();
5639 }
5640
5641 public:
// Intervals of the alphabetic components found by the last successful match().
5642 std::vector<stdex::interval<size_t>> components;
5643 };
5644
// Parser for a decimal weight written as digits with an optional '.' and
// fraction digits. The result is kept in `value`, which is 1.0f by default.
5648 class http_weight : public parser
5649 {
5650 public:
// Constructs the parser with `value` initialised to 1.0f.
5651 http_weight(_In_ const std::locale& locale = std::locale()) :
5652 parser(locale),
5653 value(1.0f)
5654 {}
5655
// Scans integer digits, then - after the first '.' - fraction digits,
// starting at text[start]. Scanning ends after the fraction or at the first
// character that is neither a digit nor '.'.
5656 virtual bool match(
5657 _In_reads_or_z_(end) const char* text,
5658 _In_ size_t start = 0,
5659 _In_ size_t end = (size_t)-1,
5660 _In_ int flags = match_default)
5661 {
5662 assert(text || start >= end);
// celi_del: integer part; decimalni_del: fraction digits read as an integer;
// decimalni_del_n: 10 raised to the number of fraction digits.
5663 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
5664 this->interval.end = start;
5665 for (;;) {
5666 if (this->interval.end < end && text[this->interval.end]) {
5667 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
5668 celi_del = celi_del * 10 + text[this->interval.end] - '0';
5669 this->interval.end++;
5670 }
5671 else if (text[this->interval.end] == '.') {
5672 this->interval.end++;
5673 for (;;) {
5674 if (this->interval.end < end && text[this->interval.end]) {
5675 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
5676 decimalni_del = decimalni_del * 10 + text[this->interval.end] - '0';
5677 decimalni_del_n *= 10;
5678 this->interval.end++;
5679 }
5680 else
5681 break;
5682 }
5683 else
5684 break;
5685 }
5686 break;
5687 }
5688 else
5689 break;
5690 }
5691 else
5692 break;
5693 }
// NOTE(review): listing lines 5694-5695 are absent from this extract;
// presumably the success condition and the computation of `value` from the
// three accumulators - confirm against the real header.
5696 this->interval.start = start;
5697 return true;
5698 }
5699 value = 1.0f;
5700 this->interval.start = (this->interval.end = start) + 1;
5701 return false;
5702 }
5703
// Resets `value` to 1.0f and invalidates the base parser state.
5704 virtual void invalidate()
5705 {
5706 value = 1.0f;
5707 parser::invalidate();
5708 }
5709
5710 public:
// Weight produced by the last match(); 1.0f after a failure or invalidate().
5711 float value;
5712 };
5713
5717 class http_asterisk : public parser
5718 {
5719 public:
5720 virtual bool match(
5721 _In_reads_or_z_(end) const char* text,
5722 _In_ size_t start = 0,
5723 _In_ size_t end = (size_t)-1,
5724 _In_ int flags = match_default)
5725 {
5726 assert(text || end <= start);
5727 if (start < end && text[start] == '*') {
5728 this->interval.end = (this->interval.start = start) + 1;
5729 return true;
5730 }
5731 this->interval.start = (this->interval.end = start) + 1;
5732 return false;
5733 }
5734 };
5735
/// Test for HTTP weighted value: either an asterisk (T_asterisk) or a value (T),
/// optionally followed by a ";q=<weight>" factor with optional whitespace around
/// ';', 'q' and '='.
//
// NOTE(review): listing line 5740 (the class head that belongs to this template line) and
// listing line 5789 are absent from this dump - restore them from the original header.
5739 template <class T, class T_asterisk = http_asterisk>
5741 {
5742 public:
// Forwards the locale to the base parser and to the weight parser; `asterisk` and `value`
// are not in the initializer list and are therefore default-constructed.
5743 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
5744 parser(locale),
5745 factor(locale)
5746 {}
5747
/// \param[in] text   Text to parse; may be null only when start >= end (see the assert)
/// \param[in] start  Index in text to start parsing at
/// \param[in] end    Index in text to stop parsing at
/// \param[in] flags  Match flags, forwarded to the sub-parsers
///
/// \returns true when an asterisk or a value was matched (the weight part is optional);
///          false with all sub-parsers invalidated and an inverted interval otherwise.
5748 virtual bool match(
5749 _In_reads_or_z_(end) const char* text,
5750 _In_ size_t start = 0,
5751 _In_ size_t end = (size_t)-1,
5752 _In_ int flags = match_default)
5753 {
5754 assert(text || start >= end);
// konec_vrednosti (Slovenian: "end of value") is assigned below but never read in the
// lines visible here.
5755 size_t konec_vrednosti;
5756 this->interval.end = start;
// The asterisk is tried first; whichever alternative did not match is invalidated.
5757 if (asterisk.match(text, this->interval.end, end, flags)) {
5758 this->interval.end = konec_vrednosti = asterisk.interval.end;
5759 value.invalidate();
5760 }
5761 else if (value.match(text, this->interval.end, end, flags)) {
5762 this->interval.end = konec_vrednosti = value.interval.end;
5763 asterisk.invalidate();
5764 }
5765 else {
5766 asterisk.invalidate();
5767 value.invalidate();
5768 this->interval.start = (this->interval.end = start) + 1;
5769 return false;
5770 }
5771
// Optional weight: *space ';' *space ('q' | 'Q') *space '=' *space <factor>.
// Whatever prefix of this sequence is present is consumed even when the factor itself
// does not follow, so the interval may end after a dangling ";", "q" or "=".
// NOTE(review): isspace() receives a plain char; if this resolves to the <cctype> function,
// negative values (bytes >= 0x80 where char is signed) are undefined behaviour - confirm.
5772 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
5773 if (this->interval.end < end && text[this->interval.end] == ';') {
5774 this->interval.end++;
5775 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
5776 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
5777 this->interval.end++;
5778 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
5779 if (this->interval.end < end && text[this->interval.end] == '=') {
5780 this->interval.end++;
5781 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
5782 if (factor.match(text, this->interval.end, end, flags))
5783 this->interval.end = factor.interval.end;
5784 }
5785 }
5786 }
// `factor` is not reset at the top of match(), so an interval left by an earlier call is
// still seen here whenever factor.match() was not reached this time.
// No valid factor interval: reset the factor parser.
// (One statement inside this block - listing line 5789 - is missing from the dump.)
5787 if (!factor.interval) {
5788 factor.invalidate();
5790 }
5791 this->interval.start = start;
5792 return true;
5793 }
5794
/// Invalidates all three sub-parsers and then the base parser.
5795 virtual void invalidate()
5796 {
5797 asterisk.invalidate();
5798 value.invalidate();
5799 factor.invalidate();
5800 parser::invalidate();
5801 }
5802
5803 public:
// Sub-parsers are public so callers can inspect which alternative matched and the weight.
5804 T_asterisk asterisk;
5805 T value;
5806 http_weight factor;
5807 };
5808
5813 {
5814 public:
5815 virtual bool match(
5816 _In_reads_or_z_(end) const char* text,
5817 _In_ size_t start = 0,
5818 _In_ size_t end = (size_t)-1,
5819 _In_ int flags = match_default)
5820 {
5821 assert(text || start >= end);
5822 this->interval.end = start;
5823 if (this->interval.end < end && text[this->interval.end] == '$')
5824 this->interval.end++;
5825 else
5826 goto error;
5827 if (name.match(text, this->interval.end, end, flags))
5828 this->interval.end = name.interval.end;
5829 else
5830 goto error;
5831 while (m_space.match(text, this->interval.end, end, flags))
5832 this->interval.end = m_space.interval.end;
5833 if (this->interval.end < end && text[this->interval.end] == '=')
5834 this->interval.end++;
5835 else
5836 goto error;
5837 while (m_space.match(text, this->interval.end, end, flags))
5838 this->interval.end = m_space.interval.end;
5839 if (value.match(text, this->interval.end, end, flags))
5840 this->interval.end = value.interval.end;
5841 else
5842 goto error;
5843 this->interval.start = start;
5844 return true;
5845
5846 error:
5847 name.invalidate();
5848 value.invalidate();
5849 this->interval.start = (this->interval.end = start) + 1;
5850 return false;
5851 }
5852
/// Resets the parser: invalidates the name and value sub-parsers, then the base parser.
/// The m_space helper is left untouched.
5853 virtual void invalidate()
5854 {
5855 name.invalidate();
5856 value.invalidate();
5857 parser::invalidate();
5858 }
5859
5860 public:
5861 http_token name;
5862 http_value value;
5863
5864 protected:
5865 http_space m_space;
5866 };
5867
/// Test for HTTP cookie: "name = value" followed by zero or more ";"-separated parameters.
//
// NOTE(review): listing lines 5907 and 5942-5943 are absent from this dump. `param`, `name`
// and `value` are used below without a visible declaration, so the missing lines presumably
// declare them - restore from the original header.
5871 class http_cookie : public parser
5872 {
5873 public:
/// \param[in] text   Text to parse; may be null only when start >= end (see the assert)
/// \param[in] start  Index in text to start parsing at
/// \param[in] end    Index in text to stop parsing at
/// \param[in] flags  Match flags, forwarded to the sub-parsers
///
/// \returns true when at least "name=value" matched; interval then ends at the value or at
///          the last parameter. false with everything reset and an inverted interval otherwise.
5874 virtual bool match(
5875 _In_reads_or_z_(end) const char* text,
5876 _In_ size_t start = 0,
5877 _In_ size_t end = (size_t)-1,
5878 _In_ int flags = match_default)
5879 {
5880 assert(text || start >= end);
5881 this->interval.end = start;
// Mandatory part: name, optional spaces, '=', optional spaces, value.
5882 if (name.match(text, this->interval.end, end, flags))
5883 this->interval.end = name.interval.end;
5884 else
5885 goto error;
5886 while (m_space.match(text, this->interval.end, end, flags))
5887 this->interval.end = m_space.interval.end;
5888 if (this->interval.end < end && text[this->interval.end] == '=')
5889 this->interval.end++;
5890 else
5891 goto error;
5892 while (m_space.match(text, this->interval.end, end, flags))
5893 this->interval.end = m_space.interval.end;
5894 if (value.match(text, this->interval.end, end, flags))
5895 this->interval.end = value.interval.end;
5896 else
5897 goto error;
// Optional part: parameters collected from scratch on every call.
5898 params.clear();
5899 for (;;) {
5900 if (this->interval.end < end && text[this->interval.end]) {
5901 if (m_space.match(text, this->interval.end, end, flags))
5902 this->interval.end = m_space.interval.end;
5903 else if (text[this->interval.end] == ';') {
5904 this->interval.end++;
5905 while (m_space.match(text, this->interval.end, end, flags))
5906 this->interval.end = m_space.interval.end;
// (Listing line 5907 is missing here; `param` is not declared in the visible lines.)
5908 if (param.match(text, this->interval.end, end, flags)) {
5909 this->interval.end = param.interval.end;
5910 params.push_back(std::move(param));
5911 }
5912 else
5913 break;
5914 }
5915 else
5916 break;
5917 }
5918 else
5919 break;
5920 }
5921 this->interval.start = start;
// Trim whatever was scanned past the last accepted element (trailing spaces, a dangling ';'):
// the match ends at the last parameter, or at the value when there are no parameters.
5922 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
5923 return true;
5924
5925 error:
5926 name.invalidate();
5927 value.invalidate();
5928 params.clear();
5929 this->interval.start = (this->interval.end = start) + 1;
5930 return false;
5931 }
5932
/// Resets name, value and the parameter list, then the base parser.
5933 virtual void invalidate()
5934 {
5935 name.invalidate();
5936 value.invalidate();
5937 params.clear();
5938 parser::invalidate();
5939 }
5940
5941 public:
// Parameters matched after the value, in the order they were appended by match().
5944 std::list<http_cookie_parameter> params;
5945
5946 protected:
// Helper parser used to skip HTTP spaces between the elements.
5947 http_space m_space;
5948 };
5949
/// Test for HTTP agent: a "type" optionally followed by "/version".
/// The type ends at '/', whitespace or end of text; the version ends at whitespace or end of text.
//
// NOTE(review): listing lines 5999 and 6022-6023 are absent from this dump. The closing brace
// numbered 6002 has no visible opener (presumably a condition on line 5999 guards the success
// path), and the `type` / `version` members are not declared in the visible lines.
5953 class http_agent : public parser
5954 {
5955 public:
/// \param[in] text   Text to parse; may be null only when start >= end (see the assert)
/// \param[in] start  Index in text to start parsing at
/// \param[in] end    Index in text to stop parsing at
/// \param[in] flags  Match flags; not read by the lines visible here
///
/// \returns true with interval starting at `start` on success; false with type, version
///          and interval all set to the inverted range {start = 1, end = 0} otherwise.
5956 virtual bool match(
5957 _In_reads_or_z_(end) const char* text,
5958 _In_ size_t start = 0,
5959 _In_ size_t end = (size_t)-1,
5960 _In_ int flags = match_default)
5961 {
5962 assert(text || start >= end);
5963 this->interval.end = start;
5964 type.start = this->interval.end;
5965 for (;;) {
5966 if (this->interval.end < end && text[this->interval.end]) {
5967 if (text[this->interval.end] == '/') {
// '/' ends the type; the slash itself belongs to neither type nor version.
5968 type.end = this->interval.end;
5969 this->interval.end++;
5970 version.start = this->interval.end;
5971 for (;;) {
5972 if (this->interval.end < end && text[this->interval.end]) {
// NOTE(review): isspace() receives a plain char; if this resolves to the <cctype> function,
// negative values are undefined behaviour - confirm which overload is used.
5973 if (isspace(text[this->interval.end])) {
5974 version.end = this->interval.end;
5975 break;
5976 }
5977 else
5978 this->interval.end++;
5979 }
5980 else {
5981 version.end = this->interval.end;
5982 break;
5983 }
5984 }
5985 break;
5986 }
5987 else if (isspace(text[this->interval.end])) {
// Whitespace before any '/': type only. `version` is not assigned on this path and keeps
// whatever it held before the call.
5988 type.end = this->interval.end;
5989 break;
5990 }
5991 else
5992 this->interval.end++;
5993 }
5994 else {
// End of text before any '/': type only, `version` again left untouched.
5995 type.end = this->interval.end;
5996 break;
5997 }
5998 }
6000 this->interval.start = start;
6001 return true;
6002 }
// Failure path: mark every interval as invalid.
6003 type.start = 1;
6004 type.end = 0;
6005 version.start = 1;
6006 version.end = 0;
6007 this->interval.start = 1;
6008 this->interval.end = 0;
6009 return false;
6010 }
6011
/// Marks type and version as invalid, then resets the base parser.
6012 virtual void invalidate()
6013 {
6014 type.start = 1;
6015 type.end = 0;
6016 version.start = 1;
6017 version.end = 0;
6018 parser::invalidate();
6019 }
6020
6021 public:
6024 };
6025
/// Test for HTTP protocol: "type/major[.minor]" terminated by whitespace.
/// The parsed version is packed as major * 0x100 + minor (0x100 = 1.0, 0x101 = 1.1);
/// 0x009 is the value used when nothing has been matched.
//
// NOTE(review): listing lines 6129 and 6132 are absent from this dump; the `type` and
// `version` members used below are presumably declared there.
6029 class http_protocol : public parser
6030 {
6031 public:
6032 http_protocol(_In_ const std::locale& locale = std::locale()) :
6033 parser(locale),
6034 version(0x009)
6035 {}
6036
/// \param[in] text   Text to parse; may be null only when start >= end (see the assert)
/// \param[in] start  Index in text to start parsing at
/// \param[in] end    Index in text to stop parsing at
/// \param[in] flags  Match flags; not read by this method
///
/// \returns true with interval = [start, position of the terminating whitespace) on success;
///          false with all members reset otherwise. Note that reaching the end of text before
///          a terminating whitespace character is treated as a failure.
6037 virtual bool match(
6038 _In_reads_or_z_(end) const char* text,
6039 _In_ size_t start = 0,
6040 _In_ size_t end = (size_t)-1,
6041 _In_ int flags = match_default)
6042 {
6043 assert(text || start >= end);
6044 this->interval.end = start;
6045 type.start = this->interval.end;
// Protocol type: everything up to '/'. Whitespace or end of text before '/' is an error.
6046 for (;;) {
6047 if (this->interval.end < end && text[this->interval.end]) {
6048 if (text[this->interval.end] == '/') {
6049 type.end = this->interval.end;
6050 this->interval.end++;
6051 break;
6052 }
// NOTE(review): isspace() receives a plain char; if this resolves to the <cctype> function,
// negative values are undefined behaviour - confirm which overload is used.
6053 else if (isspace(text[this->interval.end]))
6054 goto error;
6055 else
6056 this->interval.end++;
6057 }
6058 else {
// This assignment is overwritten immediately by the error path.
6059 type.end = this->interval.end;
6060 goto error;
6061 }
6062 }
// Major version: up to '.' (a minor version follows) or whitespace (no minor version).
// The characters are not checked to be digits here; they are handed to strtoui() as they are.
6063 version_maj.start = this->interval.end;
6064 for (;;) {
6065 if (this->interval.end < end && text[this->interval.end]) {
6066 if (text[this->interval.end] == '.') {
6067 version_maj.end = this->interval.end;
6068 this->interval.end++;
6069 version_min.start = this->interval.end;
// Minor version: up to the terminating whitespace.
6070 for (;;) {
6071 if (this->interval.end < end && text[this->interval.end]) {
6072 if (isspace(text[this->interval.end])) {
6073 version_min.end = this->interval.end;
// The (uint16_t) cast binds to the strtoui() result, i.e. it is applied before the
// multiplication by 0x100.
6074 version =
6075 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6076 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6077 break;
6078 }
6079 else
6080 this->interval.end++;
6081 }
6082 else
6083 goto error;
6084 }
6085 break;
6086 }
6087 else if (isspace(text[this->interval.end])) {
// Major version only: the minor interval is marked invalid and contributes 0.
6088 version_maj.end = this->interval.end;
6089 version_min.start = 1;
6090 version_min.end = 0;
6091 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6092 break;
6093 }
6094 else
6095 this->interval.end++;
6096 }
6097 else
6098 goto error;
6099 }
6100 this->interval.start = start;
6101 return true;
6102
6103 error:
// Failure path: every interval becomes the inverted range {1, 0}; version returns to 0x009.
6104 type.start = 1;
6105 type.end = 0;
6106 version_maj.start = 1;
6107 version_maj.end = 0;
6108 version_min.start = 1;
6109 version_min.end = 0;
6110 version = 0x009;
6111 this->interval.start = 1;
6112 this->interval.end = 0;
6113 return false;
6114 }
6115
/// Resets all members to the same state the failure path of match() produces,
/// then resets the base parser.
6116 virtual void invalidate()
6117 {
6118 type.start = 1;
6119 type.end = 0;
6120 version_maj.start = 1;
6121 version_maj.end = 0;
6122 version_min.start = 1;
6123 version_min.end = 0;
6124 version = 0x009;
6125 parser::invalidate();
6126 }
6127
6128 public:
// Text ranges of the major and minor version numbers; version_min is inverted when absent.
6130 stdex::interval<size_t> version_maj;
6131 stdex::interval<size_t> version_min;
6133 };
6134
/// Test for HTTP request line: verb, URL and an optional protocol, up to and including
/// the line break.
//
// NOTE(review): listing line 6265 is absent from this dump; the `verb` member used below is
// presumably declared there.
6138 class http_request : public parser
6139 {
6140 public:
6141 http_request(_In_ const std::locale& locale = std::locale()) :
6142 parser(locale),
6143 url(locale),
6144 protocol(locale)
6145 {}
6146
/// \param[in] text   Text to parse; may be null only when start >= end (see the assert)
/// \param[in] start  Index in text to start parsing at
/// \param[in] end    Index in text to stop parsing at
/// \param[in] flags  Match flags, forwarded to the sub-parsers
///
/// \returns true when a verb and a URL were matched (the protocol is optional);
///          false with verb, url, protocol and interval invalidated otherwise.
// NOTE(review): isspace() is called with a plain char throughout; if this resolves to the
// <cctype> function, negative values are undefined behaviour - confirm which overload is used.
6147 virtual bool match(
6148 _In_reads_or_z_(end) const char* text,
6149 _In_ size_t start = 0,
6150 _In_ size_t end = (size_t)-1,
6151 _In_ int flags = match_default)
6152 {
6153 assert(text || start >= end);
6154 this->interval.end = start;
6155
// 1. Skip leading whitespace. A line break or end of text before the verb is an error.
6156 for (;;) {
6157 if (m_line_break.match(text, this->interval.end, end, flags))
6158 goto error;
6159 else if (this->interval.end < end && text[this->interval.end]) {
6160 if (isspace(text[this->interval.end]))
6161 this->interval.end++;
6162 else
6163 break;
6164 }
6165 else
6166 goto error;
6167 }
// 2. Verb: everything up to the next whitespace, which must exist on the same line.
6168 verb.start = this->interval.end;
6169 for (;;) {
6170 if (m_line_break.match(text, this->interval.end, end, flags))
6171 goto error;
6172 else if (this->interval.end < end && text[this->interval.end]) {
6173 if (isspace(text[this->interval.end])) {
6174 verb.end = this->interval.end;
6175 this->interval.end++;
6176 break;
6177 }
6178 else
6179 this->interval.end++;
6180 }
6181 else
6182 goto error;
6183 }
6184
// 3. Skip whitespace between verb and URL, then match the URL (mandatory).
6185 for (;;) {
6186 if (m_line_break.match(text, this->interval.end, end, flags))
6187 goto error;
6188 else if (this->interval.end < end && text[this->interval.end]) {
6189 if (isspace(text[this->interval.end]))
6190 this->interval.end++;
6191 else
6192 break;
6193 }
6194 else
6195 goto error;
6196 }
6197 if (url.match(text, this->interval.end, end, flags))
6198 this->interval.end = url.interval.end;
6199 else
6200 goto error;
6201
// 4. Optional protocol. It starts out invalidated, so every early `goto end` below reports
//    success without a protocol.
6202 protocol.invalidate();
6203 for (;;) {
6204 if (m_line_break.match(text, this->interval.end, end, flags)) {
6205 this->interval.end = m_line_break.interval.end;
6206 goto end;
6207 }
6208 else if (this->interval.end < end && text[this->interval.end]) {
6209 if (isspace(text[this->interval.end]))
6210 this->interval.end++;
6211 else
6212 break;
6213 }
6214 else
6215 goto end;
6216 }
// Every branch of this loop leaves it, so the body runs exactly once.
6217 for (;;) {
6218 if (m_line_break.match(text, this->interval.end, end, flags)) {
6219 this->interval.end = m_line_break.interval.end;
6220 goto end;
6221 }
6222 else if (protocol.match(text, this->interval.end, end, flags)) {
6223 this->interval.end = protocol.interval.end;
6224 break;
6225 }
6226 else
6227 goto end;
6228 }
6229
// 5. Consume the rest of the line, including its line break when present.
6230 for (;;) {
6231 if (m_line_break.match(text, this->interval.end, end, flags)) {
6232 this->interval.end = m_line_break.interval.end;
6233 break;
6234 }
6235 else if (this->interval.end < end && text[this->interval.end])
6236 this->interval.end++;
6237 else
6238 goto end;
6239 }
6240
6241 end:
6242 this->interval.start = start;
6243 return true;
6244
6245 error:
6246 verb.start = 1;
6247 verb.end = 0;
6248 url.invalidate();
6249 protocol.invalidate();
6250 this->interval.start = 1;
6251 this->interval.end = 0;
6252 return false;
6253 }
6254
/// Marks the verb as invalid and resets url, protocol and the base parser.
6255 virtual void invalidate()
6256 {
6257 verb.start = 1;
6258 verb.end = 0;
6259 url.invalidate();
6260 protocol.invalidate();
6261 parser::invalidate();
6262 }
6263
6264 public:
// Sub-parsers holding the matched URL and protocol; protocol is invalid when none was given.
6266 http_url url;
6267 http_protocol protocol;
6268
6269 protected:
// Helper parser that recognises the end of the request line.
6270 http_line_break m_line_break;
6271 };
6272
/// Test for HTTP header: "name: value" terminated by a line break, where a line break
/// followed by whitespace continues the value on the next line.
//
// NOTE(review): listing lines 6375-6376 are absent from this dump; the `name` and `value`
// members used below are presumably declared there.
6276 class http_header : public parser
6277 {
6278 public:
/// \param[in] text   Text to parse; may be null only when start >= end (see the assert)
/// \param[in] start  Index in text to start parsing at
/// \param[in] end    Index in text to stop parsing at
/// \param[in] flags  Match flags, forwarded to the line-break parser
///
/// \returns true with `name` and `value` set to the respective text ranges on success;
///          false with name, value and interval set to the inverted range {1, 0} otherwise.
// NOTE(review): isspace() is called with a plain char throughout; if this resolves to the
// <cctype> function, negative values are undefined behaviour - confirm which overload is used.
6279 virtual bool match(
6280 _In_reads_or_z_(end) const char* text,
6281 _In_ size_t start = 0,
6282 _In_ size_t end = (size_t)-1,
6283 _In_ int flags = match_default)
6284 {
6285 assert(text || start >= end);
6286 this->interval.end = start;
6287
// A header may not start with a line break or with whitespace.
6288 if (m_line_break.match(text, this->interval.end, end, flags) ||
6289 (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])))
6290 goto error;
// Name: everything up to ':' or up to whitespace that is itself followed by ':'.
6291 name.start = this->interval.end;
6292 for (;;) {
6293 if (m_line_break.match(text, this->interval.end, end, flags))
6294 goto error;
6295 else if (this->interval.end < end && text[this->interval.end]) {
6296 if (isspace(text[this->interval.end])) {
6297 name.end = this->interval.end;
6298 this->interval.end++;
// Whitespace after the name: skip it; the next non-space character must be ':'.
6299 for (;;) {
6300 if (m_line_break.match(text, this->interval.end, end, flags))
6301 goto error;
6302 else if (this->interval.end < end && text[this->interval.end]) {
6303 if (isspace(text[this->interval.end]))
6304 this->interval.end++;
6305 else
6306 break;
6307 }
6308 else
6309 goto error;
6310 }
6311 if (this->interval.end < end && text[this->interval.end] == ':') {
6312 this->interval.end++;
6313 break;
6314 }
6315 else
6316 goto error;
// Unreachable: both branches above leave this point.
6317 break;
6318 }
6319 else if (text[this->interval.end] == ':') {
6320 name.end = this->interval.end;
6321 this->interval.end++;
6322 break;
6323 }
6324 else
6325 this->interval.end++;
6326 }
6327 else
6328 goto error;
6329 }
// Value: spans from the first to the last non-space character. (size_t)-1 marks "no
// non-space character seen yet"; if none is found, value stays {(size_t)-1, 0}.
6330 value.start = (size_t)-1;
6331 value.end = 0;
6332 for (;;) {
6333 if (m_line_break.match(text, this->interval.end, end, flags)) {
6334 this->interval.end = m_line_break.interval.end;
// A line break followed by whitespace (but not by another line break) continues the value;
// anything else ends the header.
6335 if (!m_line_break.match(text, this->interval.end, end, flags) &&
6336 this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end]))
6337 this->interval.end++;
6338 else
6339 break;
6340 }
6341 else if (this->interval.end < end && text[this->interval.end]) {
6342 if (isspace(text[this->interval.end]))
6343 this->interval.end++;
6344 else {
// Non-space character: fix the start once, keep pushing the end forward.
6345 if (value.start == (size_t)-1) value.start = this->interval.end;
6346 value.end = ++this->interval.end;
6347 }
6348 }
6349 else
6350 break;
6351 }
6352 this->interval.start = start;
6353 return true;
6354
6355 error:
6356 name.start = 1;
6357 name.end = 0;
6358 value.start = 1;
6359 value.end = 0;
6360 this->interval.start = 1;
6361 this->interval.end = 0;
6362 return false;
6363 }
6364
/// Marks name and value as invalid, then resets the base parser.
6365 virtual void invalidate()
6366 {
6367 name.start = 1;
6368 name.end = 0;
6369 value.start = 1;
6370 value.end = 0;
6371 parser::invalidate();
6372 }
6373
6374 public:
6377
6378 protected:
// Helper parser that recognises line breaks inside and at the end of the header.
6379 http_line_break m_line_break;
6380 };
6381
6385 template <class _Key, class T>
6386 class http_value_collection : public T
6387 {
6388 public:
6389 void insert(
6390 _In_reads_or_z_(end) const char* text,
6391 _In_ size_t start = 0,
6392 _In_ size_t end = (size_t)-1,
6393 _In_ int flags = match_default)
6394 {
6395 while (start < end) {
6396 while (start < end && text[start] && isspace(text[start])) start++;
6397 if (start < end && text[start] == ',') {
6398 start++;
6399 while (start < end&& text[start] && isspace(text[start])) start++;
6400 }
6401 _Key el;
6402 if (el.match(text, start, end, flags)) {
6403 start = el.interval.end;
6404 T::insert(std::move(el));
6405 }
6406 else
6407 break;
6408 }
6409 }
6410 };
6411
/// Comparison functor: returns true when `a` has a strictly greater weight
/// (factor.value) than `b`.
/// NOTE(review): presumably used to order weighted values from most to least preferred - confirm at the use site.
//
// NOTE(review): listing line 6413 (the struct/class head that belongs to this template line)
// is absent from this dump - restore it from the original header.
6412 template <class T>
6414 constexpr bool operator()(const T& a, const T& b) const noexcept
6415 {
6416 return a.factor.value > b.factor.value;
6417 }
6418 };
6419
6423 template <class T, class _Alloc = std::allocator<T>>
6425
/// Test for JSON string: a quoted string whose escape sequences are decoded into `value`.
/// All lexical elements (quote, escape character, escape letters, plain characters, hex
/// digits) are supplied by the caller as sub-parsers.
//
// NOTE(review): listing lines 6430 (class head), 6433 (constructor name) and 6551 (a statement
// in invalidate()) are absent from this dump - restore them from the original header.
6429 template <class T>
6431 {
6432 public:
// Constructor parameters, in order: opening/closing quote, ordinary character, escape
// character, and the parsers for what follows the escape character in each supported
// sequence (solidus, backspace, form feed, line feed, carriage return, horizontal tab,
// Unicode marker), then the hexadecimal number parser and the locale for the base parser.
6434 _In_ const std::shared_ptr<basic_parser<T>>& quote,
6435 _In_ const std::shared_ptr<basic_parser<T>>& chr,
6436 _In_ const std::shared_ptr<basic_parser<T>>& escape,
6437 _In_ const std::shared_ptr<basic_parser<T>>& sol,
6438 _In_ const std::shared_ptr<basic_parser<T>>& bs,
6439 _In_ const std::shared_ptr<basic_parser<T>>& ff,
6440 _In_ const std::shared_ptr<basic_parser<T>>& lf,
6441 _In_ const std::shared_ptr<basic_parser<T>>& cr,
6442 _In_ const std::shared_ptr<basic_parser<T>>& htab,
6443 _In_ const std::shared_ptr<basic_parser<T>>& uni,
6444 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
6445 _In_ const std::locale& locale = std::locale()) :
6446 basic_parser<T>(locale),
6447 m_quote(quote),
6448 m_chr(chr),
6449 m_escape(escape),
6450 m_sol(sol),
6451 m_bs(bs),
6452 m_ff(ff),
6453 m_lf(lf),
6454 m_cr(cr),
6455 m_htab(htab),
6456 m_uni(uni),
6457 m_hex(hex)
6458 {}
6459
/// \param[in] text   Text to parse; may be null only when start >= end (see the assert)
/// \param[in] start  Index in text to start parsing at
/// \param[in] end    Index in text to stop parsing at
/// \param[in] flags  Match flags, forwarded to the sub-parsers
///
/// \returns true with interval covering both quotes and `value` holding the decoded content;
///          false with `value` cleared and an inverted interval otherwise.
6460 virtual bool match(
6461 _In_reads_or_z_(end) const T* text,
6462 _In_ size_t start = 0,
6463 _In_ size_t end = (size_t)-1,
6464 _In_ int flags = match_default)
6465 {
6466 assert(text || start >= end);
6467 this->interval.end = start;
6468 if (m_quote->match(text, this->interval.end, end, flags)) {
6469 this->interval.end = m_quote->interval.end;
6470 value.clear();
6471 for (;;) {
// Closing quote: the string is complete.
6472 if (m_quote->match(text, this->interval.end, end, flags)) {
6473 this->interval.start = start;
6474 this->interval.end = m_quote->interval.end;
6475 return true;
6476 }
// Escape sequence: try each supported continuation right after the escape character.
// If none matches, control falls through to the plain-character test below.
6477 if (m_escape->match(text, this->interval.end, end, flags)) {
6478 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
6479 value += '"'; this->interval.end = m_quote->interval.end;
6480 continue;
6481 }
6482 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
6483 value += '/'; this->interval.end = m_sol->interval.end;
6484 continue;
6485 }
6486 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
6487 value += '\b'; this->interval.end = m_bs->interval.end;
6488 continue;
6489 }
6490 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
6491 value += '\f'; this->interval.end = m_ff->interval.end;
6492 continue;
6493 }
6494 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
6495 value += '\n'; this->interval.end = m_lf->interval.end;
6496 continue;
6497 }
6498 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
6499 value += '\r'; this->interval.end = m_cr->interval.end;
6500 continue;
6501 }
6502 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
6503 value += '\t'; this->interval.end = m_htab->interval.end;
6504 continue;
6505 }
// Unicode escape: the hex parser is limited to at most four code units after the marker.
6506 if (
6507 m_uni->match(text, m_escape->interval.end, end, flags) &&
6508 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
6509 m_hex->interval.size() == 4 /* JSON requires 4-digit Unicode sequences: \u.... */)
6510 {
6511 assert(m_hex->value <= 0xffff);
// One-byte code units: encode the 16-bit value as 1-3 UTF-8 bytes. Wider code units store
// the value directly. Each escape is converted on its own; nothing here joins two escapes
// that form a surrogate pair.
6512 if (sizeof(T) == 1) {
6513 if (m_hex->value > 0x7ff) {
6514 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
6515 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
6516 value += (T)(0x80 | (m_hex->value & 0x3f));
6517 }
6518 else if (m_hex->value > 0x7f) {
6519 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
6520 value += (T)(0x80 | (m_hex->value & 0x3f));
6521 }
6522 else
6523 value += (T)(m_hex->value & 0x7f);
6524 }
6525 else
6526 value += (T)m_hex->value;
6527 this->interval.end = m_hex->interval.end;
6528 continue;
6529 }
// Escaped escape character. This call overwrites m_escape's interval, so it must stay the
// last test that reads m_escape->interval.end.
6530 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
6531 value += '\\'; this->interval.end = m_escape->interval.end;
6532 continue;
6533 }
6534 }
// Ordinary character: copy its code units verbatim.
// NOTE(review): std::basic_string has no member named Prilepi (Slovenian: "append");
// presumably append() was intended - this fails to compile once the template is instantiated.
6535 if (m_chr->match(text, this->interval.end, end, flags)) {
6536 value.Prilepi(text + m_chr->interval.start, m_chr->interval.size());
6537 this->interval.end = m_chr->interval.end;
6538 continue;
6539 }
// Neither quote, escape sequence nor valid character: unterminated or malformed string.
6540 break;
6541 }
6542 }
6543 value.clear();
6544 this->interval.start = (this->interval.end = start) + 1;
6545 return false;
6546 }
6547
/// Clears the decoded value. (Listing line 6551, the remaining statement, is missing here.)
6548 virtual void invalidate()
6549 {
6550 value.clear();
6552 }
6553
6554 public:
// Decoded string content, without the surrounding quotes.
6555 std::basic_string<T> value;
6556
6557 protected:
// Sub-parsers supplied to the constructor; see the constructor for their roles.
6558 std::shared_ptr<basic_parser<T>> m_quote;
6559 std::shared_ptr<basic_parser<T>> m_chr;
6560 std::shared_ptr<basic_parser<T>> m_escape;
6561 std::shared_ptr<basic_parser<T>> m_sol;
6562 std::shared_ptr<basic_parser<T>> m_bs;
6563 std::shared_ptr<basic_parser<T>> m_ff;
6564 std::shared_ptr<basic_parser<T>> m_lf;
6565 std::shared_ptr<basic_parser<T>> m_cr;
6566 std::shared_ptr<basic_parser<T>> m_htab;
6567 std::shared_ptr<basic_parser<T>> m_uni;
6568 std::shared_ptr<basic_integer16<T>> m_hex;
6569 };
6570
6573#ifdef _UNICODE
6574 using tjson_string = wjson_string;
6575#else
6576 using tjson_string = json_string;
6577#endif
6578 }
6579}
6580
6581#undef ENUM_FLAG_OPERATOR
6582#undef ENUM_FLAGS
6583
6584#ifdef _MSC_VER
6585#pragma warning(pop)
6586#endif
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4395
Test for any code unit.
Definition parser.hpp:221
Test for beginning of line.
Definition parser.hpp:615
Test for any.
Definition parser.hpp:1057
Test for chemical formula.
Definition parser.hpp:4669
Test for any code unit from a given string of code units.
Definition parser.hpp:720
Test for specific code unit.
Definition parser.hpp:291
Test for date.
Definition parser.hpp:4025
Test for valid DNS domain character.
Definition parser.hpp:2806
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2844
Test for DNS domain/hostname.
Definition parser.hpp:2906
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2970
Test for e-mail address.
Definition parser.hpp:3794
Test for emoticon.
Definition parser.hpp:3902
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3991
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3992
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3994
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3993
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3990
Test for end of line.
Definition parser.hpp:653
Test for fraction.
Definition parser.hpp:1686
Test for decimal integer.
Definition parser.hpp:1295
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1380
bool has_separators
Did integer have any separators?
Definition parser.hpp:1440
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1439
Test for hexadecimal integer.
Definition parser.hpp:1461
Base class for integer testing.
Definition parser.hpp:1273
size_t value
Calculated value of the numeral.
Definition parser.hpp:1287
Test for IPv4 address.
Definition parser.hpp:2346
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2461
struct in_addr value
IPv4 address value.
Definition parser.hpp:2462
Test for IPv6 address.
Definition parser.hpp:2565
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2769
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2767
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2768
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2493
Test for repeating.
Definition parser.hpp:910
bool m_greedy
try to match as long sequence as possible
Definition parser.hpp:949
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:946
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:947
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:948
Test for JSON string.
Definition parser.hpp:6431
Test for mixed numeral.
Definition parser.hpp:1922
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:2028
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2026
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2025
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2024
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2027
Test for monetary numeral.
Definition parser.hpp:2217
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2323
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2328
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2326
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2329
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2327
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2324
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2325
"No-op" match
Definition parser.hpp:189
Base template for all parsers.
Definition parser.hpp:70
interval< size_t > interval
Region of the last match.
Definition parser.hpp:169
Test for permutation.
Definition parser.hpp:1197
Test for phone number.
Definition parser.hpp:4518
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4644
Test for any punctuation code unit.
Definition parser.hpp:463
Test for Roman numeral.
Definition parser.hpp:1570
Test for scientific numeral.
Definition parser.hpp:2048
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2192
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2196
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2190
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2191
double value
Calculated value of the numeral.
Definition parser.hpp:2200
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2198
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2195
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2197
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2199
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2194
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2193
Test for match score.
Definition parser.hpp:1749
Test for sequence.
Definition parser.hpp:1006
Definition parser.hpp:688
Test for signed numeral.
Definition parser.hpp:1836
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1904
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1903
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1902
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1905
Test for any space code unit.
Definition parser.hpp:384
Test for any space or punctuation code unit.
Definition parser.hpp:537
Test for any string.
Definition parser.hpp:1125
Test for given string.
Definition parser.hpp:815
Test for time.
Definition parser.hpp:4292
Test for valid URL password character.
Definition parser.hpp:3088
Test for valid URL path character.
Definition parser.hpp:3188
Test for URL path.
Definition parser.hpp:3296
Test for valid URL username character.
Definition parser.hpp:2989
Test for URL.
Definition parser.hpp:3437
Test for HTTP agent.
Definition parser.hpp:5954
Test for HTTP any type.
Definition parser.hpp:5076
Test for HTTP asterisk.
Definition parser.hpp:5718
Test for HTTP header.
Definition parser.hpp:6277
Test for HTTP language (RFC1766)
Definition parser.hpp:5586
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:4750
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5108
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5163
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5021
http_token name
Parameter name.
Definition parser.hpp:5065
http_value value
Parameter value.
Definition parser.hpp:5066
Test for HTTP protocol.
Definition parser.hpp:6030
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6132
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:4911
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:4967
Test for HTTP request.
Definition parser.hpp:6139
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:4786
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:4823
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:4857
Test for HTTP URL parameter.
Definition parser.hpp:5403
Test for HTTP URL path segment.
Definition parser.hpp:5314
Test for HTTP URL path segment.
Definition parser.hpp:5347
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:5396
Test for HTTP URL port.
Definition parser.hpp:5258
Test for HTTP URL server.
Definition parser.hpp:5221
Test for HTTP URL.
Definition parser.hpp:5484
Collection of HTTP values.
Definition parser.hpp:6387
Test for HTTP value (RFC2616: value)
Definition parser.hpp:4977
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5013
http_token token
Value when matched as token.
Definition parser.hpp:5014
Test for HTTP weight factor.
Definition parser.hpp:5649
float value
Calculated value of the weight factor.
Definition parser.hpp:5711
Test for HTTP weighted value.
Definition parser.hpp:5741
Base template for collection-holding parsers.
Definition parser.hpp:966
Test for any SGML code point.
Definition parser.hpp:253
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:772
Test for specific SGML code point.
Definition parser.hpp:340
Test for valid DNS domain SGML character.
Definition parser.hpp:2862
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2531
Test for any SGML punctuation code point.
Definition parser.hpp:504
Test for any SGML space code point.
Definition parser.hpp:427
Test for any SGML space or punctuation code point.
Definition parser.hpp:580
Test for SGML given string.
Definition parser.hpp:862
Test for valid URL password SGML character.
Definition parser.hpp:3140
Test for valid URL path SGML character.
Definition parser.hpp:3244
Test for valid URL username SGML character.
Definition parser.hpp:3040
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
T start
interval start
Definition interval.hpp:19
Definition parser.hpp:6413