stdex
Additional custom algorithms not covered by the C++ Standard Library
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "interval.hpp"
10#include "memory.hpp"
11#include "sgml.hpp"
12#include "string.hpp"
13#include "system.hpp"
14#include <stdarg.h>
15#include <stdint.h>
16#include <math.h>
17#if defined(_WIN32)
18#include <winsock2.h>
19#if _MSC_VER >= 1300
20#include <ws2ipdef.h>
21#endif
22#include <ws2tcpip.h>
23#elif defined(__APPLE__)
24#include <netinet/in.h>
25#else
26#include <inaddr.h>
27#include <in6addr.h>
28#endif
29#include <limits>
30#include <list>
31#include <locale>
32#include <memory>
33#include <set>
34#include <string>
35
36#ifdef _MSC_VER
37#pragma warning(push)
38#pragma warning(disable: 4100)
39#endif
40
// ENUM_FLAG_OPERATOR(T,X): defines binary operator X for enum type T in three
// forms (T X T, T X underlying, underlying X T) plus the compound assignment
// X= taking either T or its underlying type. Operands are cast to
// std::underlying_type_t<T>, combined, and the result is cast back to T.
41#define ENUM_FLAG_OPERATOR(T,X) \
42inline T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
43inline T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
44inline T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
45inline T& operator X= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
46inline T& operator X= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }
// ENUM_FLAGS(T, type): forward-declares `enum class T : type`, defines the
// ~, |, ^ and & operators (and |=, ^=, &=) for it, and ends with an open
// `enum class T : type` head. The expansion carries no body or semicolon, so
// the invocation is presumably followed by the `{ ... };` enumerator list.
47#define ENUM_FLAGS(T, type) \
48enum class T : type; \
49inline T operator ~ (T t) { return (T) (~static_cast<std::underlying_type_t <T>>(t)); } \
50ENUM_FLAG_OPERATOR(T,|) \
51ENUM_FLAG_OPERATOR(T,^) \
52ENUM_FLAG_OPERATOR(T,&) \
53enum class T : type
54
55#if defined(_WIN32)
56#elif defined(__APPLE__)
57#define s6_words __u6_addr.__u6_addr16
58#else
59#error Unsupported platform
60#endif
61
62namespace stdex
63{
64 namespace parser
65 {
// Flags accepted by the `flags` parameter of search()/match(); combine with bitwise OR.
// No flag set.
69 constexpr int match_default = 0;
// Parsers in this file that test this bit compare characters after ctype.tolower()
// or through the stdex::strni* helpers.
70 constexpr int match_case_insensitive = 0x1;
// The space parsers in this file accept line-break characters as whitespace only when this bit is set.
71 constexpr int match_multiline = 0x2;
72
76 template <class T>
78 {
79 public:
80 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
81 virtual ~basic_parser() {}
82
83 bool search(
84 _In_reads_or_z_(end) const T* text,
85 _In_ size_t start = 0,
86 _In_ size_t end = (size_t)-1,
87 _In_ int flags = match_default)
88 {
89 for (size_t i = start; i < end && text[i]; i++)
90 if (match(text, i, end, flags))
91 return true;
92 return false;
93 }
94
95 virtual bool match(
96 _In_reads_or_z_(end) const T* text,
97 _In_ size_t start = 0,
98 _In_ size_t end = (size_t)-1,
99 _In_ int flags = match_default) = 0;
100
101 template<class _Traits, class _Ax>
102 inline bool match(
103 const std::basic_string<T, _Traits, _Ax>& text,
104 _In_ size_t start = 0,
105 _In_ size_t end = (size_t)-1,
106 _In_ int flags = match_default)
107 {
108 return match(text.c_str(), start, std::min<size_t>(end, text.size()), flags);
109 }
110
// Resets the stored match result: writes start = 1 and end = 0 into
// this->interval, i.e. a range whose start lies past its end.
111 virtual void invalidate()
112 {
113 this->interval.start = 1;
114 this->interval.end = 0;
115 }
116
117 protected:
// Decodes the character or SGML entity that begins at text[start].
//
// text     Source text (char code units).
// start    Offset of the character/entity to decode.
// end      Offset at which reading stops; a zero code unit also stops reading.
// chr_end  Receives the offset just past the consumed input.
// buf      Scratch buffer for the decoded, zero-terminated wide string.
//
// Returns a zero-terminated wide string: either `buf` or the pointer returned
// by sgml2uni() for a named entity.
//
// An entity that is unterminated, unknown, or interrupted by '&' or whitespace
// is not decoded; the single code unit text[start] is returned instead and
// chr_end is set to start + 1.
119 const wchar_t* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ wchar_t(&buf)[3])
120 {
121 if (text[start] == '&') {
122 // Potential entity start
123 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
// Scan forward for the terminating ';'.
124 for (chr_end = start + 1;; chr_end++) {
125 if (chr_end >= end || text[chr_end] == 0) {
126 // Unterminated entity
127 break;
128 }
129 if (text[chr_end] == ';') {
130 // Entity end
// n = number of code units between '&' and ';'.
131 size_t n = chr_end - start - 1;
132 if (n >= 2 && text[start + 1] == '#') {
133 // Numerical entity
134 char32_t unicode;
// "&#x..." / "&#X..." is parsed as hexadecimal, "&#..." as decimal.
135 if (text[start + 2] == 'x' || text[start + 2] == 'X')
136 unicode = strtou32(text + start + 3, n - 2, nullptr, 16);
137 else
138 unicode = strtou32(text + start + 2, n - 1, nullptr, 10);
139#ifdef _WIN32
// On _WIN32, values of 0x10000 and above are stored via ucs4_to_surrogate_pair().
140 if (unicode < 0x10000) {
141 buf[0] = (wchar_t)unicode;
142 buf[1] = 0;
143 }
144 else {
145 ucs4_to_surrogate_pair(buf, unicode);
146 buf[2] = 0;
147 }
148#else
149 buf[0] = (wchar_t)unicode;
150 buf[1] = 0;
151#endif
// Step over the ';'.
152 chr_end++;
153 return buf;
154 }
// Named entity: look it up by the name between '&' and ';'.
155 const wchar_t* entity_w = sgml2uni(text + start + 1, n);
156 if (entity_w) {
157 chr_end++;
158 return entity_w;
159 }
160 // Unknown entity.
161 break;
162 }
163 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
164 // This char cannot possibly be a part of entity.
165 break;
166 }
167 }
168 }
// Fallback: pass the single code unit at text[start] through unchanged.
169 buf[0] = text[start];
170 buf[1] = 0;
171 chr_end = start + 1;
172 return buf;
173 }
175
176 public:
178
179 protected:
180 std::locale m_locale;
181 };
182
185#ifdef _UNICODE
186 using tparser = wparser;
187#else
188 using tparser = parser;
189#endif
191
195 template <class T>
196 class basic_noop : public basic_parser<T>
197 {
198 public:
199 virtual bool match(
200 _In_reads_or_z_(end) const T* text,
201 _In_ size_t start = 0,
202 _In_ size_t end = (size_t)-1,
203 _In_ int flags = match_default)
204 {
205 _Assume_(text || start >= end);
206 if (start < end && text[start]) {
207 this->interval.start = this->interval.end = start;
208 return true;
209 }
210 this->interval.start = (this->interval.end = start) + 1;
211 return false;
212 }
213 };
214
215 using noop = basic_noop<char>;
217#ifdef _UNICODE
218 using tnoop = wnoop;
219#else
220 using tnoop = noop;
221#endif
223
227 template <class T>
228 class basic_any_cu : public basic_parser<T>
229 {
230 public:
231 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
232
233 virtual bool match(
234 _In_reads_or_z_(end) const T* text,
235 _In_ size_t start = 0,
236 _In_ size_t end = (size_t)-1,
237 _In_ int flags = match_default)
238 {
239 _Assume_(text || start >= end);
240 if (start < end && text[start]) {
241 this->interval.end = (this->interval.start = start) + 1;
242 return true;
243 }
244 this->interval.start = (this->interval.end = start) + 1;
245 return false;
246 }
247 };
248
251#ifdef _UNICODE
252 using tany_cu = wany_cu;
253#else
254 using tany_cu = any_cu;
255#endif
256
260 class sgml_any_cp : public basic_any_cu<char>
261 {
262 public:
263 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
264
265 virtual bool match(
266 _In_reads_or_z_(end) const char* text,
267 _In_ size_t start = 0,
268 _In_ size_t end = (size_t)-1,
269 _In_ int flags = match_default)
270 {
271 _Assume_(text || start >= end);
272 if (start < end && text[start]) {
273 if (text[start] == '&') {
274 // SGML entity
275 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
276 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
277 if (text[this->interval.end] == ';') {
278 this->interval.end++;
279 this->interval.start = start;
280 return true;
281 }
282 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
283 break;
284 // Unterminated entity
285 }
286 this->interval.end = (this->interval.start = start) + 1;
287 return true;
288 }
289 this->interval.start = (this->interval.end = start) + 1;
290 return false;
291 }
292 };
293
297 template <class T>
298 class basic_cu : public basic_parser<T>
299 {
300 public:
301 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
302 basic_parser<T>(locale),
303 m_chr(chr),
304 m_invert(invert)
305 {}
306
307 virtual bool match(
308 _In_reads_or_z_(end) const T* text,
309 _In_ size_t start = 0,
310 _In_ size_t end = (size_t)-1,
311 _In_ int flags = match_default)
312 {
313 _Assume_(text || start >= end);
314 if (start < end && text[start]) {
315 bool r;
316 if (flags & match_case_insensitive) {
317 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
318 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
319 }
320 else
321 r = text[start] == m_chr;
322 if ((r && !m_invert) || (!r && m_invert)) {
323 this->interval.end = (this->interval.start = start) + 1;
324 return true;
325 }
326 }
327 this->interval.start = (this->interval.end = start) + 1;
328 return false;
329 }
330
331 protected:
332 T m_chr;
333 bool m_invert;
334 };
335
336 using cu = basic_cu<char>;
337 using wcu = basic_cu<wchar_t>;
338#ifdef _UNICODE
339 using tcu = wcu;
340#else
341 using tcu = cu;
342#endif
343
347 class sgml_cp : public sgml_parser
348 {
349 public:
350 sgml_cp(const char* chr, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
351 sgml_parser(locale),
352 m_invert(invert)
353 {
354 _Assume_(chr || !count);
355 wchar_t buf[3];
356 size_t chr_end;
357 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
358 }
359
360 virtual bool match(
361 _In_reads_or_z_(end) const char* text,
362 _In_ size_t start = 0,
363 _In_ size_t end = (size_t)-1,
364 _In_ int flags = match_default)
365 {
366 _Assume_(text || start >= end);
367 if (start < end && text[start]) {
368 wchar_t buf[3];
369 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
370 bool r = ((flags & match_case_insensitive) ?
371 stdex::strnicmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size(), m_locale) :
372 stdex::strncmp(chr, (size_t)-1, m_chr.c_str(), m_chr.size())) == 0;
373 if ((r && !m_invert) || (!r && m_invert)) {
374 this->interval.start = start;
375 return true;
376 }
377 }
378 this->interval.start = (this->interval.end = start) + 1;
379 return false;
380 }
381
382 protected:
383 std::wstring m_chr;
384 bool m_invert;
385 };
386
390 template <class T>
391 class basic_space_cu : public basic_parser<T>
392 {
393 public:
394 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
395 basic_parser<T>(locale),
396 m_invert(invert)
397 {}
398
399 virtual bool match(
400 _In_reads_or_z_(end) const T* text,
401 _In_ size_t start = 0,
402 _In_ size_t end = (size_t)-1,
403 _In_ int flags = match_default)
404 {
405 _Assume_(text || start >= end);
406 if (start < end && text[start]) {
407 bool r =
408 ((flags & match_multiline) || !islbreak(text[start])) &&
409 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
410 if ((r && !m_invert) || (!r && m_invert)) {
411 this->interval.end = (this->interval.start = start) + 1;
412 return true;
413 }
414 }
415 this->interval.start = (this->interval.end = start) + 1;
416 return false;
417 }
418
419 protected:
420 bool m_invert;
421 };
422
425#ifdef _UNICODE
426 using tspace_cu = wspace_cu;
427#else
428 using tspace_cu = space_cu;
429#endif
430
434 class sgml_space_cp : public basic_space_cu<char>
435 {
436 public:
437 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
439 {}
440
441 virtual bool match(
442 _In_reads_or_z_(end) const char* text,
443 _In_ size_t start = 0,
444 _In_ size_t end = (size_t)-1,
445 _In_ int flags = match_default)
446 {
447 _Assume_(text || start >= end);
448 if (start < end && text[start]) {
449 wchar_t buf[3];
450 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
451 const wchar_t* chr_end = chr + stdex::strlen(chr);
452 bool r =
453 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
454 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
455 if ((r && !m_invert) || (!r && m_invert)) {
456 this->interval.start = start;
457 return true;
458 }
459 }
460
461 this->interval.start = (this->interval.end = start) + 1;
462 return false;
463 }
464 };
465
469 template <class T>
470 class basic_punct_cu : public basic_parser<T>
471 {
472 public:
473 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
474 basic_parser<T>(locale),
475 m_invert(invert)
476 {}
477
478 virtual bool match(
479 _In_reads_or_z_(end) const T* text,
480 _In_ size_t start = 0,
481 _In_ size_t end = (size_t)-1,
482 _In_ int flags = match_default)
483 {
484 _Assume_(text || start >= end);
485 if (start < end && text[start]) {
486 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
487 if ((r && !m_invert) || (!r && m_invert)) {
488 this->interval.end = (this->interval.start = start) + 1;
489 return true;
490 }
491 }
492 this->interval.start = (this->interval.end = start) + 1;
493 return false;
494 }
495
496 protected:
497 bool m_invert;
498 };
499
502#ifdef _UNICODE
503 using tpunct_cu = wpunct_cu;
504#else
505 using tpunct_cu = punct_cu;
506#endif
507
511 class sgml_punct_cp : public basic_punct_cu<char>
512 {
513 public:
514 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
516 {}
517
518 virtual bool match(
519 _In_reads_or_z_(end) const char* text,
520 _In_ size_t start = 0,
521 _In_ size_t end = (size_t)-1,
522 _In_ int flags = match_default)
523 {
524 _Assume_(text || start >= end);
525 if (start < end && text[start]) {
526 wchar_t buf[3];
527 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
528 const wchar_t* chr_end = chr + stdex::strlen(chr);
529 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
530 if ((r && !m_invert) || (!r && m_invert)) {
531 this->interval.start = start;
532 return true;
533 }
534 }
535 this->interval.start = (this->interval.end = start) + 1;
536 return false;
537 }
538 };
539
543 template <class T>
545 {
546 public:
547 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
548 basic_parser<T>(locale),
549 m_invert(invert)
550 {}
551
552 virtual bool match(
553 _In_reads_or_z_(end) const T* text,
554 _In_ size_t start = 0,
555 _In_ size_t end = (size_t)-1,
556 _In_ int flags = match_default)
557 {
558 _Assume_(text || start >= end);
559 if (start < end && text[start]) {
560 bool r =
561 ((flags & match_multiline) || !islbreak(text[start])) &&
562 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
563 if ((r && !m_invert) || (!r && m_invert)) {
564 this->interval.end = (this->interval.start = start) + 1;
565 return true;
566 }
567 }
568 this->interval.start = (this->interval.end = start) + 1;
569 return false;
570 }
571
572 protected:
573 bool m_invert;
574 };
575
578#ifdef _UNICODE
580#else
582#endif
583
588 {
589 public:
590 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
592 {}
593
594 virtual bool match(
595 _In_reads_or_z_(end) const char* text,
596 _In_ size_t start = 0,
597 _In_ size_t end = (size_t)-1,
598 _In_ int flags = match_default)
599 {
600 _Assume_(text || start >= end);
601 if (start < end && text[start]) {
602 wchar_t buf[3];
603 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
604 const wchar_t* chr_end = chr + stdex::strlen(chr);
605 bool r =
606 ((flags & match_multiline) || !islbreak(chr, (size_t)-1)) &&
607 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
608 if ((r && !m_invert) || (!r && m_invert)) {
609 this->interval.start = start;
610 return true;
611 }
612 }
613 this->interval.start = (this->interval.end = start) + 1;
614 return false;
615 }
616 };
617
621 template <class T>
622 class basic_bol : public basic_parser<T>
623 {
624 public:
625 basic_bol(bool invert = false) : m_invert(invert) {}
626
627 virtual bool match(
628 _In_reads_or_z_(end) const T* text,
629 _In_ size_t start = 0,
630 _In_ size_t end = (size_t)-1,
631 _In_ int flags = match_default)
632 {
633 _Assume_(text || start >= end);
634 bool r = start == 0 || (start <= end && islbreak(text[start - 1]));
635 if ((r && !m_invert) || (!r && m_invert)) {
636 this->interval.end = this->interval.start = start;
637 return true;
638 }
639 this->interval.start = (this->interval.end = start) + 1;
640 return false;
641 }
642
643 protected:
644 bool m_invert;
645 };
646
647 using bol = basic_bol<char>;
648 using wbol = basic_bol<wchar_t>;
649#ifdef _UNICODE
650 using tbol = wbol;
651#else
652 using tbol = bol;
653#endif
655
659 template <class T>
660 class basic_eol : public basic_parser<T>
661 {
662 public:
663 basic_eol(bool invert = false) : m_invert(invert) {}
664
665 virtual bool match(
666 _In_reads_or_z_(end) const T* text,
667 _In_ size_t start = 0,
668 _In_ size_t end = (size_t)-1,
669 _In_ int flags = match_default)
670 {
671 _Assume_(text || start >= end);
672 bool r = islbreak(text[start]);
673 if ((r && !m_invert) || (!r && m_invert)) {
674 this->interval.end = this->interval.start = start;
675 return true;
676 }
677 this->interval.start = (this->interval.end = start) + 1;
678 return false;
679 }
680
681 protected:
682 bool m_invert;
683 };
684
685 using eol = basic_eol<char>;
686 using weol = basic_eol<wchar_t>;
687#ifdef _UNICODE
688 using teol = weol;
689#else
690 using teol = eol;
691#endif
693
694 template <class T>
695 class basic_set : public basic_parser<T>
696 {
697 public:
698 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
699 basic_parser<T>(locale),
700 hit_offset((size_t)-1),
701 m_invert(invert)
702 {}
703
704 virtual bool match(
705 _In_reads_or_z_(end) const T* text,
706 _In_ size_t start = 0,
707 _In_ size_t end = (size_t)-1,
708 _In_ int flags = match_default) = 0;
709
710 virtual void invalidate()
711 {
712 hit_offset = (size_t)-1;
714 }
715
716 public:
717 size_t hit_offset;
718
719 protected:
720 bool m_invert;
721 };
722
726 template <class T>
727 class basic_cu_set : public basic_set<T>
728 {
729 public:
731 _In_reads_or_z_(count) const T* set,
732 _In_ size_t count = (size_t)-1,
733 _In_ bool invert = false,
734 _In_ const std::locale& locale = std::locale()) :
735 basic_set<T>(invert, locale)
736 {
737 if (set)
738 m_set.assign(set, set + stdex::strnlen(set, count));
739 }
740
741 virtual bool match(
742 _In_reads_or_z_(end) const T* text,
743 _In_ size_t start = 0,
744 _In_ size_t end = (size_t)-1,
745 _In_ int flags = match_default)
746 {
747 _Assume_(text || start >= end);
748 if (start < end && text[start]) {
749 const T* set = m_set.c_str();
750 size_t r = (flags & match_case_insensitive) ?
751 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
752 stdex::strnchr(set, m_set.size(), text[start]);
753 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
754 this->hit_offset = r;
755 this->interval.end = (this->interval.start = start) + 1;
756 return true;
757 }
758 }
759 this->hit_offset = (size_t)-1;
760 this->interval.start = (this->interval.end = start) + 1;
761 return false;
762 }
763
764 protected:
765 std::basic_string<T> m_set;
766 };
767
770#ifdef _UNICODE
771 using tcu_set = wcu_set;
772#else
773 using tcu_set = cu_set;
774#endif
775
779 class sgml_cp_set : public basic_set<char>
780 {
781 public:
782 sgml_cp_set(const char* set, size_t count = (size_t)-1, bool invert = false, _In_ const std::locale& locale = std::locale()) :
783 basic_set<char>(invert, locale)
784 {
785 if (set)
786 m_set = sgml2wstr(set, count);
787 }
788
789 virtual bool match(
790 _In_reads_or_z_(end) const char* text,
791 _In_ size_t start = 0,
792 _In_ size_t end = (size_t)-1,
793 _In_ int flags = match_default)
794 {
795 _Assume_(text || start >= end);
796 if (start < end && text[start]) {
797 wchar_t buf[3];
798 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
799 const wchar_t* set = m_set.c_str();
800 size_t r = (flags & match_case_insensitive) ?
801 stdex::strnistr(set, m_set.size(), chr, m_locale) :
802 stdex::strnstr(set, m_set.size(), chr);
803 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
804 hit_offset = r;
805 this->interval.start = start;
806 return true;
807 }
808 }
809 hit_offset = (size_t)-1;
810 this->interval.start = (this->interval.end = start) + 1;
811 return false;
812 }
813
814 protected:
815 std::wstring m_set;
816 };
817
821 template <class T>
822 class basic_string : public basic_parser<T>
823 {
824 public:
826 _In_reads_or_z_(count) const T* str,
827 _In_ size_t count = (size_t)-1,
828 _In_ const std::locale& locale = std::locale()) :
829 basic_parser<T>(locale),
830 m_str(str, str + stdex::strnlen(str, count))
831 {}
832
833 virtual bool match(
834 _In_reads_or_z_(end) const T* text,
835 _In_ size_t start = 0,
836 _In_ size_t end = (size_t)-1,
837 _In_ int flags = match_default)
838 {
839 _Assume_(text || start >= end);
840 size_t
841 m = m_str.size(),
842 n = std::min<size_t>(end - start, m);
843 bool r = ((flags & match_case_insensitive) ?
844 stdex::strnicmp(text + start, n, m_str.c_str(), m, this->m_locale) :
845 stdex::strncmp(text + start, n, m_str.c_str(), m)) == 0;
846 if (r) {
847 this->interval.end = (this->interval.start = start) + n;
848 return true;
849 }
850 this->interval.start = (this->interval.end = start) + 1;
851 return false;
852 }
853
854 protected:
855 std::basic_string<T> m_str;
856 };
857
860#ifdef _UNICODE
861 using tstring = wstring;
862#else
863 using tstring = string;
864#endif
865
870 {
871 public:
872 sgml_string(const char* str, size_t count = (size_t)-1, _In_ const std::locale& locale = std::locale()) :
873 sgml_parser(locale),
874 m_str(sgml2wstr(str, count))
875 {}
876
877 virtual bool match(
878 _In_reads_or_z_(end) const char* text,
879 _In_ size_t start = 0,
880 _In_ size_t end = (size_t)-1,
881 _In_ int flags = match_default)
882 {
883 _Assume_(text || start >= end);
884 const wchar_t* str = m_str.c_str();
885 const bool case_insensitive = flags & match_case_insensitive ? true : false;
886 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
887 for (this->interval.end = start;;) {
888 if (!*str) {
889 this->interval.start = start;
890 return true;
891 }
892 if (this->interval.end >= end || !text[this->interval.end]) {
893 this->interval.start = (this->interval.end = start) + 1;
894 return false;
895 }
896 wchar_t buf[3];
897 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
898 for (; *chr; ++str, ++chr) {
899 if (!*str ||
900 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
901 {
902 this->interval.start = (this->interval.end = start) + 1;
903 return false;
904 }
905 }
906 }
907 }
908
909 protected:
910 std::wstring m_str;
911 };
912
916 template <class T>
918 {
919 public:
920 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = (size_t)-1, bool greedy = true) :
921 m_el(el),
925 {}
926
927 virtual bool match(
928 _In_reads_or_z_(end) const T* text,
929 _In_ size_t start = 0,
930 _In_ size_t end = (size_t)-1,
931 _In_ int flags = match_default)
932 {
933 _Assume_(text || start >= end);
934 this->interval.start = this->interval.end = start;
935 for (size_t i = 0; ; i++) {
936 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
937 return true;
938 if (!m_el->match(text, this->interval.end, end, flags)) {
939 if (i >= m_min_iterations)
940 return true;
941 break;
942 }
943 if (m_el->interval.end == this->interval.end) {
944 // Element did match, but the matching interval was empty. Quit instead of spinning.
945 return true;
946 }
947 this->interval.end = m_el->interval.end;
948 }
949 this->interval.start = (this->interval.end = start) + 1;
950 return false;
951 }
952
953 protected:
954 std::shared_ptr<basic_parser<T>> m_el;
957 bool m_greedy;
958 };
959
962#ifdef _UNICODE
963 using titerations = witerations;
964#else
965 using titerations = iterations;
966#endif
968
972 template <class T>
974 {
975 protected:
976 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
977
978 public:
980 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
981 _In_ size_t count,
982 _In_ const std::locale& locale = std::locale()) :
983 basic_parser<T>(locale)
984 {
985 _Assume_(el || !count);
986 m_collection.reserve(count);
987 for (size_t i = 0; i < count; i++)
988 m_collection.push_back(el[i]);
989 }
990
992 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
993 _In_ const std::locale& locale = std::locale()) :
994 basic_parser<T>(locale),
995 m_collection(std::move(collection))
996 {}
997
998 virtual void invalidate()
999 {
1000 for (auto& el: m_collection)
1001 el->invalidate();
1003 }
1004
1005 protected:
1006 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
1007 };
1008
1012 template <class T>
1014 {
1015 public:
1017 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1018 _In_ size_t count = 0,
1019 _In_ const std::locale& locale = std::locale()) :
1020 parser_collection<T>(el, count, locale)
1021 {}
1022
1024 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1025 _In_ const std::locale& locale = std::locale()) :
1026 parser_collection<T>(std::move(collection), locale)
1027 {}
1028
1029 virtual bool match(
1030 _In_reads_or_z_(end) const T* text,
1031 _In_ size_t start = 0,
1032 _In_ size_t end = (size_t)-1,
1033 _In_ int flags = match_default)
1034 {
1035 _Assume_(text || start >= end);
1036 this->interval.end = start;
1037 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1038 if (!(*i)->match(text, this->interval.end, end, flags)) {
1039 for (++i; i != this->m_collection.end(); ++i)
1040 (*i)->invalidate();
1041 this->interval.start = (this->interval.end = start) + 1;
1042 return false;
1043 }
1044 this->interval.end = (*i)->interval.end;
1045 }
1046 this->interval.start = start;
1047 return true;
1048 }
1049 };
1050
1053#ifdef _UNICODE
1054 using tsequence = wsequence;
1055#else
1056 using tsequence = sequence;
1057#endif
1059
1063 template <class T>
1065 {
1066 protected:
1067 basic_branch(_In_ const std::locale& locale) :
1068 parser_collection<T>(locale),
1069 hit_offset((size_t)-1)
1070 {}
1071
1072 public:
1074 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1075 _In_ size_t count = 0,
1076 _In_ const std::locale& locale = std::locale()) :
1077 parser_collection<T>(el, count, locale),
1078 hit_offset((size_t)-1)
1079 {}
1080
1082 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1083 _In_ const std::locale& locale = std::locale()) :
1084 parser_collection<T>(std::move(collection), locale),
1085 hit_offset((size_t)-1)
1086 {}
1087
1088 virtual bool match(
1089 _In_reads_or_z_(end) const T* text,
1090 _In_ size_t start = 0,
1091 _In_ size_t end = (size_t)-1,
1092 _In_ int flags = match_default)
1093 {
1094 _Assume_(text || start >= end);
1095 hit_offset = 0;
1096 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1097 if ((*i)->match(text, start, end, flags)) {
1098 this->interval = (*i)->interval;
1099 for (++i; i != this->m_collection.end(); ++i)
1100 (*i)->invalidate();
1101 return true;
1102 }
1103 }
1104 hit_offset = (size_t)-1;
1105 this->interval.start = (this->interval.end = start) + 1;
1106 return false;
1107 }
1108
1109 virtual void invalidate()
1110 {
1111 hit_offset = (size_t)-1;
1113 }
1114
1115 public:
1116 size_t hit_offset;
1117 };
1118
1119 using branch = basic_branch<char>;
1121#ifdef _UNICODE
1122 using tbranch = wbranch;
1123#else
1124 using tbranch = branch;
1125#endif
1127
1131 template <class T, class T_parser = basic_string<T>>
1133 {
1134 public:
1135 inline basic_string_branch(
1136 _In_reads_(count) const T* str_z = nullptr,
1137 _In_ size_t count = 0,
1138 _In_ const std::locale& locale = std::locale()) :
1139 basic_branch<T>(locale)
1140 {
1141 build(str_z, count);
1142 }
1143
1144 inline basic_string_branch(_In_z_ const T* str, ...) :
1145 basic_branch<T>(std::locale())
1146 {
1147 va_list params;
1148 va_start(params, str);
1149 build(str, params);
1150 va_end(params);
1151 }
1152
1153 inline basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1154 basic_branch<T>(locale)
1155 {
1156 va_list params;
1157 va_start(params, str);
1158 build(str, params);
1159 va_end(params);
1160 }
1161
1162 protected:
1163 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1164 {
1165 _Assume_(str_z || !count);
1166 if (count) {
1167 size_t offset, n;
1168 for (
1169 offset = n = 0;
1170 offset < count && str_z[offset];
1171 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1172 this->m_collection.reserve(n);
1173 for (
1174 offset = 0;
1175 offset < count && str_z[offset];
1176 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1177 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1178 }
1179 }
1180
1181 void build(_In_z_ const T* str, _In_ va_list params)
1182 {
1183 const T* p;
1184 for (
1185 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, (size_t)-1, this->m_locale)));
1186 (p = va_arg(params, const T*)) != nullptr;
1187 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, (size_t)-1, this->m_locale))));
1188 }
1189 };
1190
1193#ifdef _UNICODE
1195#else
1197#endif
1199
1203 template <class T>
1205 {
1206 public:
1208 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1209 _In_ size_t count = 0,
1210 _In_ const std::locale& locale = std::locale()) :
1211 parser_collection<T>(el, count, locale)
1212 {}
1213
1215 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1216 _In_ const std::locale& locale = std::locale()) :
1217 parser_collection<T>(std::move(collection), locale)
1218 {}
1219
1220 virtual bool match(
1221 _In_reads_or_z_(end) const T* text,
1222 _In_ size_t start = 0,
1223 _In_ size_t end = (size_t)-1,
1224 _In_ int flags = match_default)
1225 {
1226 _Assume_(text || start >= end);
1227 for (auto& el: this->m_collection)
1228 el->invalidate();
1229 if (match_recursively(text, start, end, flags)) {
1230 this->interval.start = start;
1231 return true;
1232 }
1233 this->interval.start = (this->interval.end = start) + 1;
1234 return false;
1235 }
1236
1237 protected:
1238 bool match_recursively(
1239 _In_reads_or_z_(end) const T* text,
1240 _In_ size_t start = 0,
1241 _In_ size_t end = (size_t)-1,
1242 _In_ int flags = match_default)
1243 {
1244 bool all_matched = true;
1245 for (auto& el: this->m_collection) {
1246 if (!el->interval) {
1247 // Element was not matched in permutatuion yet.
1248 all_matched = false;
1249 if (el->match(text, start, end, flags)) {
1250 // Element matched for the first time.
1251 if (match_recursively(text, el->interval.end, end, flags)) {
1252 // Rest of the elements matched too.
1253 return true;
1254 }
1255 el->invalidate();
1256 }
1257 }
1258 }
1259 if (all_matched) {
1260 this->interval.end = start;
1261 return true;
1262 }
1263 return false;
1264 }
1265 };
1266
1269#ifdef _UNICODE
1270 using tpermutation = wpermutation;
1271#else
1272 using tpermutation = permutation;
1273#endif
1275
1279 template <class T>
1280 class basic_integer : public basic_parser<T>
1281 {
1282 public:
1283 basic_integer(_In_ const std::locale& locale = std::locale()) :
1284 basic_parser<T>(locale),
1285 value(0)
1286 {}
1287
1288 virtual void invalidate()
1289 {
1290 value = 0;
1292 }
1293
1294 public:
1295 size_t value;
1296 };
1297
1301 template <class T>
1303 {
1304 public:
1306 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1307 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1308 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1309 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1310 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1311 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1312 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1313 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1314 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1315 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1316 _In_ const std::locale& locale = std::locale()) :
1317 basic_integer<T>(locale),
1318 m_digit_0(digit_0),
1319 m_digit_1(digit_1),
1320 m_digit_2(digit_2),
1321 m_digit_3(digit_3),
1322 m_digit_4(digit_4),
1323 m_digit_5(digit_5),
1324 m_digit_6(digit_6),
1325 m_digit_7(digit_7),
1326 m_digit_8(digit_8),
1327 m_digit_9(digit_9)
1328 {}
1329
// Matches a run of decimal digits beginning at `start`.
//
// text  - text to parse
// start - offset in text to start parsing at
// end   - offset in text to stop parsing at
// flags - matching flags, passed through to the digit parsers
//
// On failure the interval is left with start > end and false is returned.
1330 virtual bool match(
1331 _In_reads_or_z_(end) const T* text,
1332 _In_ size_t start = 0,
1333 _In_ size_t end = (size_t)-1,
1334 _In_ int flags = match_default)
1335 {
1336 _Assume_(text || start >= end);
// Consume one digit per iteration until `end`, a zero terminator, or a position no digit parser accepts.
1337 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1338 size_t dig;
// The digit parsers are tried in order 0..9; the first one that matches decides the digit
// and moves the interval end past the text it consumed.
1339 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1340 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1341 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1342 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1343 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1344 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1345 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1346 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1347 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1348 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1349 else break;
// Shift the accumulated value one decimal place and append the new digit.
1350 this->value = this->value * 10 + dig;
1351 }
1353 this->interval.start = start;
1354 return true;
1355 }
// Failure: mark the interval invalid (start one past end).
1356 this->interval.start = (this->interval.end = start) + 1;
1357 return false;
1358 }
1359
1360 protected:
1361 std::shared_ptr<basic_parser<T>>
1362 m_digit_0,
1363 m_digit_1,
1364 m_digit_2,
1365 m_digit_3,
1366 m_digit_4,
1367 m_digit_5,
1368 m_digit_6,
1369 m_digit_7,
1370 m_digit_8,
1371 m_digit_9;
1372 };
1373
// Build-dependent alias: winteger10 when _UNICODE is defined, integer10 otherwise.
1376#ifdef _UNICODE
1377 using tinteger10 = winteger10;
1378#else
1379 using tinteger10 = integer10;
1380#endif
1382
1386 template <class T>
1388 {
1389 public:
1391 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1392 _In_ const std::shared_ptr<basic_set<T>>& separator,
1393 _In_ const std::locale& locale = std::locale()) :
1394 basic_integer<T>(locale),
1395 digit_count(0),
1396 has_separators(false),
1397 m_digits(digits),
1398 m_separator(separator)
1399 {}
1400
// Matches a decimal integer that may be written in groups of three digits divided by a separator.
//
// text  - text to parse
// start - offset in text to start parsing at
// end   - offset in text to stop parsing at
// flags - matching flags, passed through to the digit and separator parsers
//
// On success value, digit_count and has_separators describe the match and true is returned.
// On failure value is zeroed, the interval is left with start > end and false is returned.
1401 virtual bool match(
1402 _In_reads_or_z_(end) const T* text,
1403 _In_ size_t start = 0,
1404 _In_ size_t end = (size_t)-1,
1405 _In_ int flags = match_default)
1406 {
1407 _Assume_(text || start >= end);
1408 if (m_digits->match(text, start, end, flags)) {
1409 // Leading part match.
1410 this->value = m_digits->value;
1411 digit_count = m_digits->interval.size();
1412 has_separators = false;
1413 this->interval.start = start;
1414 this->interval.end = m_digits->interval.end;
// Grouping is only attempted when the leading part spans at most three positions.
1415 if (m_digits->interval.size() <= 3) {
1416 // Maybe separated with thousand separators?
// hit_offset remembers which separator was seen first; (size_t)-1 means none yet.
1417 size_t hit_offset = (size_t)-1;
1418 while (m_separator->match(text, this->interval.end, end, flags) &&
1419 (hit_offset == (size_t)-1 || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1420 m_digits->match(text, m_separator->interval.end, end, flags) &&
1421 m_digits->interval.size() == 3)
1422 {
1423 // Thousand separator and three-digit integer followed.
1424 this->value = this->value * 1000 + m_digits->value;
1425 digit_count += 3;
1426 has_separators = true;
1427 this->interval.end = m_digits->interval.end;
1428 hit_offset = m_separator->hit_offset;
1429 }
1430 }
1431
1432 return true;
1433 }
1434 this->value = 0;
// Failure: mark the interval invalid (start one past end).
1435 this->interval.start = (this->interval.end = start) + 1;
1436 return false;
1437 }
1438
// Clears the digit count and the separator flag.
1439 virtual void invalidate()
1440 {
1441 digit_count = 0;
1442 has_separators = false;
1444 }
1445
1446 public:
1449
1450 protected:
1451 std::shared_ptr<basic_integer10<T>> m_digits;
1452 std::shared_ptr<basic_set<T>> m_separator;
1453 };
1454
// Build-dependent alias: winteger10ts when _UNICODE is defined, integer10ts otherwise.
1457#ifdef _UNICODE
1458 using tinteger10ts = winteger10ts;
1459#else
1460 using tinteger10ts = integer10ts;
1461#endif
1463
1467 template <class T>
1469 {
1470 public:
1472 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1473 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1474 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1475 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1476 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1477 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1478 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1479 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1480 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1481 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1482 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1483 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1484 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1485 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1486 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1487 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1488 _In_ const std::locale& locale = std::locale()) :
1489 basic_integer<T>(locale),
1490 m_digit_0(digit_0),
1491 m_digit_1(digit_1),
1492 m_digit_2(digit_2),
1493 m_digit_3(digit_3),
1494 m_digit_4(digit_4),
1495 m_digit_5(digit_5),
1496 m_digit_6(digit_6),
1497 m_digit_7(digit_7),
1498 m_digit_8(digit_8),
1499 m_digit_9(digit_9),
1500 m_digit_10(digit_10),
1501 m_digit_11(digit_11),
1502 m_digit_12(digit_12),
1503 m_digit_13(digit_13),
1504 m_digit_14(digit_14),
1505 m_digit_15(digit_15)
1506 {}
1507
// Matches a run of hexadecimal digits beginning at `start`.
//
// text  - text to parse
// start - offset in text to start parsing at
// end   - offset in text to stop parsing at
// flags - matching flags, passed through to the digit parsers
//
// On failure the interval is left with start > end and false is returned.
1508 virtual bool match(
1509 _In_reads_or_z_(end) const T* text,
1510 _In_ size_t start = 0,
1511 _In_ size_t end = (size_t)-1,
1512 _In_ int flags = match_default)
1513 {
1514 _Assume_(text || start >= end);
// Consume one digit per iteration until `end`, a zero terminator, or a position no digit parser accepts.
1515 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1516 size_t dig;
// The digit parsers are tried in order 0..15; the first one that matches decides the digit
// and moves the interval end past the text it consumed.
1517 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1518 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1519 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1520 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1521 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1522 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1523 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1524 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1525 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1526 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1527 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1528 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1529 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1530 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1531 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1532 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
1533 else break;
// Shift the accumulated value one hexadecimal place and append the new digit.
1534 this->value = this->value * 16 + dig;
1535 }
1537 this->interval.start = start;
1538 return true;
1539 }
// Failure: mark the interval invalid (start one past end).
1540 this->interval.start = (this->interval.end = start) + 1;
1541 return false;
1542 }
1543
1544 protected:
1545 std::shared_ptr<basic_parser<T>>
1546 m_digit_0,
1547 m_digit_1,
1548 m_digit_2,
1549 m_digit_3,
1550 m_digit_4,
1551 m_digit_5,
1552 m_digit_6,
1553 m_digit_7,
1554 m_digit_8,
1555 m_digit_9,
1556 m_digit_10,
1557 m_digit_11,
1558 m_digit_12,
1559 m_digit_13,
1560 m_digit_14,
1561 m_digit_15;
1562 };
1563
// Build-dependent alias: winteger16 when _UNICODE is defined, integer16 otherwise.
1566#ifdef _UNICODE
1567 using tinteger16 = winteger16;
1568#else
1569 using tinteger16 = integer16;
1570#endif
1572
1576 template <class T>
1578 {
1579 public:
1581 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1582 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1583 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1584 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1585 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1586 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1587 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1588 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1589 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1590 _In_ const std::locale& locale = std::locale()) :
1591 basic_integer<T>(locale),
1592 m_digit_1(digit_1),
1593 m_digit_5(digit_5),
1594 m_digit_10(digit_10),
1595 m_digit_50(digit_50),
1596 m_digit_100(digit_100),
1597 m_digit_500(digit_500),
1598 m_digit_1000(digit_1000),
1599 m_digit_5000(digit_5000),
1600 m_digit_10000(digit_10000)
1601 {}
1602
1603 virtual bool match(
1604 _In_reads_or_z_(end) const T* text,
1605 _In_ size_t start = 0,
1606 _In_ size_t end = (size_t)-1,
1607 _In_ int flags = match_default)
1608 {
1609 _Assume_(text || start >= end);
1610 size_t
1611 dig[5] = { (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1, (size_t)-1 },
1612 end2;
1613
1614 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
1615 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1616 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1617 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1618 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1619 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1620 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1621 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1622 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1623 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1624 else break;
1625
1626 // Store first digit.
1627 if (dig[4] == (size_t)-1) dig[4] = dig[0];
1628
1629 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1630 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1631 break;
1632 }
1633 if (dig[0] <= dig[1]) {
1634 // Digit is less or equal previous one: add.
1635 this->value += dig[0];
1636 }
1637 else if (
1638 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1639 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1640 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1641 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1642 {
1643 // Digit is up to two orders bigger than previous one: subtract. But...
1644 if (dig[2] < dig[0]) {
1645 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1646 break;
1647 }
1648 this->value -= dig[1]; // Cancel addition in the previous step.
1649 dig[0] -= dig[1]; // Combine last two digits.
1650 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1651 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1652 this->value += dig[0]; // Add combined value.
1653 }
1654 else {
1655 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1656 break;
1657 }
1658 }
1659 if (this->value) {
1660 this->interval.start = start;
1661 return true;
1662 }
1663 this->interval.start = (this->interval.end = start) + 1;
1664 return false;
1665 }
1666
1667 protected:
1668 std::shared_ptr<basic_parser<T>>
1669 m_digit_1,
1670 m_digit_5,
1671 m_digit_10,
1672 m_digit_50,
1673 m_digit_100,
1674 m_digit_500,
1675 m_digit_1000,
1676 m_digit_5000,
1677 m_digit_10000;
1678 };
1679
1682#ifdef _UNICODE
1684#else
1686#endif
1688
1692 template <class T>
1694 {
1695 public:
1697 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1698 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1699 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1700 _In_ const std::locale& locale = std::locale()) :
1701 basic_parser<T>(locale),
1702 numerator(_numerator),
1703 fraction_line(_fraction_line),
1704 denominator(_denominator)
1705 {}
1706
1707 virtual bool match(
1708 _In_reads_or_z_(end) const T* text,
1709 _In_ size_t start = 0,
1710 _In_ size_t end = (size_t)-1,
1711 _In_ int flags = match_default)
1712 {
1713 _Assume_(text || start >= end);
1714 if (numerator->match(text, start, end, flags) &&
1715 fraction_line->match(text, numerator->interval.end, end, flags) &&
1716 denominator->match(text, fraction_line->interval.end, end, flags))
1717 {
1718 this->interval.start = start;
1719 this->interval.end = denominator->interval.end;
1720 return true;
1721 }
1722 numerator->invalidate();
1723 fraction_line->invalidate();
1724 denominator->invalidate();
1725 this->interval.start = (this->interval.end = start) + 1;
1726 return false;
1727 }
1728
// Invalidates the numerator, fraction line and denominator sub-parsers.
1729 virtual void invalidate()
1730 {
1731 numerator->invalidate();
1732 fraction_line->invalidate();
1733 denominator->invalidate();
1735 }
1736
1737 public:
1738 std::shared_ptr<basic_parser<T>> numerator;
1739 std::shared_ptr<basic_parser<T>> fraction_line;
1740 std::shared_ptr<basic_parser<T>> denominator;
1741 };
1742
// Build-dependent alias: wfraction when _UNICODE is defined, fraction otherwise.
1745#ifdef _UNICODE
1746 using tfraction = wfraction;
1747#else
1748 using tfraction = fraction;
1749#endif
1751
1755 template <class T>
1756 class basic_score : public basic_parser<T>
1757 {
1758 public:
1760 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1761 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1762 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1763 _In_ const std::shared_ptr<basic_parser<T>>& space,
1764 _In_ const std::locale& locale = std::locale()) :
1765 basic_parser<T>(locale),
1766 home(_home),
1767 separator(_separator),
1768 guest(_guest),
1769 m_space(space)
1770 {}
1771
1772 virtual bool match(
1773 _In_reads_or_z_(end) const T* text,
1774 _In_ size_t start = 0,
1775 _In_ size_t end = (size_t)-1,
1776 _In_ int flags = match_default)
1777 {
1778 _Assume_(text || start >= end);
1779 this->interval.end = start;
1780
1781 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1782
1783 if (home->match(text, this->interval.end, end, flags))
1784 this->interval.end = home->interval.end;
1785 else
1786 goto end;
1787
1788 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1789
1790 if (separator->match(text, this->interval.end, end, flags))
1791 this->interval.end = separator->interval.end;
1792 else
1793 goto end;
1794
1795 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1796
1797 if (guest->match(text, this->interval.end, end, flags))
1798 this->interval.end = guest->interval.end;
1799 else
1800 goto end;
1801
1802 this->interval.start = start;
1803 return true;
1804
1805 end:
1806 home->invalidate();
1807 separator->invalidate();
1808 guest->invalidate();
1809 this->interval.start = (this->interval.end = start) + 1;
1810 return false;
1811 }
1812
// Invalidates the home, separator and guest sub-parsers.
1813 virtual void invalidate()
1814 {
1815 home->invalidate();
1816 separator->invalidate();
1817 guest->invalidate();
1819 }
1820
1821 public:
1822 std::shared_ptr<basic_parser<T>> home;
1823 std::shared_ptr<basic_parser<T>> separator;
1824 std::shared_ptr<basic_parser<T>> guest;
1825
1826 protected:
1827 std::shared_ptr<basic_parser<T>> m_space;
1828 };
1829
// basic_score instantiated for char.
1830 using score = basic_score<char>;
// Build-dependent alias: wscore when _UNICODE is defined, score otherwise.
1832#ifdef _UNICODE
1833 using tscore = wscore;
1834#else
1835 using tscore = score;
1836#endif
1838
1842 template <class T>
1844 {
1845 public:
1847 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1848 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1849 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1850 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1851 _In_ const std::locale& locale = std::locale()) :
1852 basic_parser<T>(locale),
1857 {}
1858
1859 virtual bool match(
1860 _In_reads_or_z_(end) const T* text,
1861 _In_ size_t start = 0,
1862 _In_ size_t end = (size_t)-1,
1863 _In_ int flags = match_default)
1864 {
1865 _Assume_(text || start >= end);
1866 this->interval.end = start;
1867 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1868 this->interval.end = positive_sign->interval.end;
1869 if (negative_sign) negative_sign->invalidate();
1870 if (special_sign) special_sign->invalidate();
1871 }
1872 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1873 this->interval.end = negative_sign->interval.end;
1874 if (positive_sign) positive_sign->invalidate();
1875 if (special_sign) special_sign->invalidate();
1876 }
1877 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1878 this->interval.end = special_sign->interval.end;
1879 if (positive_sign) positive_sign->invalidate();
1880 if (negative_sign) negative_sign->invalidate();
1881 }
1882 else {
1883 if (positive_sign) positive_sign->invalidate();
1884 if (negative_sign) negative_sign->invalidate();
1885 if (special_sign) special_sign->invalidate();
1886 }
1887 if (number->match(text, this->interval.end, end, flags)) {
1888 this->interval.start = start;
1889 this->interval.end = number->interval.end;
1890 return true;
1891 }
1892 if (positive_sign) positive_sign->invalidate();
1893 if (negative_sign) negative_sign->invalidate();
1894 if (special_sign) special_sign->invalidate();
1895 number->invalidate();
1896 this->interval.start = (this->interval.end = start) + 1;
1897 return false;
1898 }
1899
// Invalidates the sign sub-parsers that are set (non-null) and the number sub-parser.
1900 virtual void invalidate()
1901 {
1902 if (positive_sign) positive_sign->invalidate();
1903 if (negative_sign) negative_sign->invalidate();
1904 if (special_sign) special_sign->invalidate();
1905 number->invalidate();
1907 }
1908
1909 public:
1910 std::shared_ptr<basic_parser<T>> positive_sign;
1911 std::shared_ptr<basic_parser<T>> negative_sign;
1912 std::shared_ptr<basic_parser<T>> special_sign;
1913 std::shared_ptr<basic_parser<T>> number;
1914 };
1915
1918#ifdef _UNICODE
1920#else
1922#endif
1924
1928 template <class T>
1930 {
1931 public:
1933 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1934 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1935 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1936 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1937 _In_ const std::shared_ptr<basic_parser<T>>& space,
1938 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1939 _In_ const std::locale& locale = std::locale()) :
1940 basic_parser<T>(locale),
1946 m_space(space)
1947 {}
1948
1949 virtual bool match(
1950 _In_reads_or_z_(end) const T* text,
1951 _In_ size_t start = 0,
1952 _In_ size_t end = (size_t)-1,
1953 _In_ int flags = match_default)
1954 {
1955 _Assume_(text || start >= end);
1956 this->interval.end = start;
1957
1958 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1959 this->interval.end = positive_sign->interval.end;
1960 if (negative_sign) negative_sign->invalidate();
1961 if (special_sign) special_sign->invalidate();
1962 }
1963 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1964 this->interval.end = negative_sign->interval.end;
1965 if (positive_sign) positive_sign->invalidate();
1966 if (special_sign) special_sign->invalidate();
1967 }
1968 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1969 this->interval.end = special_sign->interval.end;
1970 if (positive_sign) positive_sign->invalidate();
1971 if (negative_sign) negative_sign->invalidate();
1972 }
1973 else {
1974 if (positive_sign) positive_sign->invalidate();
1975 if (negative_sign) negative_sign->invalidate();
1976 if (special_sign) special_sign->invalidate();
1977 }
1978
1979 // Check for <integer> <fraction>
1980 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1981 if (integer->match(text, this->interval.end, end, flags) &&
1982 m_space->match(text, integer->interval.end, end, space_match_flags))
1983 {
1984 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1985 if (fraction->match(text, this->interval.end, end, flags)) {
1986 this->interval.start = start;
1987 this->interval.end = fraction->interval.end;
1988 return true;
1989 }
1990 fraction->invalidate();
1991 this->interval.start = start;
1992 this->interval.end = integer->interval.end;
1993 return true;
1994 }
1995
1996 // Check for <fraction>
1997 if (fraction->match(text, this->interval.end, end, flags)) {
1998 integer->invalidate();
1999 this->interval.start = start;
2000 this->interval.end = fraction->interval.end;
2001 return true;
2002 }
2003
2004 // Check for <integer>
2005 if (integer->match(text, this->interval.end, end, flags)) {
2006 fraction->invalidate();
2007 this->interval.start = start;
2008 this->interval.end = integer->interval.end;
2009 return true;
2010 }
2011
2012 if (positive_sign) positive_sign->invalidate();
2013 if (negative_sign) negative_sign->invalidate();
2014 if (special_sign) special_sign->invalidate();
2015 integer->invalidate();
2016 fraction->invalidate();
2017 this->interval.start = (this->interval.end = start) + 1;
2018 return false;
2019 }
2020
// Invalidates the sign sub-parsers that are set (non-null) and the integer and fraction sub-parsers.
2021 virtual void invalidate()
2022 {
2023 if (positive_sign) positive_sign->invalidate();
2024 if (negative_sign) negative_sign->invalidate();
2025 if (special_sign) special_sign->invalidate();
2026 integer->invalidate();
2027 fraction->invalidate();
2029 }
2030
2031 public:
2032 std::shared_ptr<basic_parser<T>> positive_sign;
2033 std::shared_ptr<basic_parser<T>> negative_sign;
2034 std::shared_ptr<basic_parser<T>> special_sign;
2035 std::shared_ptr<basic_parser<T>> integer;
2036 std::shared_ptr<basic_parser<T>> fraction;
2037
2038 protected:
2039 std::shared_ptr<basic_parser<T>> m_space;
2040 };
2041
2044#ifdef _UNICODE
2046#else
2048#endif
2050
2054 template <class T>
2056 {
2057 public:
2059 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2060 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2061 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2062 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2063 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2064 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2065 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2066 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2067 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2068 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2069 _In_ const std::locale& locale = std::locale()) :
2070 basic_parser<T>(locale),
2081 value(std::numeric_limits<double>::quiet_NaN())
2082 {}
2083
// Matches a number with an optional sign, an integer and/or decimal part and an optional exponent,
// and computes its value as a double.
//
// text  - text to parse
// start - offset in text to start parsing at
// end   - offset in text to stop parsing at
// flags - matching flags, passed through to the sub-parsers
//
// Returns false, with all sub-parsers invalidated and the interval left with start > end, when
// neither an integer nor a decimal part is found; true otherwise.
2084 virtual bool match(
2085 _In_reads_or_z_(end) const T* text,
2086 _In_ size_t start = 0,
2087 _In_ size_t end = (size_t)-1,
2088 _In_ int flags = match_default)
2089 {
2090 _Assume_(text || start >= end);
2091 this->interval.end = start;
2092
// At most one sign is accepted, tried in the order positive, negative, special. Null sign
// parsers are skipped, and the sign parsers that were not accepted are invalidated.
2093 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2094 this->interval.end = positive_sign->interval.end;
2095 if (negative_sign) negative_sign->invalidate();
2096 if (special_sign) special_sign->invalidate();
2097 }
2098 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2099 this->interval.end = negative_sign->interval.end;
2100 if (positive_sign) positive_sign->invalidate();
2101 if (special_sign) special_sign->invalidate();
2102 }
2103 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2104 this->interval.end = special_sign->interval.end;
2105 if (positive_sign) positive_sign->invalidate();
2106 if (negative_sign) negative_sign->invalidate();
2107 }
2108 else {
2109 if (positive_sign) positive_sign->invalidate();
2110 if (negative_sign) negative_sign->invalidate();
2111 if (special_sign) special_sign->invalidate();
2112 }
2113
// The integer part is optional at this point; a missing one is checked for further below.
2114 if (integer->match(text, this->interval.end, end, flags))
2115 this->interval.end = integer->interval.end;
2116
2117 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2119 this->interval.end = decimal->interval.end;
2120 else {
2121 decimal_separator->invalidate();
2122 decimal->invalidate();
2123 }
2124
2125 if (integer->interval.empty() &&
2126 decimal->interval.empty())
2127 {
2128 // No integer part, no decimal part.
2129 if (positive_sign) positive_sign->invalidate();
2130 if (negative_sign) negative_sign->invalidate();
2131 if (special_sign) special_sign->invalidate();
2132 integer->invalidate();
2133 decimal_separator->invalidate();
2134 decimal->invalidate();
2135 if (exponent_symbol) exponent_symbol->invalidate();
2136 if (positive_exp_sign) positive_exp_sign->invalidate();
2137 if (negative_exp_sign) negative_exp_sign->invalidate();
2138 if (exponent) exponent->invalidate();
2139 this->interval.start = (this->interval.end = start) + 1;
2140 return false;
2141 }
2142
// The exponent is optional: when none is accepted, all exponent-related parsers are invalidated
// and the match still succeeds.
2143 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2146 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2147 {
2148 this->interval.end = exponent->interval.end;
2149 if (negative_exp_sign) negative_exp_sign->invalidate();
2150 }
2151 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2154 {
2155 this->interval.end = exponent->interval.end;
2156 if (positive_exp_sign) positive_exp_sign->invalidate();
2157 }
2158 else {
2159 if (exponent_symbol) exponent_symbol->invalidate();
2160 if (positive_exp_sign) positive_exp_sign->invalidate();
2161 if (negative_exp_sign) negative_exp_sign->invalidate();
2162 if (exponent) exponent->invalidate();
2163 }
2164
// Assemble the value: integer part, plus the decimal part scaled down by ten to the power of the
// decimal interval's size, negated when the negative sign matched, then scaled by ten to the
// power of the (possibly negated) exponent.
2165 value = (double)integer->value;
2166 if (decimal->interval)
2167 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2168 if (negative_sign && negative_sign->interval)
2169 value = -value;
2170 if (exponent && exponent->interval) {
2171 double e = (double)exponent->value;
2172 if (negative_exp_sign && negative_exp_sign->interval)
2173 e = -e;
2174 value *= pow(10.0, e);
2175 }
2176
2177 this->interval.start = start;
2178 return true;
2179 }
2180
// Invalidates all sub-parsers (the optional ones only when set) and resets value to a quiet NaN.
2181 virtual void invalidate()
2182 {
2183 if (positive_sign) positive_sign->invalidate();
2184 if (negative_sign) negative_sign->invalidate();
2185 if (special_sign) special_sign->invalidate();
2186 integer->invalidate();
2187 decimal_separator->invalidate();
2188 decimal->invalidate();
2189 if (exponent_symbol) exponent_symbol->invalidate();
2190 if (positive_exp_sign) positive_exp_sign->invalidate();
2191 if (negative_exp_sign) negative_exp_sign->invalidate();
2192 if (exponent) exponent->invalidate();
2193 value = std::numeric_limits<double>::quiet_NaN();
2195 }
2196
2197 public:
2198 std::shared_ptr<basic_parser<T>> positive_sign;
2199 std::shared_ptr<basic_parser<T>> negative_sign;
2200 std::shared_ptr<basic_parser<T>> special_sign;
2201 std::shared_ptr<basic_integer<T>> integer;
2202 std::shared_ptr<basic_parser<T>> decimal_separator;
2203 std::shared_ptr<basic_integer<T>> decimal;
2204 std::shared_ptr<basic_parser<T>> exponent_symbol;
2205 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2206 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2207 std::shared_ptr<basic_integer<T>> exponent;
2208 double value;
2209 };
2210
2213#ifdef _UNICODE
2215#else
2217#endif
2219
2223 template <class T>
2225 {
2226 public:
2228 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2229 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2230 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2231 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2232 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2233 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2234 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2235 _In_ const std::locale& locale = std::locale()) :
2236 basic_parser<T>(locale),
2244 {}
2245
// Matches a monetary amount: optional sign, mandatory currency, then an integer and/or decimal part.
//
// text  - text to parse
// start - offset in text to start parsing at
// end   - offset in text to stop parsing at
// flags - matching flags, passed through to the sub-parsers
//
// Returns false, with the interval left with start > end, when the currency does not match or
// when neither an integer nor a decimal part is found; true otherwise.
2246 virtual bool match(
2247 _In_reads_or_z_(end) const T* text,
2248 _In_ size_t start = 0,
2249 _In_ size_t end = (size_t)-1,
2250 _In_ int flags = match_default)
2251 {
2252 _Assume_(text || start >= end);
2253 this->interval.end = start;
2254
// At most one sign is accepted, tried in the order positive, negative, special.
// NOTE(review): the three sign parsers are dereferenced in these conditions without a null check,
// while every invalidate() call on them in this method is null-guarded - confirm whether null
// sign parsers are allowed here.
2255 if (positive_sign->match(text, this->interval.end, end, flags)) {
2256 this->interval.end = positive_sign->interval.end;
2257 if (negative_sign) negative_sign->invalidate();
2258 if (special_sign) special_sign->invalidate();
2259 }
2260 else if (negative_sign->match(text, this->interval.end, end, flags)) {
2261 this->interval.end = negative_sign->interval.end;
2262 if (positive_sign) positive_sign->invalidate();
2263 if (special_sign) special_sign->invalidate();
2264 }
2265 else if (special_sign->match(text, this->interval.end, end, flags)) {
2266 this->interval.end = special_sign->interval.end;
2267 if (positive_sign) positive_sign->invalidate();
2268 if (negative_sign) negative_sign->invalidate();
2269 }
2270 else {
2271 if (positive_sign) positive_sign->invalidate();
2272 if (negative_sign) negative_sign->invalidate();
2273 if (special_sign) special_sign->invalidate();
2274 }
2275
// The currency is mandatory: without it the whole match fails.
2276 if (currency->match(text, this->interval.end, end, flags))
2277 this->interval.end = currency->interval.end;
2278 else {
2279 if (positive_sign) positive_sign->invalidate();
2280 if (negative_sign) negative_sign->invalidate();
2281 if (special_sign) special_sign->invalidate();
2282 integer->invalidate();
2283 decimal_separator->invalidate();
2284 decimal->invalidate();
2285 this->interval.start = (this->interval.end = start) + 1;
2286 return false;
2287 }
2288
// The integer part is optional at this point; a missing one is checked for further below.
2289 if (integer->match(text, this->interval.end, end, flags))
2290 this->interval.end = integer->interval.end;
2291 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2293 this->interval.end = decimal->interval.end;
2294 else {
2295 decimal_separator->invalidate();
2296 decimal->invalidate();
2297 }
2298
2299 if (integer->interval.empty() &&
2300 decimal->interval.empty())
2301 {
2302 // No integer part, no decimal part.
2303 if (positive_sign) positive_sign->invalidate();
2304 if (negative_sign) negative_sign->invalidate();
2305 if (special_sign) special_sign->invalidate();
2306 currency->invalidate();
2307 integer->invalidate();
2308 decimal_separator->invalidate();
2309 decimal->invalidate();
2310 this->interval.start = (this->interval.end = start) + 1;
2311 return false;
2312 }
2313
2314 this->interval.start = start;
2315 return true;
2316 }
2317
/// Invalidates every sub-parser of the numeral. The three sign parsers are
/// skipped when they are null; the remaining parsers are dereferenced
/// unconditionally.
2318 virtual void invalidate()
2319 {
2320 if (positive_sign) positive_sign->invalidate();
2321 if (negative_sign) negative_sign->invalidate();
2322 if (special_sign) special_sign->invalidate();
2323 currency->invalidate();
2324 integer->invalidate();
2325 decimal_separator->invalidate();
2326 decimal->invalidate();
2328 }
2329
2330 public:
2331 std::shared_ptr<basic_parser<T>> positive_sign;
2332 std::shared_ptr<basic_parser<T>> negative_sign;
2333 std::shared_ptr<basic_parser<T>> special_sign;
2334 std::shared_ptr<basic_parser<T>> currency;
2335 std::shared_ptr<basic_parser<T>> integer;
2336 std::shared_ptr<basic_parser<T>> decimal_separator;
2337 std::shared_ptr<basic_parser<T>> decimal;
2338 };
2339
2342#ifdef _UNICODE
2344#else
2346#endif
2348
2352 template <class T>
2354 {
2355 public:
2357 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2358 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2359 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2360 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2361 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2362 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2363 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2364 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2365 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2366 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2367 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2368 _In_ const std::locale& locale = std::locale()) :
2369 basic_parser<T>(locale),
2370 m_digit_0(digit_0),
2371 m_digit_1(digit_1),
2372 m_digit_2(digit_2),
2373 m_digit_3(digit_3),
2374 m_digit_4(digit_4),
2375 m_digit_5(digit_5),
2376 m_digit_6(digit_6),
2377 m_digit_7(digit_7),
2378 m_digit_8(digit_8),
2379 m_digit_9(digit_9),
2380 m_separator(separator)
2381 {
2382 value.s_addr = 0;
2383 }
2384
2385 virtual bool match(
2386 _In_reads_or_z_(end) const T* text,
2387 _In_ size_t start = 0,
2388 _In_ size_t end = (size_t)-1,
2389 _In_ int flags = match_default)
2390 {
2391 _Assume_(text || start >= end);
2392 this->interval.end = start;
2393 value.s_addr = 0;
2394
2395 size_t i;
2396 for (i = 0; i < 4; i++) {
2397 if (i) {
2398 if (m_separator->match(text, this->interval.end, end, flags))
2399 this->interval.end = m_separator->interval.end;
2400 else
2401 goto error;
2402 }
2403
2404 components[i].start = this->interval.end;
2405 bool is_empty = true;
2406 size_t x;
2407 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2408 size_t dig, digit_end;
2409 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2410 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2411 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2412 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2413 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2414 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2415 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2416 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2417 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2418 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2419 else break;
2420 size_t x_n = x * 10 + dig;
2421 if (x_n <= 255) {
2422 x = x_n;
2423 this->interval.end = digit_end;
2424 is_empty = false;
2425 }
2426 else
2427 break;
2428 }
2429 if (is_empty)
2430 goto error;
2431 components[i].end = this->interval.end;
2432 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2433 }
2434 if (i < 4)
2435 goto error;
2436
2437 this->interval.start = start;
2438 return true;
2439
2440 error:
2441 components[0].start = 1;
2442 components[0].end = 0;
2443 components[1].start = 1;
2444 components[1].end = 0;
2445 components[2].start = 1;
2446 components[2].end = 0;
2447 components[3].start = 1;
2448 components[3].end = 0;
2449 value.s_addr = 0;
2450 this->interval.start = (this->interval.end = start) + 1;
2451 return false;
2452 }
2453
/// Resets the parsed address: each of the four component intervals is set to
/// start = 1, end = 0 (start past end) and the address value is zeroed.
2454 virtual void invalidate()
2455 {
2456 components[0].start = 1;
2457 components[0].end = 0;
2458 components[1].start = 1;
2459 components[1].end = 0;
2460 components[2].start = 1;
2461 components[2].end = 0;
2462 components[3].start = 1;
2463 components[3].end = 0;
2464 value.s_addr = 0;
2466 }
2467
2468 public:
2471
2472 protected:
2473 std::shared_ptr<basic_parser<T>>
2474 m_digit_0,
2475 m_digit_1,
2476 m_digit_2,
2477 m_digit_3,
2478 m_digit_4,
2479 m_digit_5,
2480 m_digit_6,
2481 m_digit_7,
2482 m_digit_8,
2483 m_digit_9;
2484 std::shared_ptr<basic_parser<T>> m_separator;
2485 };
2486
2489#ifdef _UNICODE
2491#else
2493#endif
2495
2499 template <class T>
2501 {
2502 public:
/// Constructs the matcher; `locale` is forwarded to the basic_parser<T> base.
2503 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2504
2505 virtual bool match(
2506 _In_reads_or_z_(end) const T* text,
2507 _In_ size_t start = 0,
2508 _In_ size_t end = (size_t)-1,
2509 _In_ int flags = match_default)
2510 {
2511 _Assume_(text || start >= end);
2512 if (start < end && text[start]) {
2513 if (text[start] == '-' ||
2514 text[start] == '_' ||
2515 text[start] == ':' ||
2516 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2517 {
2518 this->interval.end = (this->interval.start = start) + 1;
2519 return true;
2520 }
2521 }
2522 this->interval.start = (this->interval.end = start) + 1;
2523 return false;
2524 }
2525 };
2526
2529#ifdef _UNICODE
2531#else
2533#endif
2534
2539 {
2540 public:
/// Constructs the matcher; `locale` is forwarded to the sgml_parser base.
2541 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2542
2543 virtual bool match(
2544 _In_reads_or_z_(end) const char* text,
2545 _In_ size_t start = 0,
2546 _In_ size_t end = (size_t)-1,
2547 _In_ int flags = match_default)
2548 {
2549 _Assume_(text || start >= end);
2550 if (start < end && text[start]) {
2551 wchar_t buf[3];
2552 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2553 const wchar_t* chr_end = chr + stdex::strlen(chr);
2554 if (((chr[0] == L'-' ||
2555 chr[0] == L'_' ||
2556 chr[0] == L':') && chr[1] == 0) ||
2557 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2558 {
2559 this->interval.start = start;
2560 return true;
2561 }
2562 }
2563 this->interval.start = (this->interval.end = start) + 1;
2564 return false;
2565 }
2566 };
2567
2571 template <class T>
2573 {
2574 public:
2576 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2577 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2578 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2579 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2580 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2581 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2582 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2583 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2584 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2585 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2586 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2587 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2588 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2589 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2590 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2591 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2592 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2593 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2594 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2595 _In_ const std::locale& locale = std::locale()) :
2596 basic_parser<T>(locale),
2597 m_digit_0(digit_0),
2598 m_digit_1(digit_1),
2599 m_digit_2(digit_2),
2600 m_digit_3(digit_3),
2601 m_digit_4(digit_4),
2602 m_digit_5(digit_5),
2603 m_digit_6(digit_6),
2604 m_digit_7(digit_7),
2605 m_digit_8(digit_8),
2606 m_digit_9(digit_9),
2607 m_digit_10(digit_10),
2608 m_digit_11(digit_11),
2609 m_digit_12(digit_12),
2610 m_digit_13(digit_13),
2611 m_digit_14(digit_14),
2612 m_digit_15(digit_15),
2613 m_separator(separator),
2614 m_scope_id_separator(scope_id_separator),
2616 {
2617 memset(&value, 0, sizeof(value));
2618 }
2619
/// Matches an IPv6 address: up to eight groups of hexadecimal digits (each at
/// most 0xffff) joined by `m_separator`, with at most one doubled separator
/// acting as zero compaction, optionally followed by a scope ID introduced by
/// `m_scope_id_separator`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Offset in `text` where matching begins
/// \param[in] end    Offset in `text` where matching must stop
/// \param[in] flags  Flags handed on to every sub-parser
///
/// \returns `true` with `interval` spanning the address (and the scope ID, if
///          one matched); `false` with `components`, `value` and `scope_id`
///          reset and `interval.start = interval.end + 1` otherwise.
2620 virtual bool match(
2621 _In_reads_or_z_(end) const T* text,
2622 _In_ size_t start = 0,
2623 _In_ size_t end = (size_t)-1,
2624 _In_ int flags = match_default)
2625 {
2626 _Assume_(text || start >= end);
2627 this->interval.end = start;
2628 memset(&value, 0, sizeof(value));
2629
// compaction_i is the group index at which a doubled separator was seen;
// (size_t)-1 means none has been seen yet.
2630 size_t i, compaction_i = (size_t)-1, compaction_start = start;
2631 for (i = 0; i < 8; i++) {
2632 bool is_empty = true;
2633
// A second separator directly after the first one marks zero compaction.
2634 if (m_separator->match(text, this->interval.end, end, flags)) {
2635 if (m_separator->match(text, m_separator->interval.end, end, flags)) {
2636 // :: found
2637 if (compaction_i == (size_t)-1) {
2638 // Zero compaction start
2639 compaction_i = i;
2640 compaction_start = m_separator->interval.start;
2641 this->interval.end = m_separator->interval.end;
2642 }
2643 else {
2644 // More than one zero compaction
2645 break;
2646 }
2647 }
2648 else if (i) {
2649 // Inner : found
2650 this->interval.end = m_separator->interval.end;
2651 }
2652 else {
2653 // Leading : found
2654 goto error;
2655 }
2656 }
2657 else if (i) {
2658 // : missing
2659 break;
2660 }
2661
2662 components[i].start = this->interval.end;
2663 size_t x;
// Accumulate hexadecimal digits; the digit parsers are tried in ascending
// order and the first one that matches supplies the digit value.
2664 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2665 size_t dig, digit_end;
2666 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2667 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2668 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2669 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2670 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2671 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2672 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2673 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2674 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2675 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2676 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2677 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2678 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2679 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2680 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2681 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2682 else break;
// A digit that would push the group above 0xffff is left unconsumed.
2683 size_t x_n = x * 16 + dig;
2684 if (x_n <= 0xffff) {
2685 x = x_n;
2686 this->interval.end = digit_end;
2687 is_empty = false;
2688 }
2689 else
2690 break;
2691 }
2692 if (is_empty) {
2693 if (compaction_i != (size_t)-1) {
2694 // Zero compaction active: no sweat.
2695 break;
2696 }
2697 goto error;
2698 }
2699 components[i].end = this->interval.end;
// NOTE(review): the group is stored as the host-order value of x; confirm the
// byte order callers expect in s6_words.
2700 this->value.s6_words[i] = (uint16_t)x;
2701 }
2702
2703 if (compaction_i != (size_t)-1) {
2704 // Align components right due to zero compaction.
// NOTE(review): two lines of the original are not visible in this excerpt, one
// in each loop below; the second loop's last visible statement is truncated.
2705 size_t j, k;
2706 for (j = 8, k = i; k > compaction_i;) {
2707 this->value.s6_words[--j] = this->value.s6_words[--k];
2709 }
2710 for (; j > compaction_i;) {
2711 this->value.s6_words[--j] = 0;
2712 components[j].start =
2714 }
2715 }
2716 else if (i < 8)
2717 goto error;
2718
// The scope ID is optional: it is consumed only when both its separator and
// the scope ID parser match; otherwise the scope ID parser is invalidated.
2719 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2720 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2721 this->interval.end = scope_id->interval.end;
2722 else if (scope_id)
2723 scope_id->invalidate();
2724
2725 this->interval.start = start;
2726 return true;
2727
// Failure: reset every component (start past end), the address value and the
// scope ID, then report no match.
2728 error:
2729 components[0].start = 1;
2730 components[0].end = 0;
2731 components[1].start = 1;
2732 components[1].end = 0;
2733 components[2].start = 1;
2734 components[2].end = 0;
2735 components[3].start = 1;
2736 components[3].end = 0;
2737 components[4].start = 1;
2738 components[4].end = 0;
2739 components[5].start = 1;
2740 components[5].end = 0;
2741 components[6].start = 1;
2742 components[6].end = 0;
2743 components[7].start = 1;
2744 components[7].end = 0;
2745 memset(&value, 0, sizeof(value));
2746 if (scope_id) scope_id->invalidate();
2747 this->interval.start = (this->interval.end = start) + 1;
2748 return false;
2749 }
2750
/// Resets the parsed address: each of the eight component intervals is set to
/// start = 1, end = 0 (start past end), the address value is zeroed, and the
/// scope ID parser is invalidated when it is set.
2751 virtual void invalidate()
2752 {
2753 components[0].start = 1;
2754 components[0].end = 0;
2755 components[1].start = 1;
2756 components[1].end = 0;
2757 components[2].start = 1;
2758 components[2].end = 0;
2759 components[3].start = 1;
2760 components[3].end = 0;
2761 components[4].start = 1;
2762 components[4].end = 0;
2763 components[5].start = 1;
2764 components[5].end = 0;
2765 components[6].start = 1;
2766 components[6].end = 0;
2767 components[7].start = 1;
2768 components[7].end = 0;
2769 memset(&value, 0, sizeof(value));
2770 if (scope_id) scope_id->invalidate();
2772 }
2773
2774 public:
2777 std::shared_ptr<basic_parser<T>> scope_id;
2778
2779 protected:
2780 std::shared_ptr<basic_parser<T>>
2781 m_digit_0,
2782 m_digit_1,
2783 m_digit_2,
2784 m_digit_3,
2785 m_digit_4,
2786 m_digit_5,
2787 m_digit_6,
2788 m_digit_7,
2789 m_digit_8,
2790 m_digit_9,
2791 m_digit_10,
2792 m_digit_11,
2793 m_digit_12,
2794 m_digit_13,
2795 m_digit_14,
2796 m_digit_15;
2797 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2798 };
2799
2802#ifdef _UNICODE
2804#else
2806#endif
2808
2812 template <class T>
2814 {
2815 public:
2817 _In_ bool allow_idn,
2818 _In_ const std::locale& locale = std::locale()) :
2819 basic_parser<T>(locale),
2820 m_allow_idn(allow_idn),
2821 allow_on_edge(true)
2822 {}
2823
2824 virtual bool match(
2825 _In_reads_or_z_(end) const T* text,
2826 _In_ size_t start = 0,
2827 _In_ size_t end = (size_t)-1,
2828 _In_ int flags = match_default)
2829 {
2830 _Assume_(text || start >= end);
2831 if (start < end && text[start]) {
2832 if (('A' <= text[start] && text[start] <= 'Z') ||
2833 ('a' <= text[start] && text[start] <= 'z') ||
2834 ('0' <= text[start] && text[start] <= '9'))
2835 allow_on_edge = true;
2836 else if (text[start] == '-')
2837 allow_on_edge = false;
2838 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2839 allow_on_edge = true;
2840 else {
2841 this->interval.start = (this->interval.end = start) + 1;
2842 return false;
2843 }
2844 this->interval.end = (this->interval.start = start) + 1;
2845 return true;
2846 }
2847 this->interval.start = (this->interval.end = start) + 1;
2848 return false;
2849 }
2850
2851 public:
2853
2854 protected:
2855 bool m_allow_idn;
2856 };
2857
2860#ifdef _UNICODE
2862#else
2864#endif
2865
2870 {
2871 public:
2873 _In_ bool allow_idn,
2874 _In_ const std::locale& locale = std::locale()) :
2876 {}
2877
2878 virtual bool match(
2879 _In_reads_or_z_(end) const char* text,
2880 _In_ size_t start = 0,
2881 _In_ size_t end = (size_t)-1,
2882 _In_ int flags = match_default)
2883 {
2884 _Assume_(text || start >= end);
2885 if (start < end && text[start]) {
2886 wchar_t buf[3];
2887 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2888 const wchar_t* chr_end = chr + stdex::strlen(chr);
2889 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2890 ('a' <= chr[0] && chr[0] <= 'z') ||
2891 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2892 allow_on_edge = true;
2893 else if (chr[0] == '-' && chr[1] == 0)
2894 allow_on_edge = false;
2895 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2896 allow_on_edge = true;
2897 else {
2898 this->interval.start = (this->interval.end = start) + 1;
2899 return false;
2900 }
2901 this->interval.start = start;
2902 return true;
2903 }
2904 this->interval.start = (this->interval.end = start) + 1;
2905 return false;
2906 }
2907 };
2908
2912 template <class T>
2914 {
2915 public:
2917 _In_ bool allow_absolute,
2918 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2919 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2920 _In_ const std::locale& locale = std::locale()) :
2921 basic_parser<T>(locale),
2923 m_domain_char(domain_char),
2924 m_separator(separator)
2925 {}
2926
/// Matches a DNS name: a sequence of up to 127 labels made of characters
/// accepted by `m_domain_char`, joined by `m_separator`.
///
/// A label must begin with a character whose `allow_on_edge` is true, and the
/// reported end of the match only advances past characters whose
/// `allow_on_edge` is true, so trailing characters that are not allowed on an
/// edge are scanned but excluded from `interval`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Offset in `text` where matching begins
/// \param[in] end    Offset in `text` where matching must stop
/// \param[in] flags  Flags handed on to the character and separator parsers
///
/// \returns `true` with `interval.start = start` when at least one label
///          character matched; `false` with `interval.end = start` and
///          `interval.start = start + 1` otherwise.
2927 virtual bool match(
2928 _In_reads_or_z_(end) const T* text,
2929 _In_ size_t start = 0,
2930 _In_ size_t end = (size_t)-1,
2931 _In_ int flags = match_default)
2932 {
2933 _Assume_(text || start >= end);
// i is the scan position; count is the number of labels started so far.
2934 size_t i = start, count;
2935 for (count = 0; i < end && text[i] && count < 127; count++) {
2936 if (m_domain_char->match(text, i, end, flags) &&
2937 m_domain_char->allow_on_edge)
2938 {
2939 // Domain start
2940 this->interval.end = i = m_domain_char->interval.end;
2941 while (i < end && text[i]) {
// A separator ends the label only when the previous character may stand on a
// label edge.
2942 if (m_domain_char->allow_on_edge &&
2943 m_separator->match(text, i, end, flags))
2944 {
2945 // Domain end
// With m_allow_absolute the separator itself becomes part of the match;
// otherwise the match ends before it while scanning resumes after it.
2946 if (m_allow_absolute)
2947 this->interval.end = i = m_separator->interval.end;
2948 else {
2949 this->interval.end = i;
2950 i = m_separator->interval.end;
2951 }
2952 break;
2953 }
2954 if (m_domain_char->match(text, i, end, flags)) {
2955 if (m_domain_char->allow_on_edge)
2956 this->interval.end = i = m_domain_char->interval.end;
2957 else
2958 i = m_domain_char->interval.end;
2959 }
2960 else {
// Neither a separator nor a label character: the name ends at the last
// recorded edge position.
2961 this->interval.start = start;
2962 return true;
2963 }
2964 }
2965 }
2966 else
2967 break;
2968 }
2969 if (count) {
2970 this->interval.start = start;
2971 return true;
2972 }
2973 this->interval.start = (this->interval.end = start) + 1;
2974 return false;
2975 }
2976
2977 protected:
2979 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2980 std::shared_ptr<basic_parser<T>> m_separator;
2981 };
2982
// tdns_name aliases wdns_name when _UNICODE is defined and dns_name otherwise.
2985#ifdef _UNICODE
2986 using tdns_name = wdns_name;
2987#else
2988 using tdns_name = dns_name;
2989#endif
2991
2995 template <class T>
2997 {
2998 public:
/// Constructs the matcher; `locale` is forwarded to the basic_parser<T> base.
2999 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3000
3001 virtual bool match(
3002 _In_reads_or_z_(end) const T* text,
3003 _In_ size_t start = 0,
3004 _In_ size_t end = (size_t)-1,
3005 _In_ int flags = match_default)
3006 {
3007 _Assume_(text || start >= end);
3008 if (start < end && text[start]) {
3009 if (text[start] == '-' ||
3010 text[start] == '.' ||
3011 text[start] == '_' ||
3012 text[start] == '~' ||
3013 text[start] == '%' ||
3014 text[start] == '!' ||
3015 text[start] == '$' ||
3016 text[start] == '&' ||
3017 text[start] == '\'' ||
3018 //text[start] == '(' ||
3019 //text[start] == ')' ||
3020 text[start] == '*' ||
3021 text[start] == '+' ||
3022 text[start] == ',' ||
3023 text[start] == ';' ||
3024 text[start] == '=' ||
3025 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3026 {
3027 this->interval.end = (this->interval.start = start) + 1;
3028 return true;
3029 }
3030 }
3031 this->interval.start = (this->interval.end = start) + 1;
3032 return false;
3033 }
3034 };
3035
3038#ifdef _UNICODE
3040#else
3042#endif
3043
3048 {
3049 public:
/// Constructs the matcher; `locale` is forwarded to the basic_url_username_char<char> base.
3050 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3051
3052 virtual bool match(
3053 _In_reads_or_z_(end) const char* text,
3054 _In_ size_t start = 0,
3055 _In_ size_t end = (size_t)-1,
3056 _In_ int flags = match_default)
3057 {
3058 _Assume_(text || start >= end);
3059 if (start < end && text[start]) {
3060 wchar_t buf[3];
3061 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3062 const wchar_t* chr_end = chr + stdex::strlen(chr);
3063 if (((chr[0] == L'-' ||
3064 chr[0] == L'.' ||
3065 chr[0] == L'_' ||
3066 chr[0] == L'~' ||
3067 chr[0] == L'%' ||
3068 chr[0] == L'!' ||
3069 chr[0] == L'$' ||
3070 chr[0] == L'&' ||
3071 chr[0] == L'\'' ||
3072 //chr[0] == L'(' ||
3073 //chr[0] == L')' ||
3074 chr[0] == L'*' ||
3075 chr[0] == L'+' ||
3076 chr[0] == L',' ||
3077 chr[0] == L';' ||
3078 chr[0] == L'=') && chr[1] == 0) ||
3079 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3080 {
3081 this->interval.start = start;
3082 return true;
3083 }
3084 }
3085
3086 this->interval.start = (this->interval.end = start) + 1;
3087 return false;
3088 }
3089 };
3090
3094 template <class T>
3096 {
3097 public:
/// Constructs the matcher; `locale` is forwarded to the basic_parser<T> base.
3098 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3099
3100 virtual bool match(
3101 _In_reads_or_z_(end) const T* text,
3102 _In_ size_t start = 0,
3103 _In_ size_t end = (size_t)-1,
3104 _In_ int flags = match_default)
3105 {
3106 _Assume_(text || start >= end);
3107 if (start < end && text[start]) {
3108 if (text[start] == '-' ||
3109 text[start] == '.' ||
3110 text[start] == '_' ||
3111 text[start] == '~' ||
3112 text[start] == '%' ||
3113 text[start] == '!' ||
3114 text[start] == '$' ||
3115 text[start] == '&' ||
3116 text[start] == '\'' ||
3117 text[start] == '(' ||
3118 text[start] == ')' ||
3119 text[start] == '*' ||
3120 text[start] == '+' ||
3121 text[start] == ',' ||
3122 text[start] == ';' ||
3123 text[start] == '=' ||
3124 text[start] == ':' ||
3125 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3126 {
3127 this->interval.end = (this->interval.start = start) + 1;
3128 return true;
3129 }
3130 }
3131 this->interval.start = (this->interval.end = start) + 1;
3132 return false;
3133 }
3134 };
3135
3138#ifdef _UNICODE
3140#else
3142#endif
3143
3148 {
3149 public:
/// Constructs the matcher; `locale` is forwarded to the basic_url_password_char<char> base.
3150 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3151
3152 virtual bool match(
3153 _In_reads_or_z_(end) const char* text,
3154 _In_ size_t start = 0,
3155 _In_ size_t end = (size_t)-1,
3156 _In_ int flags = match_default)
3157 {
3158 _Assume_(text || start >= end);
3159 if (start < end && text[start]) {
3160 wchar_t buf[3];
3161 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3162 const wchar_t* chr_end = chr + stdex::strlen(chr);
3163 if (((chr[0] == L'-' ||
3164 chr[0] == L'.' ||
3165 chr[0] == L'_' ||
3166 chr[0] == L'~' ||
3167 chr[0] == L'%' ||
3168 chr[0] == L'!' ||
3169 chr[0] == L'$' ||
3170 chr[0] == L'&' ||
3171 chr[0] == L'\'' ||
3172 chr[0] == L'(' ||
3173 chr[0] == L')' ||
3174 chr[0] == L'*' ||
3175 chr[0] == L'+' ||
3176 chr[0] == L',' ||
3177 chr[0] == L';' ||
3178 chr[0] == L'=' ||
3179 chr[0] == L':') && chr[1] == 0) ||
3180 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3181 {
3182 this->interval.start = start;
3183 return true;
3184 }
3185 }
3186 this->interval.start = (this->interval.end = start) + 1;
3187 return false;
3188 }
3189 };
3190
3194 template <class T>
3196 {
3197 public:
/// Constructs the matcher; `locale` is forwarded to the basic_parser<T> base.
3198 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3199
3200 virtual bool match(
3201 _In_reads_or_z_(end) const T* text,
3202 _In_ size_t start = 0,
3203 _In_ size_t end = (size_t)-1,
3204 _In_ int flags = match_default)
3205 {
3206 _Assume_(text || start >= end);
3207 if (start < end && text[start]) {
3208 if (text[start] == '/' ||
3209 text[start] == '-' ||
3210 text[start] == '.' ||
3211 text[start] == '_' ||
3212 text[start] == '~' ||
3213 text[start] == '%' ||
3214 text[start] == '!' ||
3215 text[start] == '$' ||
3216 text[start] == '&' ||
3217 text[start] == '\'' ||
3218 text[start] == '(' ||
3219 text[start] == ')' ||
3220 text[start] == '*' ||
3221 text[start] == '+' ||
3222 text[start] == ',' ||
3223 text[start] == ';' ||
3224 text[start] == '=' ||
3225 text[start] == ':' ||
3226 text[start] == '@' ||
3227 text[start] == '?' ||
3228 text[start] == '#' ||
3229 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3230 {
3231 this->interval.end = (this->interval.start = start) + 1;
3232 return true;
3233 }
3234 }
3235 this->interval.start = (this->interval.end = start) + 1;
3236 return false;
3237 }
3238 };
3239
3242#ifdef _UNICODE
3244#else
3246#endif
3247
3252 {
3253 public:
/// Constructs the matcher; `locale` is forwarded to the basic_url_path_char<char> base.
3254 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3255
3256 virtual bool match(
3257 _In_reads_or_z_(end) const char* text,
3258 _In_ size_t start = 0,
3259 _In_ size_t end = (size_t)-1,
3260 _In_ int flags = match_default)
3261 {
3262 _Assume_(text || start >= end);
3263 if (start < end && text[start]) {
3264 wchar_t buf[3];
3265 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3266 const wchar_t* chr_end = chr + stdex::strlen(chr);
3267 if (((chr[0] == L'/' ||
3268 chr[0] == L'-' ||
3269 chr[0] == L'.' ||
3270 chr[0] == L'_' ||
3271 chr[0] == L'~' ||
3272 chr[0] == L'%' ||
3273 chr[0] == L'!' ||
3274 chr[0] == L'$' ||
3275 chr[0] == L'&' ||
3276 chr[0] == L'\'' ||
3277 chr[0] == L'(' ||
3278 chr[0] == L')' ||
3279 chr[0] == L'*' ||
3280 chr[0] == L'+' ||
3281 chr[0] == L',' ||
3282 chr[0] == L';' ||
3283 chr[0] == L'=' ||
3284 chr[0] == L':' ||
3285 chr[0] == L'@' ||
3286 chr[0] == L'?' ||
3287 chr[0] == L'#') && chr[1] == 0) ||
3288 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3289 {
3290 this->interval.start = start;
3291 return true;
3292 }
3293 }
3294 this->interval.start = (this->interval.end = start) + 1;
3295 return false;
3296 }
3297 };
3298
3302 template <class T>
3304 {
3305 public:
3307 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3308 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3309 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3310 _In_ const std::locale& locale = std::locale()) :
3311 basic_parser<T>(locale),
3312 m_path_char(path_char),
3313 m_query_start(query_start),
3314 m_bookmark_start(bookmark_start)
3315 {}
3316
/// Matches a URL path, optionally followed by a query (introduced by
/// `m_query_start`) and a bookmark (introduced by `m_bookmark_start`). The
/// characters of all three parts are accepted by `m_path_char`.
///
/// \param[in] text   Text to parse
/// \param[in] start  Offset in `text` where matching begins
/// \param[in] end    Offset in `text` where matching must stop
/// \param[in] flags  Flags handed on to every sub-parser
///
/// \returns `true` with `interval`, `path`, `query` and `bookmark` describing
///          the parts found (parts not present keep start = 1, end = 0);
///          `false` with `interval.end = start` and `interval.start = start + 1`
///          otherwise.
3317 virtual bool match(
3318 _In_reads_or_z_(end) const T* text,
3319 _In_ size_t start = 0,
3320 _In_ size_t end = (size_t)-1,
3321 _In_ int flags = match_default)
3322 {
3323 _Assume_(text || start >= end);
3324
// The path starts at `start`; query and bookmark start out as not found.
3325 this->interval.end = start;
3326 path.start = start;
3327 query.start = 1;
3328 query.end = 0;
3329 bookmark.start = 1;
3330 bookmark.end = 0;
3331
3332 for (;;) {
3333 if (this->interval.end >= end || !text[this->interval.end])
3334 break;
// Query start: the path ends here and the query runs until a bookmark start,
// a character m_path_char rejects, or the end of the text.
3335 if (m_query_start->match(text, this->interval.end, end, flags)) {
3336 path.end = this->interval.end;
3337 query.start = this->interval.end = m_query_start->interval.end;
3338 for (;;) {
3339 if (this->interval.end >= end || !text[this->interval.end]) {
3340 query.end = this->interval.end;
3341 break;
3342 }
// Bookmark following the query.
3343 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3344 query.end = this->interval.end;
3345 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3346 for (;;) {
3347 if (this->interval.end >= end || !text[this->interval.end]) {
3348 bookmark.end = this->interval.end;
3349 break;
3350 }
3351 if (m_path_char->match(text, this->interval.end, end, flags))
3352 this->interval.end = m_path_char->interval.end;
3353 else {
3354 bookmark.end = this->interval.end;
3355 break;
3356 }
3357 }
3358 this->interval.start = start;
3359 return true;
3360 }
3361 if (m_path_char->match(text, this->interval.end, end, flags))
3362 this->interval.end = m_path_char->interval.end;
3363 else {
3364 query.end = this->interval.end;
3365 break;
3366 }
3367 }
3368 this->interval.start = start;
3369 return true;
3370 }
// Bookmark directly after the path, with no query in between.
3371 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3372 path.end = this->interval.end;
3373 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3374 for (;;) {
3375 if (this->interval.end >= end || !text[this->interval.end]) {
3376 bookmark.end = this->interval.end;
3377 break;
3378 }
3379 if (m_path_char->match(text, this->interval.end, end, flags))
3380 this->interval.end = m_path_char->interval.end;
3381 else {
3382 bookmark.end = this->interval.end;
3383 break;
3384 }
3385 }
3386 this->interval.start = start;
3387 return true;
3388 }
3389 if (m_path_char->match(text, this->interval.end, end, flags))
3390 this->interval.end = m_path_char->interval.end;
3391 else
3392 break;
3393 }
3394
// NOTE(review): the line opening the block closed below is not visible in this
// excerpt; the block records a path-only match.
3396 path.end = this->interval.end;
3397 this->interval.start = start;
3398 return true;
3399 }
3400
// Failure: `query` is not reset here; it still holds start = 1, end = 0 from
// the top of the function because this point is reached without a query match.
3401 path.start = 1;
3402 path.end = 0;
3403 bookmark.start = 1;
3404 bookmark.end = 0;
3405 this->interval.start = (this->interval.end = start) + 1;
3406 return false;
3407 }
3408
/// Resets the `path`, `query` and `bookmark` intervals to start = 1, end = 0
/// (start past end).
3409 virtual void invalidate()
3410 {
3411 path.start = 1;
3412 path.end = 0;
3413 query.start = 1;
3414 query.end = 0;
3415 bookmark.start = 1;
3416 bookmark.end = 0;
3418 }
3419
3420 public:
3423 stdex::interval<size_t> bookmark;
3424
3425 protected:
3426 std::shared_ptr<basic_parser<T>> m_path_char;
3427 std::shared_ptr<basic_parser<T>> m_query_start;
3428 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3429 };
3430
// turl_path aliases wurl_path when _UNICODE is defined and url_path otherwise.
3433#ifdef _UNICODE
3434 using turl_path = wurl_path;
3435#else
3436 using turl_path = url_path;
3437#endif
3439
3443 template <class T>
3444 class basic_url : public basic_parser<T>
3445 {
3446 public:
/// Constructs a URL parser from its component parsers.
///
/// Parameters spelled with a leading underscore initialise the member of the
/// same name without the underscore (for example `_http_scheme` initialises
/// `http_scheme`); `colon`, `slash`, `at`, `ip_lbracket` and `ip_rbracket`
/// initialise the corresponding `m_`-prefixed members. `locale` is forwarded
/// to the basic_parser<T> base.
3447 basic_url(
3448 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3449 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3450 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3451 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3452 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3453 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3454 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3455 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3456 _In_ const std::shared_ptr<basic_parser<T>>& at,
3457 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3458 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3459 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3460 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3461 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3462 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3463 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3464 _In_ const std::locale& locale = std::locale()) :
3465 basic_parser<T>(locale),
3466 http_scheme(_http_scheme),
3467 ftp_scheme(_ftp_scheme),
3468 mailto_scheme(_mailto_scheme),
3469 file_scheme(_file_scheme),
3470 m_colon(colon),
3471 m_slash(slash),
3472 username(_username),
3473 password(_password),
3474 m_at(at),
3475 m_ip_lbracket(ip_lbracket),
3476 m_ip_rbracket(ip_rbracket),
3477 ipv4_host(_ipv4_host),
3478 ipv6_host(_ipv6_host),
3479 dns_host(_dns_host),
3480 port(_port),
3481 path(_path)
3482 {}
3483
3484 virtual bool match(
3485 _In_reads_or_z_(end) const T* text,
3486 _In_ size_t start = 0,
3487 _In_ size_t end = (size_t)-1,
3488 _In_ int flags = match_default)
3489 {
3490 _Assume_(text || start >= end);
3491
3492 this->interval.end = start;
3493
3494 if (http_scheme->match(text, this->interval.end, end, flags) &&
3495 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3496 m_slash->match(text, m_colon->interval.end, end, flags) &&
3497 m_slash->match(text, m_slash->interval.end, end, flags))
3498 {
3499 // http://
3500 this->interval.end = m_slash->interval.end;
3501 ftp_scheme->invalidate();
3502 mailto_scheme->invalidate();
3503 file_scheme->invalidate();
3504 }
3505 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3506 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3507 m_slash->match(text, m_colon->interval.end, end, flags) &&
3508 m_slash->match(text, m_slash->interval.end, end, flags))
3509 {
3510 // ftp://
3511 this->interval.end = m_slash->interval.end;
3512 http_scheme->invalidate();
3513 mailto_scheme->invalidate();
3514 file_scheme->invalidate();
3515 }
3516 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3517 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3518 {
3519 // mailto:
3520 this->interval.end = m_colon->interval.end;
3521 http_scheme->invalidate();
3522 ftp_scheme->invalidate();
3523 file_scheme->invalidate();
3524 }
3525 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3526 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3527 m_slash->match(text, m_colon->interval.end, end, flags) &&
3528 m_slash->match(text, m_slash->interval.end, end, flags))
3529 {
3530 // file://
3531 this->interval.end = m_slash->interval.end;
3532 http_scheme->invalidate();
3533 ftp_scheme->invalidate();
3534 mailto_scheme->invalidate();
3535 }
3536 else {
3537 // Default to http:
3538 http_scheme->invalidate();
3539 ftp_scheme->invalidate();
3540 mailto_scheme->invalidate();
3541 file_scheme->invalidate();
3542 }
3543
3544 if (ftp_scheme->interval) {
3545 if (username->match(text, this->interval.end, end, flags)) {
3546 if (m_colon->match(text, username->interval.end, end, flags) &&
3547 password->match(text, m_colon->interval.end, end, flags) &&
3548 m_at->match(text, password->interval.end, end, flags))
3549 {
3550 // Username and password
3551 this->interval.end = m_at->interval.end;
3552 }
3553 else if (m_at->match(text, this->interval.end, end, flags)) {
3554 // Username only
3555 this->interval.end = m_at->interval.end;
3556 password->invalidate();
3557 }
3558 else {
3559 username->invalidate();
3560 password->invalidate();
3561 }
3562 }
3563 else {
3564 username->invalidate();
3565 password->invalidate();
3566 }
3567
3568 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3569 // Host is IPv4
3570 this->interval.end = ipv4_host->interval.end;
3571 ipv6_host->invalidate();
3572 dns_host->invalidate();
3573 }
3574 else if (
3575 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3576 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3577 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3578 {
3579 // Host is IPv6
3580 this->interval.end = m_ip_rbracket->interval.end;
3581 ipv4_host->invalidate();
3582 dns_host->invalidate();
3583 }
3584 else if (dns_host->match(text, this->interval.end, end, flags)) {
3585 // Host is hostname
3586 this->interval.end = dns_host->interval.end;
3587 ipv4_host->invalidate();
3588 ipv6_host->invalidate();
3589 }
3590 else {
3591 invalidate();
3592 return false;
3593 }
3594
3595 if (m_colon->match(text, this->interval.end, end, flags) &&
3596 port->match(text, m_colon->interval.end, end, flags))
3597 {
3598 // Port
3599 this->interval.end = port->interval.end;
3600 }
3601 else
3602 port->invalidate();
3603
3604 if (path->match(text, this->interval.end, end, flags)) {
3605 // Path
3606 this->interval.end = path->interval.end;
3607 }
3608
3609 this->interval.start = start;
3610 return true;
3611 }
3612
3613 if (mailto_scheme->interval) {
3614 if (username->match(text, this->interval.end, end, flags) &&
3615 m_at->match(text, username->interval.end, end, flags))
3616 {
3617 // Username
3618 this->interval.end = m_at->interval.end;
3619 }
3620 else {
3621 invalidate();
3622 return false;
3623 }
3624
3625 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3626 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3627 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3628 {
3629 // Host is IPv4
3630 this->interval.end = m_ip_rbracket->interval.end;
3631 ipv6_host->invalidate();
3632 dns_host->invalidate();
3633 }
3634 else if (
3635 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3636 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3637 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3638 {
3639 // Host is IPv6
3640 this->interval.end = m_ip_rbracket->interval.end;
3641 ipv4_host->invalidate();
3642 dns_host->invalidate();
3643 }
3644 else if (dns_host->match(text, this->interval.end, end, flags)) {
3645 // Host is hostname
3646 this->interval.end = dns_host->interval.end;
3647 ipv4_host->invalidate();
3648 ipv6_host->invalidate();
3649 }
3650 else {
3651 invalidate();
3652 return false;
3653 }
3654
3655 password->invalidate();
3656 port->invalidate();
3657 path->invalidate();
3658 this->interval.start = start;
3659 return true;
3660 }
3661
3662 if (file_scheme->interval) {
3663 if (path->match(text, this->interval.end, end, flags)) {
3664 // Path
3665 this->interval.end = path->interval.end;
3666 }
3667
3668 username->invalidate();
3669 password->invalidate();
3670 ipv4_host->invalidate();
3671 ipv6_host->invalidate();
3672 dns_host->invalidate();
3673 port->invalidate();
3674 this->interval.start = start;
3675 return true;
3676 }
3677
3678 // "http://" found or defaulted to
3679
3680 // If "http://" explicit, test for username&password.
3681 if (http_scheme->interval &&
3682 username->match(text, this->interval.end, end, flags))
3683 {
3684 if (m_colon->match(text, username->interval.end, end, flags) &&
3685 password->match(text, m_colon->interval.end, end, flags) &&
3686 m_at->match(text, password->interval.end, end, flags))
3687 {
3688 // Username and password
3689 this->interval.end = m_at->interval.end;
3690 }
3691 else if (m_at->match(text, username->interval.end, end, flags)) {
3692 // Username only
3693 this->interval.end = m_at->interval.end;
3694 password->invalidate();
3695 }
3696 else {
3697 username->invalidate();
3698 password->invalidate();
3699 }
3700 }
3701 else {
3702 username->invalidate();
3703 password->invalidate();
3704 }
3705
3706 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3707 // Host is IPv4
3708 this->interval.end = ipv4_host->interval.end;
3709 ipv6_host->invalidate();
3710 dns_host->invalidate();
3711 }
3712 else if (
3713 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3714 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3715 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3716 {
3717 // Host is IPv6
3718 this->interval.end = m_ip_rbracket->interval.end;
3719 ipv4_host->invalidate();
3720 dns_host->invalidate();
3721 }
3722 else if (dns_host->match(text, this->interval.end, end, flags)) {
3723 // Host is hostname
3724 this->interval.end = dns_host->interval.end;
3725 ipv4_host->invalidate();
3726 ipv6_host->invalidate();
3727 }
3728 else {
3729 invalidate();
3730 return false;
3731 }
3732
3733 if (m_colon->match(text, this->interval.end, end, flags) &&
3734 port->match(text, m_colon->interval.end, end, flags))
3735 {
3736 // Port
3737 this->interval.end = port->interval.end;
3738 }
3739 else
3740 port->invalidate();
3741
3742 if (path->match(text, this->interval.end, end, flags)) {
3743 // Path
3744 this->interval.end = path->interval.end;
3745 }
3746
3747 this->interval.start = start;
3748 return true;
3749 }
3750
3751 virtual void invalidate()
3752 {
3753 http_scheme->invalidate();
3754 ftp_scheme->invalidate();
3755 mailto_scheme->invalidate();
3756 file_scheme->invalidate();
3757 username->invalidate();
3758 password->invalidate();
3759 ipv4_host->invalidate();
3760 ipv6_host->invalidate();
3761 dns_host->invalidate();
3762 port->invalidate();
3763 path->invalidate();
3765 }
3766
3767 public:
3768 std::shared_ptr<basic_parser<T>> http_scheme;
3769 std::shared_ptr<basic_parser<T>> ftp_scheme;
3770 std::shared_ptr<basic_parser<T>> mailto_scheme;
3771 std::shared_ptr<basic_parser<T>> file_scheme;
3772 std::shared_ptr<basic_parser<T>> username;
3773 std::shared_ptr<basic_parser<T>> password;
3774 std::shared_ptr<basic_parser<T>> ipv4_host;
3775 std::shared_ptr<basic_parser<T>> ipv6_host;
3776 std::shared_ptr<basic_parser<T>> dns_host;
3777 std::shared_ptr<basic_parser<T>> port;
3778 std::shared_ptr<basic_parser<T>> path;
3779
3780 protected:
3781 std::shared_ptr<basic_parser<T>> m_colon;
3782 std::shared_ptr<basic_parser<T>> m_slash;
3783 std::shared_ptr<basic_parser<T>> m_at;
3784 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3785 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3786 };
3787
3788 using url = basic_url<char>; // narrow-character URL parser
3789 using wurl = basic_url<wchar_t>; // wide-character URL parser
3790#ifdef _UNICODE
     // turl follows the build's character set.
3791 using turl = wurl;
3792#else
3793 using turl = url;
3794#endif
3795 using sgml_url = basic_url<char>; // same instantiation as url, under an SGML-specific name
3796
3800 template <class T>
     /// Parser for "username, `at`, host", where the host is a bracketed IPv4 address, a
     /// bracketed IPv6 address, or whatever dns_host accepts.
     /// NOTE(review): the listing lines holding the class head and the constructor name
     /// (3801, 3804) are not shown in this excerpt.
3802 {
3803 public:
     /// Stores the sub-parsers and forwards `locale` to basic_parser<T>.
     /// at, ip_lbracket and ip_rbracket are punctuation helpers kept protected.
     /// All sub-parsers are dereferenced without a null check.
3805 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3806 _In_ const std::shared_ptr<basic_parser<T>>& at,
3807 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3808 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3809 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3810 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3811 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3812 _In_ const std::locale& locale = std::locale()) :
3813 basic_parser<T>(locale),
3814 username(_username),
3815 m_at(at),
3816 m_ip_lbracket(ip_lbracket),
3817 m_ip_rbracket(ip_rbracket),
3818 ipv4_host(_ipv4_host),
3819 ipv6_host(_ipv6_host),
3820 dns_host(_dns_host)
3821 {}
3822
     /// Tries to match an address beginning exactly at `start`.
     ///
     /// \param text   Text to parse; may be null only when start >= end
     /// \param start  Offset at which the address has to begin
     /// \param end    Upper bound of the range to examine; forwarded to every sub-parser
     /// \param flags  Matching flags; forwarded unchanged to every sub-parser
     ///
     /// \returns true with this->interval spanning the address and exactly one host parser
     ///          left valid; false with all public sub-parsers invalidated otherwise.
3823 virtual bool match(
3824 _In_reads_or_z_(end) const T* text,
3825 _In_ size_t start = 0,
3826 _In_ size_t end = (size_t)-1,
3827 _In_ int flags = match_default)
3828 {
3829 _Assume_(text || start >= end);
3830
3831 if (username->match(text, start, end, flags) &&
3832 m_at->match(text, username->interval.end, end, flags))
3833 {
3834 // Username@
     // Host alternatives are tried in order: [IPv4], [IPv6], host name.
3835 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3836 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3837 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3838 {
3839 // Host is IPv4
3840 this->interval.end = m_ip_rbracket->interval.end;
3841 ipv6_host->invalidate();
3842 dns_host->invalidate();
3843 }
3844 else if (
3845 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3846 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3847 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3848 {
3849 // Host is IPv6
3850 this->interval.end = m_ip_rbracket->interval.end;
3851 ipv4_host->invalidate();
3852 dns_host->invalidate();
3853 }
3854 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3855 // Host is hostname
3856 this->interval.end = dns_host->interval.end;
3857 ipv4_host->invalidate();
3858 ipv6_host->invalidate();
3859 }
3860 else
3861 goto error;
3862 this->interval.start = start;
3863 return true;
3864 }
3865
3866 error:
     // Shared failure exit: clear all parts and mark own interval as "no match" (start > end).
3867 username->invalidate();
3868 ipv4_host->invalidate();
3869 ipv6_host->invalidate();
3870 dns_host->invalidate();
3871 this->interval.start = (this->interval.end = start) + 1;
3872 return false;
3873 }
3874
     /// Invalidates every public sub-parser.
3875 virtual void invalidate()
3876 {
3877 username->invalidate();
3878 ipv4_host->invalidate();
3879 ipv6_host->invalidate();
3880 dns_host->invalidate();
3882 }
3883
3884 public:
     // Sub-parsers whose intervals describe the address parts after a match.
3885 std::shared_ptr<basic_parser<T>> username;
3886 std::shared_ptr<basic_parser<T>> ipv4_host;
3887 std::shared_ptr<basic_parser<T>> ipv6_host;
3888 std::shared_ptr<basic_parser<T>> dns_host;
3889
3890 protected:
     // Punctuation helpers.
3891 std::shared_ptr<basic_parser<T>> m_at;
3892 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3893 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3894 };
3895
3898#ifdef _UNICODE
3900#else
3902#endif
3904
3908 template <class T>
     /// Emoticon parser. Accepts either whatever the optional `emoticon` parser matches as a
     /// whole, or the sequence: optional apex, eyes, optional nose, mouth - where the mouth
     /// may repeat.
     /// NOTE(review): the listing lines holding the class head, the constructor name and the
     /// `emoticon` member initializer (3909, 3912, 3920) are not shown in this excerpt.
3910 {
3911 public:
     /// Stores the sub-parsers and forwards `locale` to basic_parser<T>.
     /// match() guards emoticon, apex and nose with null checks, so they may be null;
     /// eyes and mouth are dereferenced unconditionally.
3913 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3914 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3915 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3916 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3917 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3918 _In_ const std::locale& locale = std::locale()) :
3919 basic_parser<T>(locale),
3921 apex(_apex),
3922 eyes(_eyes),
3923 nose(_nose),
3924 mouth(_mouth)
3925 {}
3926
     /// Tries to match an emoticon beginning exactly at `start`.
     ///
     /// \param text   Text to parse; may be null only when start >= end
     /// \param start  Offset at which the emoticon has to begin
     /// \param end    Upper bound of the range to examine; forwarded to every sub-parser
     /// \param flags  Matching flags; forwarded unchanged to every sub-parser
     ///
     /// \returns true with this->interval spanning the emoticon; false with all sub-parsers
     ///          invalidated and this->interval marked as "no match" (start > end) otherwise.
3927 virtual bool match(
3928 _In_reads_or_z_(end) const T* text,
3929 _In_ size_t start = 0,
3930 _In_ size_t end = (size_t)-1,
3931 _In_ int flags = match_default)
3932 {
3933 _Assume_(text || start >= end);
3934
     // A hit of the whole-emoticon parser wins; the component parsers are then cleared.
3935 if (emoticon && emoticon->match(text, start, end, flags)) {
3936 if (apex) apex->invalidate();
3937 eyes->invalidate();
3938 if (nose) nose->invalidate();
3939 mouth->invalidate();
3940 this->interval.start = start;
3941 this->interval.end = emoticon->interval.end;
3942 return true;
3943 }
3944
3945 this->interval.end = start;
3946
3947 if (apex && apex->match(text, this->interval.end, end, flags))
3948 this->interval.end = apex->interval.end;
3949
3950 if (eyes->match(text, this->interval.end, end, flags)) {
     // Variant with a nose between eyes and mouth.
3951 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3952 mouth->match(text, nose->interval.end, end, flags))
3953 {
     // NOTE(review): listing line 3955 is not shown; presumably it declares start_mouth,
     // which is used below.
3954 size_t
3956 hit_offset = mouth->hit_offset;
3957 // Mouth may repeat :-)))))))
     // Keep consuming mouth matches while they report the same hit_offset as the first one.
3958 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
     // The loop's last (failed or different) attempt overwrote mouth->interval; restore it
     // so that it spans all repetitions.
3959 mouth->interval.start = start_mouth;
3960 mouth->interval.end = this->interval.end;
3961 this->interval.start = start;
3962 return true;
3963 }
     // Variant without a nose: mouth directly after the eyes.
3964 if (mouth->match(text, eyes->interval.end, end, flags)) {
     // NOTE(review): listing line 3966 is not shown; presumably it declares start_mouth.
3965 size_t
3967 hit_offset = mouth->hit_offset;
3968 // Mouth may repeat :-)))))))
3969 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3970 if (nose) nose->invalidate();
3971 mouth->interval.start = start_mouth;
3972 mouth->interval.end = this->interval.end;
3973 this->interval.start = start;
3974 return true;
3975 }
3976 }
3977
3978 if (emoticon) emoticon->invalidate();
3979 if (apex) apex->invalidate();
3980 eyes->invalidate();
3981 if (nose) nose->invalidate();
3982 mouth->invalidate();
3983 this->interval.start = (this->interval.end = start) + 1;
3984 return false;
3985 }
3986
     /// Invalidates every sub-parser that is present.
3987 virtual void invalidate()
3988 {
3989 if (emoticon) emoticon->invalidate();
3990 if (apex) apex->invalidate();
3991 eyes->invalidate();
3992 if (nose) nose->invalidate();
3993 mouth->invalidate();
3995 }
3996
3997 public:
3998 std::shared_ptr<basic_parser<T>> emoticon; // optional: matches a complete emoticon on its own
3999 std::shared_ptr<basic_parser<T>> apex; // optional: matched before the eyes
4000 std::shared_ptr<basic_parser<T>> eyes; // required
4001 std::shared_ptr<basic_parser<T>> nose; // optional: matched between eyes and mouth
4002 std::shared_ptr<basic_set<T>> mouth; // required; its hit_offset is compared across repetitions
4003 };
4004
4007#ifdef _UNICODE
     // temoticon follows the build's character set.
4008 using temoticon = wemoticon;
4009#else
4010 using temoticon = emoticon;
4011#endif
4013
4017 enum date_format_t {
     // Bit flags: basic_date takes a mask combining them and reports the single layout that
     // matched in its `format` member. The letters give the order of the parts.
4018 date_format_none = 0, // no layout (nothing matched)
4019 date_format_dmy = 0x1, // day, month, year
4020 date_format_mdy = 0x2, // month, day, year
4021 date_format_ymd = 0x4, // year, month, day
4022 date_format_ym = 0x8, // year, month
4023 date_format_my = 0x10, // month, year
4024 date_format_dm = 0x20, // day, month
4025 date_format_md = 0x40, // month, day
4026 };
4027
4031 template <class T>
     /// Date parser built from integer parsers for day, month and year, a separator set and a
     /// space parser. The layouts enabled in the format mask are tried in a fixed order
     /// (dmy, mdy, ymd, ym, my, dm, md); the first one that matches wins and is reported in
     /// `format`.
4032 class basic_date : public basic_parser<T>
4033 {
4034 public:
     /// \param format_mask  Combination of date_format_t flags selecting the layouts to try
     /// \param _day, _month, _year  Integer parsers for the date parts; their `value` is used
     ///                     for validation
     /// \param separator    Set of separators; where two separators occur they must report the
     ///                     same hit_offset
     /// \param space        Parser for white space allowed around separators
     /// \param locale       Forwarded to basic_parser<T>
4035 basic_date(
4036 _In_ int format_mask,
4037 _In_ const std::shared_ptr<basic_integer<T>>& _day,
4038 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4039 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4040 _In_ const std::shared_ptr<basic_set<T>>& separator,
4041 _In_ const std::shared_ptr<basic_parser<T>>& space,
4042 _In_ const std::locale& locale = std::locale()) :
4043 basic_parser<T>(locale),
4044 format(date_format_none),
4045 m_format_mask(format_mask),
4046 day(_day),
4047 month(_month),
4048 year(_year),
4049 m_separator(separator),
4050 m_space(space)
4051 {}
4052
     /// Tries to match a date beginning exactly at `start`.
     ///
     /// \param text   Text to parse; may be null only when start >= end
     /// \param start  Offset at which the date has to begin
     /// \param end    Upper bound of the range to examine; forwarded to every sub-parser
     /// \param flags  Matching flags; forwarded to the sub-parsers (with match_multiline
     ///               removed for the space parser)
     ///
     /// \returns true with this->interval spanning the date and `format` set to the layout
     ///          found; false with day, month and year invalidated, `format` reset to
     ///          date_format_none and this->interval marked as "no match" otherwise.
4053 virtual bool match(
4054 _In_reads_or_z_(end) const T* text,
4055 _In_ size_t start = 0,
4056 _In_ size_t end = (size_t)-1,
4057 _In_ int flags = match_default)
4058 {
4059 _Assume_(text || start >= end);
4060
4061 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
     // Each layout below follows the same pattern: match a part, skip spaces (the empty-bodied
     // for loops), match a separator, skip spaces, match the next part, and so on.
     // this->interval.end serves as the cursor.

     // day <sep> month <sep> year
4062 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4063 if (day->match(text, start, end, flags)) {
4064 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4065 if (m_separator->match(text, this->interval.end, end, flags)) {
4066 size_t hit_offset = m_separator->hit_offset;
4067 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4068 if (month->match(text, this->interval.end, end, flags)) {
4069 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4070 if (m_separator->match(text, this->interval.end, end, flags) &&
4071 m_separator->hit_offset == hit_offset) // Both separators must match.
4072 {
4073 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4074 if (year->match(text, this->interval.end, end, flags) &&
4075 is_valid(day->value, month->value))
4076 {
4077 this->interval.start = start;
4078 this->interval.end = year->interval.end;
4079 format = date_format_dmy;
4080 return true;
4081 }
4082 }
4083 }
4084 }
4085 }
4086 }
4087
     // month <sep> day <sep> year
4088 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4089 if (month->match(text, start, end, flags)) {
4090 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4091 if (m_separator->match(text, this->interval.end, end, flags)) {
4092 size_t hit_offset = m_separator->hit_offset;
4093 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4094 if (day->match(text, this->interval.end, end, flags)) {
4095 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4096 if (m_separator->match(text, this->interval.end, end, flags) &&
4097 m_separator->hit_offset == hit_offset) // Both separators must match.
4098 {
4099 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4100 if (year->match(text, this->interval.end, end, flags) &&
4101 is_valid(day->value, month->value))
4102 {
4103 this->interval.start = start;
4104 this->interval.end = year->interval.end;
4105 format = date_format_mdy;
4106 return true;
4107 }
4108 }
4109 }
4110 }
4111 }
4112 }
4113
     // year <sep> month <sep> day
4114 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4115 if (year->match(text, start, end, flags)) {
4116 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4117 if (m_separator->match(text, this->interval.end, end, flags)) {
4118 size_t hit_offset = m_separator->hit_offset;
4119 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4120 if (month->match(text, this->interval.end, end, flags)) {
4121 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4122 if (m_separator->match(text, this->interval.end, end, flags) &&
4123 m_separator->hit_offset == hit_offset) // Both separators must match.
4124 {
4125 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4126 if (day->match(text, this->interval.end, end, flags) &&
4127 is_valid(day->value, month->value))
4128 {
4129 this->interval.start = start;
4130 this->interval.end = day->interval.end;
4131 format = date_format_ymd;
4132 return true;
4133 }
4134 }
4135 }
4136 }
4137 }
4138 }
4139
     // year <sep> month (no day: only the month is validated)
4140 if ((m_format_mask & date_format_ym) == date_format_ym) {
4141 if (year->match(text, start, end, flags)) {
4142 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4143 if (m_separator->match(text, this->interval.end, end, flags)) {
4144 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4145 if (month->match(text, this->interval.end, end, flags) &&
4146 is_valid((size_t)-1, month->value))
4147 {
4148 if (day) day->invalidate();
4149 this->interval.start = start;
4150 this->interval.end = month->interval.end;
4151 format = date_format_ym;
4152 return true;
4153 }
4154 }
4155 }
4156 }
4157
     // month <sep> year (no day: only the month is validated)
4158 if ((m_format_mask & date_format_my) == date_format_my) {
4159 if (month->match(text, start, end, flags)) {
4160 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4161 if (m_separator->match(text, this->interval.end, end, flags)) {
4162 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4163 if (year->match(text, this->interval.end, end, flags) &&
4164 is_valid((size_t)-1, month->value))
4165 {
4166 if (day) day->invalidate();
4167 this->interval.start = start;
4168 this->interval.end = year->interval.end;
4169 format = date_format_my;
4170 return true;
4171 }
4172 }
4173 }
4174 }
4175
     // day <sep> month, optionally followed by a trailing separator of the same kind,
     // which is then included in the match.
4176 if ((m_format_mask & date_format_dm) == date_format_dm) {
4177 if (day->match(text, start, end, flags)) {
4178 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4179 if (m_separator->match(text, this->interval.end, end, flags)) {
4180 size_t hit_offset = m_separator->hit_offset;
4181 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4182 if (month->match(text, this->interval.end, end, flags) &&
4183 is_valid(day->value, month->value))
4184 {
4185 if (year) year->invalidate();
4186 this->interval.start = start;
4187 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4188 if (m_separator->match(text, this->interval.end, end, flags) &&
4189 m_separator->hit_offset == hit_offset) // Both separators must match.
4190 this->interval.end = m_separator->interval.end;
4191 else
     // No trailing separator: drop the spaces skipped above as well.
4192 this->interval.end = month->interval.end;
4193 format = date_format_dm;
4194 return true;
4195 }
4196 }
4197 }
4198 }
4199
     // month <sep> day, optionally followed by a trailing separator of the same kind,
     // which is then included in the match.
4200 if ((m_format_mask & date_format_md) == date_format_md) {
4201 if (month->match(text, start, end, flags)) {
4202 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4203 if (m_separator->match(text, this->interval.end, end, flags)) {
4204 size_t hit_offset = m_separator->hit_offset;
4205 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4206 if (day->match(text, this->interval.end, end, flags) &&
4207 is_valid(day->value, month->value))
4208 {
4209 if (year) year->invalidate();
4210 this->interval.start = start;
4211 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4212 if (m_separator->match(text, this->interval.end, end, flags) &&
4213 m_separator->hit_offset == hit_offset) // Both separators must match.
4214 this->interval.end = m_separator->interval.end;
4215 else
     // No trailing separator: drop the spaces skipped above as well.
4216 this->interval.end = day->interval.end;
4217 format = date_format_md;
4218 return true;
4219 }
4220 }
4221 }
4222 }
4223
     // No enabled layout matched.
4224 if (day) day->invalidate();
4225 if (month) month->invalidate();
4226 if (year) year->invalidate();
4227 format = date_format_none;
4228 this->interval.start = (this->interval.end = start) + 1;
4229 return false;
4230 }
4231
     /// Invalidates the part parsers that are present and resets `format`.
4232 virtual void invalidate()
4233 {
4234 if (day) day->invalidate();
4235 if (month) month->invalidate();
4236 if (year) year->invalidate();
4237 format = date_format_none;
4239 }
4240
4241 protected:
     /// Checks a day/month combination against the calendar.
     /// Pass (size_t)-1 for a part that is not known; it is then replaced by 1.
     /// The year is not a parameter, so February always accepts day 29.
     /// \returns false for a month outside 1..12 or a day outside the month's range
4242 static inline bool is_valid(size_t day, size_t month)
4243 {
4244 if (month == (size_t)-1) {
4245 // Default to January. This allows validating day only, as January has all 31 days.
4246 month = 1;
4247 }
4248 if (day == (size_t)-1) {
4249 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4250 day = 1;
4251 }
4252
4253 switch (month) {
4254 case 1:
4255 case 3:
4256 case 5:
4257 case 7:
4258 case 8:
4259 case 10:
4260 case 12:
4261 return 1 <= day && day <= 31;
4262 case 2:
4263 return 1 <= day && day <= 29;
4264 case 4:
4265 case 6:
4266 case 9:
4267 case 11:
4268 return 1 <= day && day <= 30;
4269 default:
4270 return false;
4271 }
4272 }
4273
4274 public:
4275 date_format_t format; // layout found by the last match(); date_format_none when there is none
4276 std::shared_ptr<basic_integer<T>> day;
4277 std::shared_ptr<basic_integer<T>> month;
4278 std::shared_ptr<basic_integer<T>> year;
4279
4280 protected:
4281 int m_format_mask; // layouts match() is allowed to try
4282 std::shared_ptr<basic_set<T>> m_separator;
4283 std::shared_ptr<basic_parser<T>> m_space;
4284 };
4285
4286 using date = basic_date<char>; // narrow-character date parser
4287 using wdate = basic_date<wchar_t>; // wide-character date parser
4288#ifdef _UNICODE
     // tdate follows the build's character set.
4289 using tdate = wdate;
4290#else
4291 using tdate = date;
4292#endif
4294
4298 template <class T>
     /// Time-of-day parser: hour, separator, minute, optionally followed by separator and
     /// second, optionally followed by the millisecond separator and millisecond.
     /// Minute and second must be below 60, millisecond below 1000; the hour value is not
     /// range-checked by this class.
4299 class basic_time : public basic_parser<T>
4300 {
4301 public:
     /// Stores the sub-parsers and forwards `locale` to basic_parser<T>.
     /// match() guards _second, _millisecond and millisecond_separator with null checks, so
     /// they may be null; hour, minute and separator are dereferenced unconditionally.
4302 basic_time(
4303 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4304 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4305 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4306 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4307 _In_ const std::shared_ptr<basic_set<T>>& separator,
4308 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4309 _In_ const std::locale& locale = std::locale()) :
4310 basic_parser<T>(locale),
4311 hour(_hour),
4312 minute(_minute),
4313 second(_second),
4314 millisecond(_millisecond),
4315 m_separator(separator),
4316 m_millisecond_separator(millisecond_separator)
4317 {}
4318
     /// Tries to match a time beginning exactly at `start`.
     ///
     /// \param text   Text to parse; may be null only when start >= end
     /// \param start  Offset at which the time has to begin
     /// \param end    Upper bound of the range to examine; forwarded to every sub-parser
     /// \param flags  Matching flags; forwarded unchanged to every sub-parser
     ///
     /// \returns true with this->interval spanning the longest accepted form; false with all
     ///          part parsers invalidated and this->interval marked as "no match" otherwise.
4319 virtual bool match(
4320 _In_reads_or_z_(end) const T* text,
4321 _In_ size_t start = 0,
4322 _In_ size_t end = (size_t)-1,
4323 _In_ int flags = match_default)
4324 {
4325 _Assume_(text || start >= end);
4326
4327 if (hour->match(text, start, end, flags) &&
4328 m_separator->match(text, hour->interval.end, end, flags) &&
4329 minute->match(text, m_separator->interval.end, end, flags) &&
4330 minute->value < 60)
4331 {
4332 // hh::mm
     // Remember which separator was used so the one before the seconds can be compared.
4333 size_t hit_offset = m_separator->hit_offset;
4334 if (m_separator->match(text, minute->interval.end, end, flags) &&
4335 m_separator->hit_offset == hit_offset && // Both separators must match.
4336 second && second->match(text, m_separator->interval.end, end, flags) &&
4337 second->value < 60)
4338 {
4339 // hh::mm:ss
4340 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4341 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4342 millisecond->value < 1000)
4343 {
4344 // hh::mm:ss.mmmm
4345 this->interval.end = millisecond->interval.end;
4346 }
4347 else {
4348 if (millisecond) millisecond->invalidate();
4349 this->interval.end = second->interval.end;
4350 }
4351 }
4352 else {
     // No (valid) seconds: the time ends after the minutes.
4353 if (second) second->invalidate();
4354 if (millisecond) millisecond->invalidate();
4355 this->interval.end = minute->interval.end;
4356 }
4357 this->interval.start = start;
4358 return true;
4359 }
4360
4361 hour->invalidate();
4362 minute->invalidate();
4363 if (second) second->invalidate();
4364 if (millisecond) millisecond->invalidate();
4365 this->interval.start = (this->interval.end = start) + 1;
4366 return false;
4367 }
4368
     /// Invalidates the part parsers that are present.
4369 virtual void invalidate()
4370 {
4371 hour->invalidate();
4372 minute->invalidate();
4373 if (second) second->invalidate();
4374 if (millisecond) millisecond->invalidate();
4376 }
4377
4378 public:
4379 std::shared_ptr<basic_integer10<T>> hour;
4380 std::shared_ptr<basic_integer10<T>> minute;
4381 std::shared_ptr<basic_integer10<T>> second; // may be null
4382 std::shared_ptr<basic_integer10<T>> millisecond; // may be null
4383
4384 protected:
4385 std::shared_ptr<basic_set<T>> m_separator;
4386 std::shared_ptr<basic_parser<T>> m_millisecond_separator; // may be null
4387 };
4388
4389 using time = basic_time<char>; // narrow-character time parser
4390 using wtime = basic_time<wchar_t>; // wide-character time parser
4391#ifdef _UNICODE
     // ttime follows the build's character set.
4392 using ttime = wtime;
4393#else
4394 using ttime = time;
4395#endif
4397
4401 template <class T>
     /// Angle parser: degrees, minutes and seconds, each an integer followed by its separator
     /// parser, plus an optional trailing `decimal` part. Every component is optional in the
     /// text, but at least one of degree, minute or second has to match.
4402 class basic_angle : public basic_parser<T>
4403 {
4404 public:
     /// Stores the sub-parsers and forwards `locale` to basic_parser<T>.
     /// match() guards _second, _second_separator and _decimal with null checks, so they may
     /// be null; the degree and minute parsers and their separators are dereferenced
     /// unconditionally.
     /// NOTE(review): the listing line holding the constructor name (4405) is not shown.
4406 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4407 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4408 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4409 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4410 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4411 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4412 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4413 _In_ const std::locale& locale = std::locale()) :
4414 basic_parser<T>(locale),
4415 degree(_degree),
4416 degree_separator(_degree_separator),
4417 minute(_minute),
4418 minute_separator(_minute_separator),
4419 second(_second),
4420 second_separator(_second_separator),
4421 decimal(_decimal)
4422 {}
4423
     /// Tries to match an angle beginning exactly at `start`.
     ///
     /// \param text   Text to parse; may be null only when start >= end
     /// \param start  Offset at which the angle has to begin
     /// \param end    Upper bound of the range to examine; forwarded to every sub-parser
     /// \param flags  Matching flags; forwarded unchanged to every sub-parser
     ///
     /// \returns true with this->interval spanning the accepted components; false with
     ///          this->interval marked as "no match" when none of degree, minute or second
     ///          matched.
4424 virtual bool match(
4425 _In_reads_or_z_(end) const T* text,
4426 _In_ size_t start = 0,
4427 _In_ size_t end = (size_t)-1,
4428 _In_ int flags = match_default)
4429 {
4430 _Assume_(text || start >= end);
4431
     // this->interval.end is the running cursor; a component that fails leaves it in place
     // so the next component is tried at the same position.
4432 this->interval.end = start;
4433
4434 if (degree->match(text, this->interval.end, end, flags) &&
4435 degree_separator->match(text, degree->interval.end, end, flags))
4436 {
4437 // Degrees
4438 this->interval.end = degree_separator->interval.end;
4439 }
4440 else {
4441 degree->invalidate();
4442 degree_separator->invalidate();
4443 }
4444
4445 if (minute->match(text, this->interval.end, end, flags) &&
4446 minute->value < 60 &&
4447 minute_separator->match(text, minute->interval.end, end, flags))
4448 {
4449 // Minutes
4450 this->interval.end = minute_separator->interval.end;
4451 }
4452 else {
4453 minute->invalidate();
4454 minute_separator->invalidate();
4455 }
4456
     // Unlike degrees and minutes, seconds do not require their separator.
4457 if (second && second->match(text, this->interval.end, end, flags) &&
4458 second->value < 60)
4459 {
4460 // Seconds
4461 this->interval.end = second->interval.end;
4462 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4463 this->interval.end = second_separator->interval.end;
4464 else
4465 if (second_separator) second_separator->invalidate();
4466 }
4467 else {
4468 if (second) second->invalidate();
4469 if (second_separator) second_separator->invalidate();
4470 }
4471
     // Succeed when at least one component holds a non-empty interval.
4472 if (degree->interval.start < degree->interval.end ||
4473 minute->interval.start < minute->interval.end ||
4474 (second && second->interval.start < second->interval.end))
4475 {
4476 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4477 // Decimals
4478 this->interval.end = decimal->interval.end;
4479 }
4480 else if (decimal)
4481 decimal->invalidate();
4482 this->interval.start = start;
4483 return true;
4484 }
4485 if (decimal) decimal->invalidate();
4486 this->interval.start = (this->interval.end = start) + 1;
4487 return false;
4488 }
4489
     /// Invalidates every sub-parser that is present.
4490 virtual void invalidate()
4491 {
4492 degree->invalidate();
4493 degree_separator->invalidate();
4494 minute->invalidate();
4495 minute_separator->invalidate();
4496 if (second) second->invalidate();
4497 if (second_separator) second_separator->invalidate();
4498 if (decimal) decimal->invalidate();
4500 }
4501
4502 public:
4503 std::shared_ptr<basic_integer10<T>> degree;
4504 std::shared_ptr<basic_parser<T>> degree_separator;
4505 std::shared_ptr<basic_integer10<T>> minute;
4506 std::shared_ptr<basic_parser<T>> minute_separator;
4507 std::shared_ptr<basic_integer10<T>> second; // may be null
4508 std::shared_ptr<basic_parser<T>> second_separator; // may be null
4509 std::shared_ptr<basic_parser<T>> decimal; // may be null
4510 };
4511
4512 using angle = basic_angle<char>;
4514#ifdef _UNICODE
4515 using RRegElKot = wangle;
4516#else
4517 using RRegElKot = angle;
4518#endif
4520
4524 template <class T>
4526 {
4527 public:
// Constructor parameter list and member initializers: every supplied
// sub-parser is stored in the m_* member of the same name and `locale` is
// forwarded to basic_parser<T>.
// match() dereferences m_digit without a null check; m_plus_sign, m_separator
// and m_space are null-checked there before use.
4529 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4530 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4531 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4532 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4533 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4534 _In_ const std::shared_ptr<basic_parser<T>>& space,
4535 _In_ const std::locale& locale = std::locale()) :
4536 basic_parser<T>(locale),
4537 m_digit(digit),
4538 m_plus_sign(plus_sign),
4539 m_lparenthesis(lparenthesis),
4540 m_rparenthesis(rparenthesis),
4541 m_separator(separator),
4542 m_space(space)
4543 {}
4544
4545 virtual bool match(
4546 _In_reads_or_z_(end) const T* text,
4547 _In_ size_t start = 0,
4548 _In_ size_t end = (size_t)-1,
4549 _In_ int flags = match_default)
4550 {
4551 _Assume_(text || start >= end);
4552
4553 size_t safe_digit_end = start, safe_value_size = 0;
4554 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4555 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4556
4557 this->interval.end = start;
4558 value.clear();
4559 m_lparenthesis->invalidate();
4560 m_rparenthesis->invalidate();
4561
4562 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4563 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4564 safe_value_size = value.size();
4565 this->interval.end = m_plus_sign->interval.end;
4566 }
4567
4568 for (;;) {
4569 _Assume_(text || this->interval.end >= end);
4570 if (this->interval.end >= end || !text[this->interval.end])
4571 break;
4572 if (m_digit->match(text, this->interval.end, end, flags)) {
4573 // Digit
4574 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4575 this->interval.end = m_digit->interval.end;
4576 if (!in_parentheses) {
4577 safe_digit_end = this->interval.end;
4578 safe_value_size = value.size();
4579 has_digits = true;
4580 }
4581 after_digit = true;
4582 after_parentheses = false;
4583 }
4584 else if (
4585 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4586 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4587 m_lparenthesis->match(text, this->interval.end, end, flags))
4588 {
4589 // Left parenthesis
4590 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4591 this->interval.end = m_lparenthesis->interval.end;
4592 in_parentheses = true;
4593 after_digit = false;
4594 after_parentheses = false;
4595 }
4596 else if (
4597 in_parentheses && // After left parenthesis
4598 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4599 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4600 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4601 {
4602 // Right parenthesis
4603 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4604 this->interval.end = m_rparenthesis->interval.end;
4605 safe_digit_end = this->interval.end;
4606 safe_value_size = value.size();
4607 in_parentheses = false;
4608 after_digit = false;
4609 after_parentheses = true;
4610 }
4611 else if (
4612 after_digit &&
4613 !in_parentheses && // No separators inside parentheses
4614 !after_parentheses && // No separators following right parenthesis
4615 m_separator && m_separator->match(text, this->interval.end, end, flags))
4616 {
4617 // Separator
4618 this->interval.end = m_separator->interval.end;
4619 after_digit = false;
4620 after_parentheses = false;
4621 }
4622 else if (
4624 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4625 {
4626 // Space
4627 this->interval.end = m_space->interval.end;
4628 after_digit = false;
4629 after_parentheses = false;
4630 }
4631 else
4632 break;
4633 }
4634 if (has_digits) {
4635 value.erase(safe_value_size);
4636 this->interval.start = start;
4637 this->interval.end = safe_digit_end;
4638 return true;
4639 }
4640 value.clear();
4641 this->interval.start = (this->interval.end = start) + 1;
4642 return false;
4643 }
4644
4645 virtual void invalidate()
4646 {
4647 value.clear();
4649 }
4650
4651 public:
4652 std::basic_string<T> value;
4653
4654 protected:
4655 std::shared_ptr<basic_parser<T>> m_digit;
4656 std::shared_ptr<basic_parser<T>> m_plus_sign;
4657 std::shared_ptr<basic_set<T>> m_lparenthesis;
4658 std::shared_ptr<basic_set<T>> m_rparenthesis;
4659 std::shared_ptr<basic_parser<T>> m_separator;
4660 std::shared_ptr<basic_parser<T>> m_space;
4661 };
4662
4665#ifdef _UNICODE
4667#else
4669#endif
4671
4677 template <class T>
4678 class basic_iban : public basic_parser<T>
4679 {
4680 public:
4681 basic_iban(
4682 _In_ const std::shared_ptr<basic_parser<T>>& space,
4683 _In_ const std::locale& locale = std::locale()) :
4684 basic_parser<T>(locale),
4685 m_space(space)
4686 {
4687 this->country[0] = 0;
4688 this->check_digits[0] = 0;
4689 this->bban[0] = 0;
4690 this->is_valid = false;
4691 }
4692
4693 virtual bool match(
4694 _In_reads_or_z_(end) const T* text,
4695 _In_ size_t start = 0,
4696 _In_ size_t end = (size_t)-1,
4697 _In_ int flags = match_default)
4698 {
4699 _Assume_(text || start >= end);
4700 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4701 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4702 struct country_t {
4703 T country[2];
4704 T check_digits[2];
4705 size_t length;
4706 };
4707 static const country_t s_countries[] = {
4708 { { 'A', 'D' }, {}, 24 }, // Andorra
4709 { { 'A', 'E' }, {}, 23 }, // United Arab Emirates
4710 { { 'A', 'L' }, {}, 28 }, // Albania
4711 { { 'A', 'O' }, {}, 25 }, // Angola
4712 { { 'A', 'T' }, {}, 20 }, // Austria
4713 { { 'A', 'Z' }, {}, 28 }, // Azerbaijan
4714 { { 'B', 'A' }, { '3', '9' }, 20}, // Bosnia and Herzegovina
4715 { { 'B', 'E' }, {}, 16 }, // Belgium
4716 { { 'B', 'F' }, {}, 28 }, // Burkina Faso
4717 { { 'B', 'G' }, {}, 22 }, // Bulgaria
4718 { { 'B', 'H' }, {}, 22 }, // Bahrain
4719 { { 'B', 'I' }, {}, 27 }, // Burundi
4720 { { 'B', 'J' }, {}, 28 }, // Benin
4721 { { 'B', 'R' }, {}, 29 }, // Brazil
4722 { { 'B', 'Y' }, {}, 28 }, // Belarus
4723 { { 'C', 'F' }, {}, 27 }, // Central African Republic
4724 { { 'C', 'G' }, {}, 27 }, // Congo, Republic of the
4725 { { 'C', 'H' }, {}, 21 }, // Switzerland
4726 { { 'C', 'I' }, {}, 28 }, // Côte d'Ivoire
4727 { { 'C', 'M' }, {}, 27 }, // Cameroon
4728 { { 'C', 'R' }, {}, 22 }, // Costa Rica
4729 { { 'C', 'V' }, {}, 25 }, // Cabo Verde
4730 { { 'C', 'Y' }, {}, 28 }, // Cyprus
4731 { { 'C', 'Z' }, {}, 24 }, // Czech Republic
4732 { { 'D', 'E' }, {}, 22 }, // Germany
4733 { { 'D', 'J' }, {}, 27 }, // Djibouti
4734 { { 'D', 'K' }, {}, 18 }, // Denmark
4735 { { 'D', 'O' }, {}, 28 }, // Dominican Republic
4736 { { 'D', 'Z' }, {}, 26 }, // Algeria
4737 { { 'E', 'E' }, {}, 20 }, // Estonia
4738 { { 'E', 'G' }, {}, 29 }, // Egypt
4739 { { 'E', 'S' }, {}, 24 }, // Spain
4740 { { 'F', 'I' }, {}, 18 }, // Finland
4741 { { 'F', 'O' }, {}, 18 }, // Faroe Islands
4742 { { 'F', 'R' }, {}, 27 }, // France
4743 { { 'G', 'A' }, {}, 27 }, // Gabon
4744 { { 'G', 'B' }, {}, 22 }, // United Kingdom
4745 { { 'G', 'E' }, {}, 22 }, // Georgia
4746 { { 'G', 'I' }, {}, 23 }, // Gibraltar
4747 { { 'G', 'L' }, {}, 18 }, // Greenland
4748 { { 'G', 'Q' }, {}, 27 }, // Equatorial Guinea
4749 { { 'G', 'R' }, {}, 27 }, // Greece
4750 { { 'G', 'T' }, {}, 28 }, // Guatemala
4751 { { 'G', 'W' }, {}, 25 }, // Guinea-Bissau
4752 { { 'H', 'N' }, {}, 28 }, // Honduras
4753 { { 'H', 'R' }, {}, 21 }, // Croatia
4754 { { 'H', 'U' }, {}, 28 }, // Hungary
4755 { { 'I', 'E' }, {}, 22 }, // Ireland
4756 { { 'I', 'L' }, {}, 23 }, // Israel
4757 { { 'I', 'Q' }, {}, 23 }, // Iraq
4758 { { 'I', 'R' }, {}, 26 }, // Iran
4759 { { 'I', 'S' }, {}, 26 }, // Iceland
4760 { { 'I', 'T' }, {}, 27 }, // Italy
4761 { { 'J', 'O' }, {}, 30 }, // Jordan
4762 { { 'K', 'M' }, {}, 27 }, // Comoros
4763 { { 'K', 'W' }, {}, 30 }, // Kuwait
4764 { { 'K', 'Z' }, {}, 20 }, // Kazakhstan
4765 { { 'L', 'B' }, {}, 28 }, // Lebanon
4766 { { 'L', 'C' }, {}, 32 }, // Saint Lucia
4767 { { 'L', 'I' }, {}, 21 }, // Liechtenstein
4768 { { 'L', 'T' }, {}, 20 }, // Lithuania
4769 { { 'L', 'U' }, {}, 20 }, // Luxembourg
4770 { { 'L', 'V' }, {}, 21 }, // Latvia
4771 { { 'L', 'Y' }, {}, 25 }, // Libya
4772 { { 'M', 'A' }, {}, 28 }, // Morocco
4773 { { 'M', 'C' }, {}, 27 }, // Monaco
4774 { { 'M', 'D' }, {}, 24 }, // Moldova
4775 { { 'M', 'E' }, { '2', '5' }, 22 }, // Montenegro
4776 { { 'M', 'G' }, {}, 27 }, // Madagascar
4777 { { 'M', 'K' }, { '0', '7' }, 19 }, // North Macedonia
4778 { { 'M', 'L' }, {}, 28 }, // Mali
4779 { { 'M', 'R' }, { '1', '3' }, 27}, // Mauritania
4780 { { 'M', 'T' }, {}, 31 }, // Malta
4781 { { 'M', 'U' }, {}, 30 }, // Mauritius
4782 { { 'M', 'Z' }, {}, 25 }, // Mozambique
4783 { { 'N', 'E' }, {}, 28 }, // Niger
4784 { { 'N', 'I' }, {}, 32 }, // Nicaragua
4785 { { 'N', 'L' }, {}, 18 }, // Netherlands
4786 { { 'N', 'O' }, {}, 15 }, // Norway
4787 { { 'P', 'K' }, {}, 24 }, // Pakistan
4788 { { 'P', 'L' }, {}, 28 }, // Poland
4789 { { 'P', 'S' }, {}, 29 }, // Palestinian territories
4790 { { 'P', 'T' }, { '5', '0' }, 25 }, // Portugal
4791 { { 'Q', 'A' }, {}, 29 }, // Qatar
4792 { { 'R', 'O' }, {}, 24 }, // Romania
4793 { { 'R', 'S' }, { '3', '5' }, 22 }, // Serbia
4794 { { 'R', 'U' }, {}, 33 }, // Russia
4795 { { 'S', 'A' }, {}, 24 }, // Saudi Arabia
4796 { { 'S', 'C' }, {}, 31 }, // Seychelles
4797 { { 'S', 'D' }, {}, 18 }, // Sudan
4798 { { 'S', 'E' }, {}, 24 }, // Sweden
4799 { { 'S', 'I' }, { '5', '6' }, 19 }, // Slovenia
4800 { { 'S', 'K' }, {}, 24 }, // Slovakia
4801 { { 'S', 'M' }, {}, 27 }, // San Marino
4802 { { 'S', 'N' }, {}, 28 }, // Senegal
4803 { { 'S', 'T' }, {}, 25 }, // São Tomé and Príncipe
4804 { { 'S', 'V' }, {}, 28 }, // El Salvador
4805 { { 'T', 'D' }, {}, 27 }, // Chad
4806 { { 'T', 'G' }, {}, 28 }, // Togo
4807 { { 'T', 'L' }, { '3', '8' }, 23}, // East Timor
4808 { { 'T', 'N' }, { '5', '9' }, 24 }, // Tunisia
4809 { { 'T', 'R' }, {}, 26 }, // Turkey
4810 { { 'U', 'A' }, {}, 29 }, // Ukraine
4811 { { 'V', 'A' }, {}, 22 }, // Vatican City
4812 { { 'V', 'G' }, {}, 24 }, // Virgin Islands, British
4813 { { 'X', 'K' }, {}, 20 }, // Kosovo
4814 };
4815 const country_t* country_desc = nullptr;
4816 size_t n, available, next, bban_length;
4818
4819 this->interval.end = start;
4820 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4821 if (this->interval.end >= end || !text[this->interval.end])
4822 goto error; // incomplete country code
4823 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4824 if (chr < 'A' || 'Z' < chr)
4825 goto error; // invalid country code
4826 this->country[i] = chr;
4827 }
4828 for (size_t l = 0, r = _countof(s_countries);;) {
4829 if (l >= r)
4830 goto error; // unknown country
4831 size_t m = (l + r) / 2;
4832 const country_t& c = s_countries[m];
4833 if (c.country[0] < this->country[0] || (c.country[0] == this->country[0] && c.country[1] < this->country[1]))
4834 l = m + 1;
4835 else if (this->country[0] < c.country[0] || (this->country[0] == c.country[0] && this->country[1] < c.country[1]))
4836 r = m;
4837 else {
4838 country_desc = &c;
4839 break;
4840 }
4841 }
4842 this->country[2] = 0;
4843
4844 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4845 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4846 goto error; // incomplete or invalid check digits
4847 this->check_digits[i] = text[this->interval.end];
4848 }
4849 this->check_digits[2] = 0;
4850
4851 if ((country_desc->check_digits[0] && this->check_digits[0] != country_desc->check_digits[0]) ||
4852 (country_desc->check_digits[1] && this->check_digits[1] != country_desc->check_digits[1]))
4853 goto error; // unexpected check digits
4854
4855 bban_length = country_desc->length - 4;
4856 for (n = 0; n < bban_length;) {
4857 if (this->interval.end >= end || !text[this->interval.end])
4858 goto error; // bban too short
4859 if (m_space && m_space->match(text, this->interval.end, end, flags)) {
4860 this->interval.end = m_space->interval.end;
4861 continue;
4862 }
4863 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4864 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4865 this->bban[n++] = chr;
4866 this->interval.end++;
4867 }
4868 else
4869 goto error; // invalid bban
4870 }
4871 this->bban[n] = 0;
4872
4873 // Normalize IBAN.
4874 T normalized[69];
4875 available = 0;
4876 for (size_t i = 0; ; ++i) {
4877 if (!this->bban[i]) {
4878 for (i = 0; i < 2; ++i) {
4879 if ('A' <= this->country[i] && this->country[i] <= 'J') {
4880 normalized[available++] = '1';
4881 normalized[available++] = '0' + this->country[i] - 'A';
4882 }
4883 else if ('K' <= this->country[i] && this->country[i] <= 'T') {
4884 normalized[available++] = '2';
4885 normalized[available++] = '0' + this->country[i] - 'K';
4886 }
4887 else if ('U' <= this->country[i] && this->country[i] <= 'Z') {
4888 normalized[available++] = '3';
4889 normalized[available++] = '0' + this->country[i] - 'U';
4890 }
4891 }
4892 normalized[available++] = this->check_digits[0];
4893 normalized[available++] = this->check_digits[1];
4894 normalized[available] = 0;
4895 break;
4896 }
4897 if ('0' <= this->bban[i] && this->bban[i] <= '9')
4898 normalized[available++] = this->bban[i];
4899 else if ('A' <= this->bban[i] && this->bban[i] <= 'J') {
4900 normalized[available++] = '1';
4901 normalized[available++] = '0' + this->bban[i] - 'A';
4902 }
4903 else if ('K' <= this->bban[i] && this->bban[i] <= 'T') {
4904 normalized[available++] = '2';
4905 normalized[available++] = '0' + this->bban[i] - 'K';
4906 }
4907 else if ('U' <= this->bban[i] && this->bban[i] <= 'Z') {
4908 normalized[available++] = '3';
4909 normalized[available++] = '0' + this->bban[i] - 'U';
4910 }
4911 }
4912
4913 // Calculate modulo 97.
4914 nominator = stdex::strtou32(normalized, 9, &next, 10);
4915 for (;;) {
4916 nominator %= 97;
4917 if (!normalized[next]) {
4918 this->is_valid = nominator == 1;
4919 break;
4920 }
4921 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
4922 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
4923 nominator = nominator * 10 + (normalized[next] - '0');
4924 }
4925
4926 this->interval.start = start;
4927 return true;
4928
4929 error:
4930 this->country[0] = 0;
4931 this->check_digits[0] = 0;
4932 this->bban[0] = 0;
4933 this->is_valid = false;
4934 this->interval.start = (this->interval.end = start) + 1;
4935 return false;
4936 }
4937
4938 virtual void invalidate()
4939 {
4940 this->country[0] = 0;
4941 this->check_digits[0] = 0;
4942 this->bban[0] = 0;
4943 this->is_valid = false;
4945 }
4946
4947 public:
4948 T country[3];
4950 T bban[31];
4952
4953 protected:
4954 std::shared_ptr<basic_parser<T>> m_space;
4955 };
4956
// Convenience aliases for the IBAN parser:
//   iban  - basic_iban instantiated for char text,
//   wiban - basic_iban instantiated for wchar_t text,
//   tiban - resolves to wiban when _UNICODE is defined, to iban otherwise.
4957 using iban = basic_iban<char>;
4958 using wiban = basic_iban<wchar_t>;
4959#ifdef _UNICODE
4960 using tiban = wiban;
4961#else
4962 using tiban = iban;
4963#endif
4965
4971 template <class T>
4973 {
4974 public:
// Constructor parameter list and body: stores `space` (null-checked by
// match() before use) in m_space, forwards `locale` to basic_parser<T> and
// starts with empty check digits, an empty reference and is_valid == false.
4976 _In_ const std::shared_ptr<basic_parser<T>>& space,
4977 _In_ const std::locale& locale = std::locale()) :
4978 basic_parser<T>(locale),
4979 m_space(space)
4980 {
4981 this->check_digits[0] = 0;
4982 this->reference[0] = 0;
4983 this->is_valid = false;
4984 }
4985
4986 virtual bool match(
4987 _In_reads_or_z_(end) const T* text,
4988 _In_ size_t start = 0,
4989 _In_ size_t end = (size_t)-1,
4990 _In_ int flags = match_default)
4991 {
4992 _Assume_(text || start >= end);
4993 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4994 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4995 size_t n, available, next;
4997
4998 this->interval.end = start;
4999 if (this->interval.end + 1 >= end ||
5000 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'R' ||
5001 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'F')
5002 goto error; // incomplete or wrong reference ID
5003 this->interval.end += 2;
5004
5005 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5006 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5007 goto error; // incomplete or invalid check digits
5008 this->check_digits[i] = text[this->interval.end];
5009 }
5010 this->check_digits[2] = 0;
5011
5012 for (n = 0;;) {
5013 if (m_space && m_space->match(text, this->interval.end, end, flags))
5014 this->interval.end = m_space->interval.end;
5015 for (size_t j = 0; j < 4; ++j) {
5016 if (this->interval.end >= end || !text[this->interval.end])
5017 goto out;
5018 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
5019 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
5020 if (n >= _countof(reference) - 1)
5021 goto error; // reference overflow
5022 this->reference[n++] = chr;
5023 this->interval.end++;
5024 }
5025 else
5026 goto out;
5027 }
5028 }
5029 out:
5030 if (!n)
5031 goto error; // reference too short
5032 this->reference[_countof(this->reference) - 1] = 0;
5033 for (size_t i = n, j = _countof(this->reference) - 1; i;)
5034 this->reference[--j] = this->reference[--i];
5035 for (size_t j = _countof(this->reference) - 1 - n; j;)
5036 this->reference[--j] = '0';
5037
5038 // Normalize creditor reference.
5039 T normalized[47];
5040 available = 0;
5041 for (size_t i = 0; ; ++i) {
5042 if (!this->reference[i]) {
5043 normalized[available++] = '2'; // R
5044 normalized[available++] = '7';
5045 normalized[available++] = '1'; // F
5046 normalized[available++] = '5';
5047 normalized[available++] = this->check_digits[0];
5048 normalized[available++] = this->check_digits[1];
5049 normalized[available] = 0;
5050 break;
5051 }
5052 if ('0' <= this->reference[i] && this->reference[i] <= '9')
5053 normalized[available++] = this->reference[i];
5054 else if ('A' <= this->reference[i] && this->reference[i] <= 'J') {
5055 normalized[available++] = '1';
5056 normalized[available++] = '0' + this->reference[i] - 'A';
5057 }
5058 else if ('K' <= this->reference[i] && this->reference[i] <= 'T') {
5059 normalized[available++] = '2';
5060 normalized[available++] = '0' + this->reference[i] - 'K';
5061 }
5062 else if ('U' <= this->reference[i] && this->reference[i] <= 'Z') {
5063 normalized[available++] = '3';
5064 normalized[available++] = '0' + this->reference[i] - 'U';
5065 }
5066 }
5067
5068 // Calculate modulo 97.
5069 nominator = stdex::strtou32(normalized, 9, &next, 10);
5070 for (;;) {
5071 nominator %= 97;
5072 if (!normalized[next]) {
5073 this->is_valid = nominator == 1;
5074 break;
5075 }
5076 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
5077 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
5078 nominator = nominator * 10 + (normalized[next] - '0');
5079 }
5080
5081 this->interval.start = start;
5082 return true;
5083
5084 error:
5085 this->check_digits[0] = 0;
5086 this->reference[0] = 0;
5087 this->is_valid = false;
5088 this->interval.start = (this->interval.end = start) + 1;
5089 return false;
5090 }
5091
5092 virtual void invalidate()
5093 {
5094 this->check_digits[0] = 0;
5095 this->reference[0] = 0;
5096 this->is_valid = false;
5098 }
5099
5100 public:
5104
5105 protected:
5106 std::shared_ptr<basic_parser<T>> m_space;
5107 };
5108
5111#ifdef _UNICODE
5113#else
5115#endif
5117
5123 template <class T>
5125 {
5126 public:
5127 basic_si_reference_part(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5128
5129 virtual bool match(
5130 _In_reads_or_z_(end) const T* text,
5131 _In_ size_t start = 0,
5132 _In_ size_t end = (size_t)-1,
5133 _In_ int flags = match_default)
5134 {
5135 _Assume_(text || start >= end);
5136 this->interval.end = start;
5137 for (;;) {
5138 if (this->interval.end >= end || !text[this->interval.end])
5139 break;
5140 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9')
5141 this->interval.end++;
5142 else
5143 break;
5144 }
5146 this->interval.start = start;
5147 return true;
5148 }
5149 this->interval.start = (this->interval.end = start) + 1;
5150 return false;
5151 }
5152 };
5153
5156#ifdef _UNICODE
5158#else
5160#endif
5162
5168 template <class T>
5170 {
5171 public:
5172 basic_si_reference_delimiter(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5173
5174 virtual bool match(
5175 _In_reads_or_z_(end) const T* text,
5176 _In_ size_t start = 0,
5177 _In_ size_t end = (size_t)-1,
5178 _In_ int flags = match_default)
5179 {
5180 _Assume_(text || start >= end);
5181 if (start < end && text[start] == '-') {
5182 this->interval.end = (this->interval.start = start) + 1;
5183 return true;
5184 }
5185 this->interval.start = (this->interval.end = start) + 1;
5186 return false;
5187 }
5188 };
5189
5192#ifdef _UNICODE
5194#else
5196#endif
5198
5206 template <class T>
5208 {
5209 public:
5211 _In_ const std::shared_ptr<basic_parser<T>>& space,
5212 _In_ const std::locale& locale = std::locale()) :
5213 basic_parser<T>(locale),
5214 part1(locale),
5215 part2(locale),
5216 part3(locale),
5217 is_valid(false),
5218 m_space(space),
5219 m_delimiter(locale)
5220 {
5221 this->model[0] = 0;
5222 }
5223
5224 virtual bool match(
5225 _In_reads_or_z_(end) const T* text,
5226 _In_ size_t start = 0,
5227 _In_ size_t end = (size_t)-1,
5228 _In_ int flags = match_default)
5229 {
5230 _Assume_(text || start >= end);
5231 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
5232 const bool case_insensitive = flags & match_case_insensitive ? true : false;
5233
5234 this->interval.end = start;
5235 if (this->interval.end + 1 >= end ||
5236 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'S' ||
5237 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'I')
5238 goto error; // incomplete or wrong reference ID
5239 this->interval.end += 2;
5240
5241 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5242 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5243 goto error; // incomplete or invalid model
5244 this->model[i] = text[this->interval.end];
5245 }
5246 this->model[2] = 0;
5247
5248 this->part1.invalidate();
5249 this->part2.invalidate();
5250 this->part3.invalidate();
5251 if (this->model[0] == '9' && this->model[1] == '9') {
5252 is_valid = true;
5253 this->interval.start = start;
5254 return true;
5255 }
5256
5257 if (m_space && m_space->match(text, this->interval.end, end, flags))
5258 this->interval.end = m_space->interval.end;
5259
5260 this->part1.match(text, this->interval.end, end, flags) &&
5261 this->m_delimiter.match(text, this->part1.interval.end, end, flags) &&
5262 this->part2.match(text, this->m_delimiter.interval.end, end, flags) &&
5263 this->m_delimiter.match(text, this->part2.interval.end, end, flags) &&
5264 this->part3.match(text, this->m_delimiter.interval.end, end, flags);
5265
5266 this->interval.start = start;
5267 if (this->part3.interval)
5268 this->interval.end = this->part3.interval.end;
5269 else if (this->part2.interval)
5270 this->interval.end = this->part2.interval.end;
5271 else if (this->part1.interval)
5272 this->interval.end = this->part1.interval.end;
5273 else
5274 this->interval.end = start + 4;
5275
5276 if (this->model[0] == '0' && this->model[1] == '0')
5277 is_valid =
5278 this->part3.interval ?
5279 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5280 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 :
5281 this->part2.interval ?
5282 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5283 this->part1.interval.size() + this->part2.interval.size() <= 20 :
5284 this->part1.interval ?
5285 this->part1.interval.size() <= 12 :
5286 false;
5287 else if (this->model[0] == '0' && this->model[1] == '1')
5288 is_valid =
5289 this->part3.interval ?
5290 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5291 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5292 check11(
5293 text + this->part1.interval.start, this->part1.interval.size(),
5294 text + this->part2.interval.start, this->part2.interval.size(),
5295 text + this->part3.interval.start, this->part3.interval.size()) :
5296 this->part2.interval ?
5297 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5298 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5299 check11(
5300 text + this->part1.interval.start, this->part1.interval.size(),
5301 text + this->part2.interval.start, this->part2.interval.size()) :
5302 this->part1.interval ?
5303 this->part1.interval.size() <= 12 &&
5304 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5305 false;
5306 else if (this->model[0] == '0' && this->model[1] == '2')
5307 is_valid =
5308 this->part3.interval ?
5309 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5310 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5311 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5312 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5313 false;
5314 else if (this->model[0] == '0' && this->model[1] == '3')
5315 is_valid =
5316 this->part3.interval ?
5317 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5318 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5319 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5320 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5321 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5322 false;
5323 else if (this->model[0] == '0' && this->model[1] == '4')
5324 is_valid =
5325 this->part3.interval ?
5326 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5327 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5328 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5329 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5330 false;
5331 else if ((this->model[0] == '0' || this->model[0] == '5') && this->model[1] == '5')
5332 is_valid =
5333 this->part3.interval ?
5334 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5335 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5336 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5337 this->part2.interval ?
5338 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5339 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5340 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5341 this->part1.interval ?
5342 this->part1.interval.size() <= 12 &&
5343 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5344 false;
5345 else if (this->model[0] == '0' && this->model[1] == '6')
5346 is_valid =
5347 this->part3.interval ?
5348 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5349 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5350 check11(
5351 text + this->part2.interval.start, this->part2.interval.size(),
5352 text + this->part3.interval.start, this->part3.interval.size()) :
5353 this->part2.interval ?
5354 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5355 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5356 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5357 false;
5358 else if (this->model[0] == '0' && this->model[1] == '7')
5359 is_valid =
5360 this->part3.interval ?
5361 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5362 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5363 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5364 this->part2.interval ?
5365 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5366 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5367 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5368 false;
5369 else if (this->model[0] == '0' && this->model[1] == '8')
5370 is_valid =
5371 this->part3.interval ?
5372 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5373 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5374 check11(
5375 text + this->part1.interval.start, this->part1.interval.size(),
5376 text + this->part2.interval.start, this->part2.interval.size()) &&
5377 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5378 false;
5379 else if (this->model[0] == '0' && this->model[1] == '9')
5380 is_valid =
5381 this->part3.interval ?
5382 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5383 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5384 check11(
5385 text + this->part1.interval.start, this->part1.interval.size(),
5386 text + this->part2.interval.start, this->part2.interval.size()) :
5387 this->part2.interval ?
5388 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5389 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5390 check11(
5391 text + this->part1.interval.start, this->part1.interval.size(),
5392 text + this->part2.interval.start, this->part2.interval.size()) :
5393 this->part1.interval ?
5394 this->part1.interval.size() <= 12 &&
5395 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5396 false;
5397 else if (this->model[0] == '1' && this->model[1] == '0')
5398 is_valid =
5399 this->part3.interval ?
5400 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5401 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5402 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5403 check11(
5404 text + this->part2.interval.start, this->part2.interval.size(),
5405 text + this->part3.interval.start, this->part3.interval.size()) :
5406 this->part2.interval ?
5407 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5408 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5409 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5410 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5411 false;
5412 else if (
5413 (this->model[0] == '1' && (this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5414 ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '8') ||
5415 (this->model[0] == '4' && (this->model[1] == '0' || this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5416 (this->model[0] == '5' && (this->model[1] == '1' || this->model[1] == '8')))
5417 is_valid =
5418 this->part3.interval ?
5419 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5420 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5421 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5422 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5423 this->part2.interval ?
5424 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5425 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5426 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5427 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5428 false;
5429 else if (this->model[0] == '1' && this->model[1] == '2')
5430 is_valid =
5431 this->part3.interval ? false :
5432 this->part2.interval ? false :
5433 this->part1.interval ?
5434 this->part1.interval.size() <= 13 &&
5435 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5436 false;
5437 else if ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '1')
5438 is_valid =
5439 this->part3.interval ? false :
5440 this->part2.interval ?
5441 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5442 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5443 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5444 false;
5445 else
5446 is_valid = true; // Assume models we don't handle as valid
5447 return true;
5448
5449 error:
5450 this->model[0] = 0;
5451 this->part1.interval.start = (this->part1.interval.end = start) + 1;
5452 this->part2.interval.start = (this->part2.interval.end = start) + 1;
5453 this->part3.interval.start = (this->part3.interval.end = start) + 1;
5454 this->is_valid = false;
5455 this->interval.start = (this->interval.end = start) + 1;
5456 return false;
5457 }
5458
5459 virtual void invalidate()
5460 {
5461 this->model[0] = 0;
5462 this->part1.invalidate();
5463 this->part2.invalidate();
5464 this->part3.invalidate();
5465 this->is_valid = false;
5467 }
5468
5469 protected:
5470 static bool check11(
5471 _In_count_(num_part1) const T* part1, _In_ size_t num_part1)
5472 {
5473 _Assume_(part1 && num_part1 >= 1);
5474 uint32_t nominator = 0, ponder = 2;
5475 for (size_t i = num_part1 - 1; i--; ++ponder)
5476 nominator += (part1[i] - '0') * ponder;
5477 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5478 if (control >= 10)
5479 control = 0;
5480 return control == part1[num_part1 - 1] - '0';
5481 }
5482
5483 static bool check11(
5484 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5485 _In_count_(num_part2) const T* part2, _In_ size_t num_part2)
5486 {
5487 _Assume_(part1 || !num_part1);
5488 _Assume_(part2 && num_part2 >= 1);
5489 uint32_t nominator = 0, ponder = 2;
5490 for (size_t i = num_part2 - 1; i--; ++ponder)
5491 nominator += (part2[i] - '0') * ponder;
5492 for (size_t i = num_part1; i--; ++ponder)
5493 nominator += (part1[i] - '0') * ponder;
5494 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5495 if (control == 10)
5496 control = 0;
5497 return control == part2[num_part2 - 1] - '0';
5498 }
5499
5500 static bool check11(
5501 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5502 _In_count_(num_part2) const T* part2, _In_ size_t num_part2,
5503 _In_count_(num_part3) const T* part3, _In_ size_t num_part3)
5504 {
5505 _Assume_(part1 || !num_part1);
5506 _Assume_(part2 || !num_part2);
5507 _Assume_(part3 && num_part3 >= 1);
5508 uint32_t nominator = 0, ponder = 2;
5509 for (size_t i = num_part3 - 1; i--; ++ponder)
5510 nominator += (part3[i] - '0') * ponder;
5511 for (size_t i = num_part2; i--; ++ponder)
5512 nominator += (part2[i] - '0') * ponder;
5513 for (size_t i = num_part1; i--; ++ponder)
5514 nominator += (part1[i] - '0') * ponder;
5515 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5516 if (control == 10)
5517 control = 0;
5518 return control == part2[num_part3 - 1] - '0';
5519 }
5520
5521 public:
5522 T model[3];
5527
5528 protected:
5529 std::shared_ptr<basic_parser<T>> m_space;
5531 };
5532
5535#ifdef _UNICODE
5537#else
5539#endif
5541
5545 template <class T>
5547 {
5548 public:
5550 _In_ const std::shared_ptr<basic_parser<T>>& element,
5551 _In_ const std::shared_ptr<basic_parser<T>>& digit,
5552 _In_ const std::shared_ptr<basic_parser<T>>& sign,
5553 _In_ const std::locale& locale = std::locale()) :
5554 basic_parser<T>(locale),
5555 m_element(element),
5556 m_digit(digit),
5557 m_sign(sign),
5558 has_digits(false),
5559 has_charge(false)
5560 {}
5561
5562 virtual bool match(
5563 _In_reads_or_z_(end) const T* text,
5564 _In_ size_t start = 0,
5565 _In_ size_t end = (size_t)-1,
5566 _In_ int flags = match_default)
5567 {
5568 _Assume_(text || start >= end);
5569
5570 has_digits = false;
5571 has_charge = false;
5572 this->interval.end = start;
5573
5574 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
5575 for (;;) {
5576 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
5577 this->interval.end = m_element->interval.end;
5578 while (m_digit->match(text, this->interval.end, end, flags)) {
5579 this->interval.end = m_digit->interval.end;
5580 has_digits = true;
5581 }
5582 }
5583 else if (start < this->interval.end) {
5584 if (m_sign->match(text, this->interval.end, end, flags)) {
5585 this->interval.end = m_sign->interval.end;
5586 has_charge = true;
5587 }
5588 this->interval.start = start;
5589 return true;
5590 }
5591 else {
5592 this->interval.start = (this->interval.end = start) + 1;
5593 return false;
5594 }
5595 }
5596 }
5597
5598 virtual void invalidate()
5599 {
5600 has_digits = false;
5601 has_charge = false;
5603 }
5604
5605 public:
5606 bool has_digits;
5607 bool has_charge;
5608
5609 protected:
5610 std::shared_ptr<basic_parser<T>> m_element;
5611 std::shared_ptr<basic_parser<T>> m_digit;
5612 std::shared_ptr<basic_parser<T>> m_sign;
5613 };
5614
5617#ifdef _UNICODE
5619#else
5621#endif
5623
5628 {
5629 public:
5630 virtual bool match(
5631 _In_reads_or_z_(end) const char* text,
5632 _In_ size_t start = 0,
5633 _In_ size_t end = (size_t)-1,
5634 _In_ int flags = match_default)
5635 {
5636 _Assume_(text || start >= end);
5637 this->interval.end = start;
5638
5639 _Assume_(text || this->interval.end >= end);
5640 if (this->interval.end < end && text[this->interval.end]) {
5641 if (text[this->interval.end] == '\r') {
5642 this->interval.end++;
5643 if (this->interval.end < end && text[this->interval.end] == '\n') {
5644 this->interval.start = start;
5645 this->interval.end++;
5646 return true;
5647 }
5648 }
5649 else if (text[this->interval.end] == '\n') {
5650 this->interval.start = start;
5651 this->interval.end++;
5652 return true;
5653 }
5654 }
5655 this->interval.start = (this->interval.end = start) + 1;
5656 return false;
5657 }
5658 };
5659
5663 class http_space : public parser
5664 {
5665 public:
5666 virtual bool match(
5667 _In_reads_or_z_(end) const char* text,
5668 _In_ size_t start = 0,
5669 _In_ size_t end = (size_t)-1,
5670 _In_ int flags = match_default)
5671 {
5672 _Assume_(text || start >= end);
5673 this->interval.end = start;
5674 if (m_line_break.match(text, this->interval.end, end, flags)) {
5675 this->interval.end = m_line_break.interval.end;
5676 if (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) {
5677 this->interval.start = start;
5678 this->interval.end++;
5679 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
5680 return true;
5681 }
5682 }
5683 else if (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) {
5684 this->interval.start = start;
5685 this->interval.end++;
5686 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
5687 return true;
5688 }
5689 this->interval.start = (this->interval.end = start) + 1;
5690 return false;
5691 }
5692
5693 protected:
5694 http_line_break m_line_break;
5695 };
5696
5700 class http_text_char : public parser
5701 {
5702 public:
5703 virtual bool match(
5704 _In_reads_or_z_(end) const char* text,
5705 _In_ size_t start = 0,
5706 _In_ size_t end = (size_t)-1,
5707 _In_ int flags = match_default)
5708 {
5709 _Assume_(text || start >= end);
5710 this->interval.end = start;
5711
5712 _Assume_(text || this->interval.end >= end);
5713 if (m_space.match(text, this->interval.end, end, flags)) {
5714 this->interval.start = start;
5715 this->interval.end = m_space.interval.end;
5716 return true;
5717 }
5718 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
5719 this->interval.start = start;
5720 this->interval.end++;
5721 return true;
5722 }
5723 this->interval.start = (this->interval.end = start) + 1;
5724 return false;
5725 }
5726
5727 protected:
5728 http_space m_space;
5729 };
5730
5734 class http_token : public parser
5735 {
5736 public:
5737 virtual bool match(
5738 _In_reads_or_z_(end) const char* text,
5739 _In_ size_t start = 0,
5740 _In_ size_t end = (size_t)-1,
5741 _In_ int flags = match_default)
5742 {
5743 _Assume_(text || start >= end);
5744 this->interval.end = start;
5745 for (;;) {
5746 if (this->interval.end < end && text[this->interval.end]) {
5747 if ((unsigned int)text[this->interval.end] < 0x20 ||
5748 (unsigned int)text[this->interval.end] == 0x7f ||
5749 text[this->interval.end] == '(' ||
5750 text[this->interval.end] == ')' ||
5751 text[this->interval.end] == '<' ||
5752 text[this->interval.end] == '>' ||
5753 text[this->interval.end] == '@' ||
5754 text[this->interval.end] == ',' ||
5755 text[this->interval.end] == ';' ||
5756 text[this->interval.end] == ':' ||
5757 text[this->interval.end] == '\\' ||
5758 text[this->interval.end] == '\"' ||
5759 text[this->interval.end] == '/' ||
5760 text[this->interval.end] == '[' ||
5761 text[this->interval.end] == ']' ||
5762 text[this->interval.end] == '?' ||
5763 text[this->interval.end] == '=' ||
5764 text[this->interval.end] == '{' ||
5765 text[this->interval.end] == '}' ||
5766 isspace(text[this->interval.end]))
5767 break;
5768 else
5769 this->interval.end++;
5770 }
5771 else
5772 break;
5773 }
5775 this->interval.start = start;
5776 return true;
5777 }
5778 else {
5779 this->interval.start = (this->interval.end = start) + 1;
5780 return false;
5781 }
5782 }
5783 };
5784
5789 {
5790 public:
5791 virtual bool match(
5792 _In_reads_or_z_(end) const char* text,
5793 _In_ size_t start = 0,
5794 _In_ size_t end = (size_t)-1,
5795 _In_ int flags = match_default)
5796 {
5797 _Assume_(text || start >= end);
5798 this->interval.end = start;
5799 if (this->interval.end < end && text[this->interval.end] != '"')
5800 goto error;
5801 this->interval.end++;
5802 content.start = this->interval.end;
5803 for (;;) {
5804 _Assume_(text || this->interval.end >= end);
5805 if (this->interval.end < end && text[this->interval.end]) {
5806 if (text[this->interval.end] == '"') {
5807 content.end = this->interval.end;
5808 this->interval.end++;
5809 break;
5810 }
5811 else if (text[this->interval.end] == '\\') {
5812 this->interval.end++;
5813 if (this->interval.end < end && text[this->interval.end]) {
5814 this->interval.end++;
5815 }
5816 else
5817 goto error;
5818 }
5819 else if (m_chr.match(text, this->interval.end, end, flags))
5820 this->interval.end++;
5821 else
5822 goto error;
5823 }
5824 else
5825 goto error;
5826 }
5827 this->interval.start = start;
5828 return true;
5829
5830 error:
5831 content.start = 1;
5832 content.end = 0;
5833 this->interval.start = (this->interval.end = start) + 1;
5834 return false;
5835 }
5836
5837 virtual void invalidate()
5838 {
5839 content.start = 1;
5840 content.end = 0;
5841 parser::invalidate();
5842 }
5843
5844 public:
5846
5847 protected:
5848 http_text_char m_chr;
5849 };
5850
5854 class http_value : public parser
5855 {
5856 public:
5857 virtual bool match(
5858 _In_reads_or_z_(end) const char* text,
5859 _In_ size_t start = 0,
5860 _In_ size_t end = (size_t)-1,
5861 _In_ int flags = match_default)
5862 {
5863 _Assume_(text || start >= end);
5864 this->interval.end = start;
5865 if (string.match(text, this->interval.end, end, flags)) {
5866 token.invalidate();
5867 this->interval.end = string.interval.end;
5868 this->interval.start = start;
5869 return true;
5870 }
5871 else if (token.match(text, this->interval.end, end, flags)) {
5872 string.invalidate();
5873 this->interval.end = token.interval.end;
5874 this->interval.start = start;
5875 return true;
5876 }
5877 else {
5878 this->interval.start = (this->interval.end = start) + 1;
5879 return false;
5880 }
5881 }
5882
5883 virtual void invalidate()
5884 {
5885 string.invalidate();
5886 token.invalidate();
5887 parser::invalidate();
5888 }
5889
5890 public:
5893 };
5894
5898 class http_parameter : public parser
5899 {
5900 public:
5901 virtual bool match(
5902 _In_reads_or_z_(end) const char* text,
5903 _In_ size_t start = 0,
5904 _In_ size_t end = (size_t)-1,
5905 _In_ int flags = match_default)
5906 {
5907 _Assume_(text || start >= end);
5908 this->interval.end = start;
5909 if (name.match(text, this->interval.end, end, flags))
5910 this->interval.end = name.interval.end;
5911 else
5912 goto error;
5913 while (m_space.match(text, this->interval.end, end, flags))
5914 this->interval.end = m_space.interval.end;
5915 _Assume_(text || this->interval.end >= end);
5916 if (this->interval.end < end && text[this->interval.end] == '=')
5917 this->interval.end++;
5918 else
5919 while (m_space.match(text, this->interval.end, end, flags))
5920 this->interval.end = m_space.interval.end;
5921 if (value.match(text, this->interval.end, end, flags))
5922 this->interval.end = value.interval.end;
5923 else
5924 goto error;
5925 this->interval.start = start;
5926 return true;
5927
5928 error:
5929 name.invalidate();
5930 value.invalidate();
5931 this->interval.start = (this->interval.end = start) + 1;
5932 return false;
5933 }
5934
5935 virtual void invalidate()
5936 {
5937 name.invalidate();
5938 value.invalidate();
5939 parser::invalidate();
5940 }
5941
5942 public:
5945
5946 protected:
5947 http_space m_space;
5948 };
5949
5953 class http_any_type : public parser
5954 {
5955 public:
5956 virtual bool match(
5957 _In_reads_or_z_(end) const char* text,
5958 _In_ size_t start = 0,
5959 _In_ size_t end = (size_t)-1,
5960 _In_ int flags = match_default)
5961 {
5962 _Assume_(text || start >= end);
5963 if (start + 2 < end &&
5964 text[start] == '*' &&
5965 text[start + 1] == '/' &&
5966 text[start + 2] == '*')
5967 {
5968 this->interval.end = (this->interval.start = start) + 3;
5969 return true;
5970 }
5971 else if (start < end && text[start] == '*') {
5972 this->interval.end = (this->interval.start = start) + 1;
5973 return true;
5974 }
5975 else {
5976 this->interval.start = (this->interval.end = start) + 1;
5977 return false;
5978 }
5979 }
5980 };
5981
5986 {
5987 public:
5988 virtual bool match(
5989 _In_reads_or_z_(end) const char* text,
5990 _In_ size_t start = 0,
5991 _In_ size_t end = (size_t)-1,
5992 _In_ int flags = match_default)
5993 {
5994 _Assume_(text || start >= end);
5995 this->interval.end = start;
5996 if (type.match(text, this->interval.end, end, flags))
5997 this->interval.end = type.interval.end;
5998 else
5999 goto error;
6000 while (m_space.match(text, this->interval.end, end, flags))
6001 this->interval.end = m_space.interval.end;
6002 if (this->interval.end < end && text[this->interval.end] == '/')
6003 this->interval.end++;
6004 else
6005 goto error;
6006 while (m_space.match(text, this->interval.end, end, flags))
6007 this->interval.end = m_space.interval.end;
6008 if (subtype.match(text, this->interval.end, end, flags))
6009 this->interval.end = subtype.interval.end;
6010 else
6011 goto error;
6012 this->interval.start = start;
6013 return true;
6014
6015 error:
6016 type.invalidate();
6017 subtype.invalidate();
6018 this->interval.start = (this->interval.end = start) + 1;
6019 return false;
6020 }
6021
6022 virtual void invalidate()
6023 {
6024 type.invalidate();
6025 subtype.invalidate();
6026 parser::invalidate();
6027 }
6028
6029 public:
6030 http_token type;
6031 http_token subtype;
6032
6033 protected:
6034 http_space m_space;
6035 };
6036
6041 {
6042 public:
6043 virtual bool match(
6044 _In_reads_or_z_(end) const char* text,
6045 _In_ size_t start = 0,
6046 _In_ size_t end = (size_t)-1,
6047 _In_ int flags = match_default)
6048 {
6049 _Assume_(text || start >= end);
6050 if (!http_media_range::match(text, start, end, flags))
6051 goto error;
6052 params.clear();
6053 for (;;) {
6054 if (this->interval.end < end && text[this->interval.end]) {
6055 if (m_space.match(text, this->interval.end, end, flags))
6056 this->interval.end = m_space.interval.end;
6057 else if (text[this->interval.end] == ';') {
6058 this->interval.end++;
6059 while (m_space.match(text, this->interval.end, end, flags))
6060 this->interval.end = m_space.interval.end;
6062 if (param.match(text, this->interval.end, end, flags)) {
6063 this->interval.end = param.interval.end;
6064 params.push_back(std::move(param));
6065 }
6066 else
6067 break;
6068 }
6069 else
6070 break;
6071 }
6072 else
6073 break;
6074 }
6075 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
6076 return true;
6077
6078 error:
6079 http_media_range::invalidate();
6080 params.clear();
6081 this->interval.start = (this->interval.end = start) + 1;
6082 return false;
6083 }
6084
6085 virtual void invalidate()
6086 {
6087 params.clear();
6088 http_media_range::invalidate();
6089 }
6090
6091 public:
6092 std::list<http_parameter> params;
6093 };
6094
6099 {
6100 public:
6101 virtual bool match(
6102 _In_reads_or_z_(end) const char* text,
6103 _In_ size_t start = 0,
6104 _In_ size_t end = (size_t)-1,
6105 _In_ int flags = match_default)
6106 {
6107 _Assume_(text || start >= end);
6108 this->interval.end = start;
6109 for (;;) {
6110 if (this->interval.end < end && text[this->interval.end]) {
6111 if ((unsigned int)text[this->interval.end] < 0x20 ||
6112 (unsigned int)text[this->interval.end] == 0x7f ||
6113 text[this->interval.end] == ':' ||
6114 text[this->interval.end] == '/' ||
6115 isspace(text[this->interval.end]))
6116 break;
6117 else
6118 this->interval.end++;
6119 }
6120 else
6121 break;
6122 }
6124 this->interval.start = start;
6125 return true;
6126 }
6127 this->interval.start = (this->interval.end = start) + 1;
6128 return false;
6129 }
6130 };
6131
6135 class http_url_port : public parser
6136 {
6137 public:
6138 http_url_port(_In_ const std::locale& locale = std::locale()) :
6139 parser(locale),
6140 value(0)
6141 {}
6142
6143 virtual bool match(
6144 _In_reads_or_z_(end) const char* text,
6145 _In_ size_t start = 0,
6146 _In_ size_t end = (size_t)-1,
6147 _In_ int flags = match_default)
6148 {
6149 _Assume_(text || start >= end);
6150 value = 0;
6151 this->interval.end = start;
6152 for (;;) {
6153 if (this->interval.end < end && text[this->interval.end]) {
6154 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6155 size_t _value = (size_t)value * 10 + text[this->interval.end] - '0';
6156 if (_value > (uint16_t)-1) {
6157 value = 0;
6158 this->interval.start = (this->interval.end = start) + 1;
6159 return false;
6160 }
6161 value = (uint16_t)_value;
6162 this->interval.end++;
6163 }
6164 else
6165 break;
6166 }
6167 else
6168 break;
6169 }
6171 this->interval.start = start;
6172 return true;
6173 }
6174 this->interval.start = (this->interval.end = start) + 1;
6175 return false;
6176 }
6177
6178 virtual void invalidate()
6179 {
6180 value = 0;
6181 parser::invalidate();
6182 }
6183
6184 public:
6185 uint16_t value;
6186 };
6187
6192 {
6193 public:
6194 virtual bool match(
6195 _In_reads_or_z_(end) const char* text,
6196 _In_ size_t start = 0,
6197 _In_ size_t end = (size_t)-1,
6198 _In_ int flags = match_default)
6199 {
6200 _Assume_(text || start >= end);
6201 this->interval.end = start;
6202 for (;;) {
6203 if (this->interval.end < end && text[this->interval.end]) {
6204 if ((unsigned int)text[this->interval.end] < 0x20 ||
6205 (unsigned int)text[this->interval.end] == 0x7f ||
6206 text[this->interval.end] == '?' ||
6207 text[this->interval.end] == '/' ||
6208 isspace(text[this->interval.end]))
6209 break;
6210 else
6211 this->interval.end++;
6212 }
6213 else
6214 break;
6215 }
6216 this->interval.start = start;
6217 return true;
6218 }
6219 };
6220
6224 class http_url_path : public parser
6225 {
6226 public:
6227 virtual bool match(
6228 _In_reads_or_z_(end) const char* text,
6229 _In_ size_t start = 0,
6230 _In_ size_t end = (size_t)-1,
6231 _In_ int flags = match_default)
6232 {
6233 _Assume_(text || start >= end);
6235 this->interval.end = start;
6236 segments.clear();
6237 _Assume_(text || this->interval.end >= end);
6238 if (this->interval.end < end && text[this->interval.end] != '/')
6239 goto error;
6240 this->interval.end++;
6241 s.match(text, this->interval.end, end, flags);
6242 segments.push_back(s);
6243 this->interval.end = s.interval.end;
6244 for (;;) {
6245 if (this->interval.end < end && text[this->interval.end]) {
6246 if (text[this->interval.end] == '/') {
6247 this->interval.end++;
6248 s.match(text, this->interval.end, end, flags);
6249 segments.push_back(s);
6250 this->interval.end = s.interval.end;
6251 }
6252 else
6253 break;
6254 }
6255 else
6256 break;
6257 }
6258 this->interval.start = start;
6259 return true;
6260
6261 error:
6262 segments.clear();
6263 this->interval.start = (this->interval.end = start) + 1;
6264 return false;
6265 }
6266
6267 virtual void invalidate()
6268 {
6269 segments.clear();
6270 parser::invalidate();
6271 }
6272
6273 public:
6274 std::vector<http_url_path_segment> segments;
6275 };
6276
6281 {
6282 public:
6283 virtual bool match(
6284 _In_reads_or_z_(end) const char* text,
6285 _In_ size_t start = 0,
6286 _In_ size_t end = (size_t)-1,
6287 _In_ int flags = match_default)
6288 {
6289 _Assume_(text || start >= end);
6290 this->interval.end = start;
6291 name.start = this->interval.end;
6292 for (;;) {
6293 if (this->interval.end < end && text[this->interval.end]) {
6294 if ((unsigned int)text[this->interval.end] < 0x20 ||
6295 (unsigned int)text[this->interval.end] == 0x7f ||
6296 text[this->interval.end] == '&' ||
6297 text[this->interval.end] == '=' ||
6298 isspace(text[this->interval.end]))
6299 break;
6300 else
6301 this->interval.end++;
6302 }
6303 else
6304 break;
6305 }
6307 name.end = this->interval.end;
6308 else
6309 goto error;
6310 if (text[this->interval.end] == '=') {
6311 this->interval.end++;
6312 value.start = this->interval.end;
6313 for (;;) {
6314 if (this->interval.end < end && text[this->interval.end]) {
6315 if ((unsigned int)text[this->interval.end] < 0x20 ||
6316 (unsigned int)text[this->interval.end] == 0x7f ||
6317 text[this->interval.end] == '&' ||
6318 isspace(text[this->interval.end]))
6319 break;
6320 else
6321 this->interval.end++;
6322 }
6323 else
6324 break;
6325 }
6326 value.end = this->interval.end;
6327 }
6328 else {
6329 value.start = 1;
6330 value.end = 0;
6331 }
6332 this->interval.start = start;
6333 return true;
6334
6335 error:
6336 name.start = 1;
6337 name.end = 0;
6338 value.start = 1;
6339 value.end = 0;
6340 this->interval.start = (this->interval.end = start) + 1;
6341 return false;
6342 }
6343
6344 virtual void invalidate()
6345 {
6346 name.start = 1;
6347 name.end = 0;
6348 value.start = 1;
6349 value.end = 0;
6350 parser::invalidate();
6351 }
6352
6353 public:
6356 };
6357
6361 class http_url : public parser
6362 {
6363 public:
6364 http_url(_In_ const std::locale& locale = std::locale()) :
6365 parser(locale),
6366 port(locale)
6367 {}
6368
6369 virtual bool match(
6370 _In_reads_or_z_(end) const char* text,
6371 _In_ size_t start = 0,
6372 _In_ size_t end = (size_t)-1,
6373 _In_ int flags = match_default)
6374 {
6375 _Assume_(text || start >= end);
6376 this->interval.end = start;
6377
6378 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", (size_t)-1, m_locale) == 0) {
6379 this->interval.end += 7;
6380 if (server.match(text, this->interval.end, end, flags))
6381 this->interval.end = server.interval.end;
6382 else
6383 goto error;
6384 if (this->interval.end < end && text[this->interval.end] == ':') {
6385 this->interval.end++;
6386 if (port.match(text, this->interval.end, end, flags))
6387 this->interval.end = port.interval.end;
6388 }
6389 else {
6390 port.invalidate();
6391 port.value = 80;
6392 }
6393 }
6394 else {
6395 server.invalidate();
6396 port.invalidate();
6397 port.value = 80;
6398 }
6399
6400 if (path.match(text, this->interval.end, end, flags))
6401 this->interval.end = path.interval.end;
6402 else
6403 goto error;
6404
6405 params.clear();
6406
6407 if (this->interval.end < end && text[this->interval.end] == '?') {
6408 this->interval.end++;
6409 for (;;) {
6410 if (this->interval.end < end && text[this->interval.end]) {
6411 if ((unsigned int)text[this->interval.end] < 0x20 ||
6412 (unsigned int)text[this->interval.end] == 0x7f ||
6413 isspace(text[this->interval.end]))
6414 break;
6415 else if (text[this->interval.end] == '&')
6416 this->interval.end++;
6417 else {
6419 if (param.match(text, this->interval.end, end, flags)) {
6420 this->interval.end = param.interval.end;
6421 params.push_back(std::move(param));
6422 }
6423 else
6424 break;
6425 }
6426 }
6427 else
6428 break;
6429 }
6430 }
6431
6432 this->interval.start = start;
6433 return true;
6434
6435 error:
6436 server.invalidate();
6437 port.invalidate();
6438 path.invalidate();
6439 params.clear();
6440 this->interval.start = (this->interval.end = start) + 1;
6441 return false;
6442 }
6443
6444 virtual void invalidate()
6445 {
6446 server.invalidate();
6447 port.invalidate();
6448 path.invalidate();
6449 params.clear();
6450 parser::invalidate();
6451 }
6452
6453 public:
6454 http_url_server server;
6455 http_url_port port;
6456 http_url_path path;
6457 std::list<http_url_parameter> params;
6458 };
6459
6463 class http_language : public parser
6464 {
6465 public:
6466 virtual bool match(
6467 _In_reads_or_z_(end) const char* text,
6468 _In_ size_t start = 0,
6469 _In_ size_t end = (size_t)-1,
6470 _In_ int flags = match_default)
6471 {
6472 _Assume_(text || start >= end);
6473 this->interval.end = start;
6474 components.clear();
6475 for (;;) {
6476 if (this->interval.end < end && text[this->interval.end]) {
6478 k.end = this->interval.end;
6479 for (;;) {
6480 if (k.end < end && text[k.end]) {
6481 if (isalpha(text[k.end]))
6482 k.end++;
6483 else
6484 break;
6485 }
6486 else
6487 break;
6488 }
6489 if (this->interval.end < k.end) {
6490 k.start = this->interval.end;
6491 this->interval.end = k.end;
6492 components.push_back(k);
6493 }
6494 else
6495 break;
6496 if (this->interval.end < end && text[this->interval.end] == '-')
6497 this->interval.end++;
6498 else
6499 break;
6500 }
6501 else
6502 break;
6503 }
6504 if (!components.empty()) {
6505 this->interval.start = start;
6506 this->interval.end = components.back().end;
6507 return true;
6508 }
6509 this->interval.start = (this->interval.end = start) + 1;
6510 return false;
6511 }
6512
6513 virtual void invalidate()
6514 {
6515 components.clear();
6516 parser::invalidate();
6517 }
6518
6519 public:
6520 std::vector<stdex::interval<size_t>> components;
6521 };
6522
6526 class http_weight : public parser
6527 {
6528 public:
6529 http_weight(_In_ const std::locale& locale = std::locale()) :
6530 parser(locale),
6531 value(1.0f)
6532 {}
6533
6534 virtual bool match(
6535 _In_reads_or_z_(end) const char* text,
6536 _In_ size_t start = 0,
6537 _In_ size_t end = (size_t)-1,
6538 _In_ int flags = match_default)
6539 {
6540 _Assume_(text || start >= end);
6541 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
6542 this->interval.end = start;
6543 for (;;) {
6544 if (this->interval.end < end && text[this->interval.end]) {
6545 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6546 celi_del = celi_del * 10 + text[this->interval.end] - '0';
6547 this->interval.end++;
6548 }
6549 else if (text[this->interval.end] == '.') {
6550 this->interval.end++;
6551 for (;;) {
6552 if (this->interval.end < end && text[this->interval.end]) {
6553 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6554 decimalni_del = decimalni_del * 10 + text[this->interval.end] - '0';
6555 decimalni_del_n *= 10;
6556 this->interval.end++;
6557 }
6558 else
6559 break;
6560 }
6561 else
6562 break;
6563 }
6564 break;
6565 }
6566 else
6567 break;
6568 }
6569 else
6570 break;
6571 }
6574 this->interval.start = start;
6575 return true;
6576 }
6577 value = 1.0f;
6578 this->interval.start = (this->interval.end = start) + 1;
6579 return false;
6580 }
6581
6582 virtual void invalidate()
6583 {
6584 value = 1.0f;
6585 parser::invalidate();
6586 }
6587
6588 public:
6589 float value;
6590 };
6591
6595 class http_asterisk : public parser
6596 {
6597 public:
6598 virtual bool match(
6599 _In_reads_or_z_(end) const char* text,
6600 _In_ size_t start = 0,
6601 _In_ size_t end = (size_t)-1,
6602 _In_ int flags = match_default)
6603 {
6604 _Assume_(text || end <= start);
6605 if (start < end && text[start] == '*') {
6606 this->interval.end = (this->interval.start = start) + 1;
6607 return true;
6608 }
6609 this->interval.start = (this->interval.end = start) + 1;
6610 return false;
6611 }
6612 };
6613
// Test for HTTP weighted value.
//
// Matches either an asterisk (T_asterisk) or a value (T), optionally followed
// by a weight of the form `; q = <factor>` (white space around ';', 'q' and
// '=' is tolerated, 'q' is accepted in either case).
//
// T          - parser type used for the value
// T_asterisk - parser type used for the wildcard
6617 template <class T, class T_asterisk = http_asterisk>
6619 {
6620 public:
// Passes the locale on to the base parser and to the weight-factor parser.
6621 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
6622 parser(locale),
6623 factor(locale)
6624 {}
6625
// Returns true when an asterisk or a value was matched at `start`; the
// parser that did not match is invalidated. Returns false (and invalidates
// both) when neither matches.
6626 virtual bool match(
6627 _In_reads_or_z_(end) const char* text,
6628 _In_ size_t start = 0,
6629 _In_ size_t end = (size_t)-1,
6630 _In_ int flags = match_default)
6631 {
6632 _Assume_(text || start >= end);
// konec_vrednosti ("end of value"): position right after the matched
// asterisk/value, recorded before the optional weight is scanned.
6633 size_t konec_vrednosti;
6634 this->interval.end = start;
6635 if (asterisk.match(text, this->interval.end, end, flags)) {
6636 this->interval.end = konec_vrednosti = asterisk.interval.end;
6637 value.invalidate();
6638 }
6639 else if (value.match(text, this->interval.end, end, flags)) {
6640 this->interval.end = konec_vrednosti = value.interval.end;
6641 asterisk.invalidate();
6642 }
6643 else {
6644 asterisk.invalidate();
6645 value.invalidate();
// Failure: end = start, start = start + 1 (start > end).
6646 this->interval.start = (this->interval.end = start) + 1;
6647 return false;
6648 }
6649
// Optional weight: [space] ';' [space] ('q'|'Q') [space] '=' [space] factor
6650 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
6651 if (this->interval.end < end && text[this->interval.end] == ';') {
6652 this->interval.end++;
6653 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
6654 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
6655 this->interval.end++;
6656 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
6657 if (this->interval.end < end && text[this->interval.end] == '=') {
6658 this->interval.end++;
6659 while (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])) this->interval.end++;
6660 if (factor.match(text, this->interval.end, end, flags))
6661 this->interval.end = factor.interval.end;
6662 }
6663 }
6664 }
// No weight factor was matched: reset the factor parser.
6665 if (!factor.interval) {
6666 factor.invalidate();
6668 }
6669 this->interval.start = start;
6670 return true;
6671 }
6672
// Invalidates all three sub-parsers and then the base parser.
6673 virtual void invalidate()
6674 {
6675 asterisk.invalidate();
6676 value.invalidate();
6677 factor.invalidate();
6678 parser::invalidate();
6679 }
6680
6681 public:
// Wildcard parser; holds a match when the element was an asterisk.
6682 T_asterisk asterisk;
// Value parser; holds a match when the element was a value.
6683 T value;
// Weight-factor parser for the optional `q=` part.
6684 http_weight factor;
6685 };
6686
6691 {
6692 public:
6693 virtual bool match(
6694 _In_reads_or_z_(end) const char* text,
6695 _In_ size_t start = 0,
6696 _In_ size_t end = (size_t)-1,
6697 _In_ int flags = match_default)
6698 {
6699 _Assume_(text || start >= end);
6700 this->interval.end = start;
6701 if (this->interval.end < end && text[this->interval.end] == '$')
6702 this->interval.end++;
6703 else
6704 goto error;
6705 if (name.match(text, this->interval.end, end, flags))
6706 this->interval.end = name.interval.end;
6707 else
6708 goto error;
6709 while (m_space.match(text, this->interval.end, end, flags))
6710 this->interval.end = m_space.interval.end;
6711 if (this->interval.end < end && text[this->interval.end] == '=')
6712 this->interval.end++;
6713 else
6714 goto error;
6715 while (m_space.match(text, this->interval.end, end, flags))
6716 this->interval.end = m_space.interval.end;
6717 if (value.match(text, this->interval.end, end, flags))
6718 this->interval.end = value.interval.end;
6719 else
6720 goto error;
6721 this->interval.start = start;
6722 return true;
6723
6724 error:
6725 name.invalidate();
6726 value.invalidate();
6727 this->interval.start = (this->interval.end = start) + 1;
6728 return false;
6729 }
6730
// Invalidates the name and value sub-parsers, then the base parser.
6731 virtual void invalidate()
6732 {
6733 name.invalidate();
6734 value.invalidate();
6735 parser::invalidate();
6736 }
6737
6738 public:
6739 http_token name;
6740 http_value value;
6741
6742 protected:
6743 http_space m_space;
6744 };
6745
// Test for HTTP cookie.
//
// Matches `name = value` followed by any number of `; parameter` elements.
// Successfully matched parameters are collected in `params`.
6749 class http_cookie : public parser
6750 {
6751 public:
// Returns true when at least `name = value` was matched at `start`.
// On failure name, value and params are reset and false is returned.
6752 virtual bool match(
6753 _In_reads_or_z_(end) const char* text,
6754 _In_ size_t start = 0,
6755 _In_ size_t end = (size_t)-1,
6756 _In_ int flags = match_default)
6757 {
6758 _Assume_(text || start >= end);
6759 this->interval.end = start;
// Cookie name
6760 if (name.match(text, this->interval.end, end, flags))
6761 this->interval.end = name.interval.end;
6762 else
6763 goto error;
// [space] '=' [space]
6764 while (m_space.match(text, this->interval.end, end, flags))
6765 this->interval.end = m_space.interval.end;
6766 if (this->interval.end < end && text[this->interval.end] == '=')
6767 this->interval.end++;
6768 else
6769 goto error;
6770 while (m_space.match(text, this->interval.end, end, flags))
6771 this->interval.end = m_space.interval.end;
// Cookie value
6772 if (value.match(text, this->interval.end, end, flags))
6773 this->interval.end = value.interval.end;
6774 else
6775 goto error;
// Optional parameters, each introduced by ';'. Scanning stops at the first
// thing that is neither space nor a ';' followed by a valid parameter.
6776 params.clear();
6777 for (;;) {
6778 if (this->interval.end < end && text[this->interval.end]) {
6779 if (m_space.match(text, this->interval.end, end, flags))
6780 this->interval.end = m_space.interval.end;
6781 else if (text[this->interval.end] == ';') {
6782 this->interval.end++;
6783 while (m_space.match(text, this->interval.end, end, flags))
6784 this->interval.end = m_space.interval.end;
6786 if (param.match(text, this->interval.end, end, flags)) {
6787 this->interval.end = param.interval.end;
6788 params.push_back(std::move(param));
6789 }
6790 else
6791 break;
6792 }
6793 else
6794 break;
6795 }
6796 else
6797 break;
6798 }
6799 this->interval.start = start;
// The loop above may have advanced past trailing space or a dangling ';'.
// Pull the end back to the last element that actually matched.
6800 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
6801 return true;
6802
6803 error:
6804 name.invalidate();
6805 value.invalidate();
6806 params.clear();
// Failure: end = start, start = start + 1 (start > end).
6807 this->interval.start = (this->interval.end = start) + 1;
6808 return false;
6809 }
6810
// Invalidates name and value, discards collected parameters, then
// invalidates the base parser.
6811 virtual void invalidate()
6812 {
6813 name.invalidate();
6814 value.invalidate();
6815 params.clear();
6816 parser::invalidate();
6817 }
6818
6819 public:
// Parameters matched after the cookie value, in order of appearance.
6822 std::list<http_cookie_parameter> params;
6823
6824 protected:
// Helper parser used to skip HTTP white space.
6825 http_space m_space;
6826 };
6827
// Test for HTTP agent.
//
// Scans `type[/version]`: the type runs up to the first '/', white space or
// end of text; when a '/' follows, the version runs up to the next white
// space or end of text.
6831 class http_agent : public parser
6832 {
6833 public:
6834 virtual bool match(
6835 _In_reads_or_z_(end) const char* text,
6836 _In_ size_t start = 0,
6837 _In_ size_t end = (size_t)-1,
6838 _In_ int flags = match_default)
6839 {
6840 _Assume_(text || start >= end);
6841 this->interval.end = start;
6842 type.start = this->interval.end;
6843 for (;;) {
6844 if (this->interval.end < end && text[this->interval.end]) {
6845 if (text[this->interval.end] == '/') {
// '/' ends the type; the version starts right after it.
6846 type.end = this->interval.end;
6847 this->interval.end++;
6848 version.start = this->interval.end;
6849 for (;;) {
6850 if (this->interval.end < end && text[this->interval.end]) {
6851 if (isspace(text[this->interval.end])) {
6852 version.end = this->interval.end;
6853 break;
6854 }
6855 else
6856 this->interval.end++;
6857 }
6858 else {
// End of text also terminates the version.
6859 version.end = this->interval.end;
6860 break;
6861 }
6862 }
6863 break;
6864 }
6865 else if (isspace(text[this->interval.end])) {
// White space ends the type; version.start/end are not written on this path.
6866 type.end = this->interval.end;
6867 break;
6868 }
6869 else
6870 this->interval.end++;
6871 }
6872 else {
// End of text ends the type.
6873 type.end = this->interval.end;
6874 break;
6875 }
6876 }
6878 this->interval.start = start;
6879 return true;
6880 }
// Failure: every interval is set to start = 1, end = 0 (start > end).
6881 type.start = 1;
6882 type.end = 0;
6883 version.start = 1;
6884 version.end = 0;
6885 this->interval.start = 1;
6886 this->interval.end = 0;
6887 return false;
6888 }
6889
// Resets type and version to start = 1, end = 0, then invalidates the base parser.
6890 virtual void invalidate()
6891 {
6892 type.start = 1;
6893 type.end = 0;
6894 version.start = 1;
6895 version.end = 0;
6896 parser::invalidate();
6897 }
6898
6899 public:
6902 };
6903
// Test for HTTP protocol.
//
// Scans `type/major[.minor]` followed by white space and stores the numeric
// version as major * 0x100 + minor (0x100 = 1.0, 0x101 = 1.1...).
// When nothing has been matched the version is 0x009.
6907 class http_protocol : public parser
6908 {
6909 public:
6910 http_protocol(_In_ const std::locale& locale = std::locale()) :
6911 parser(locale),
6912 version(0x009)
6913 {}
6914
// Returns true when type, '/', and a white-space-terminated version were
// found. Note that a version running up to the end of the text without
// trailing white space takes the error path.
6915 virtual bool match(
6916 _In_reads_or_z_(end) const char* text,
6917 _In_ size_t start = 0,
6918 _In_ size_t end = (size_t)-1,
6919 _In_ int flags = match_default)
6920 {
6921 _Assume_(text || start >= end);
6922 this->interval.end = start;
6923 type.start = this->interval.end;
// Protocol type: everything up to '/'. White space or end of text before
// the '/' is an error.
6924 for (;;) {
6925 if (this->interval.end < end && text[this->interval.end]) {
6926 if (text[this->interval.end] == '/') {
6927 type.end = this->interval.end;
6928 this->interval.end++;
6929 break;
6930 }
6931 else if (isspace(text[this->interval.end]))
6932 goto error;
6933 else
6934 this->interval.end++;
6935 }
6936 else {
6937 type.end = this->interval.end;
6938 goto error;
6939 }
6940 }
// Major version: up to '.' (minor follows) or white space (no minor).
6941 version_maj.start = this->interval.end;
6942 for (;;) {
6943 if (this->interval.end < end && text[this->interval.end]) {
6944 if (text[this->interval.end] == '.') {
6945 version_maj.end = this->interval.end;
6946 this->interval.end++;
6947 version_min.start = this->interval.end;
// Minor version: up to the next white space.
6948 for (;;) {
6949 if (this->interval.end < end && text[this->interval.end]) {
6950 if (isspace(text[this->interval.end])) {
6951 version_min.end = this->interval.end;
6952 version =
6953 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6954 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6955 break;
6956 }
6957 else
6958 this->interval.end++;
6959 }
6960 else
6961 goto error;
6962 }
6963 break;
6964 }
6965 else if (isspace(text[this->interval.end])) {
// Major version only: the minor interval is marked as not present.
6966 version_maj.end = this->interval.end;
6967 version_min.start = 1;
6968 version_min.end = 0;
6969 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6970 break;
6971 }
6972 else
6973 this->interval.end++;
6974 }
6975 else
6976 goto error;
6977 }
6978 this->interval.start = start;
6979 return true;
6980
// Failure: all intervals are set to start = 1, end = 0 and the version is
// restored to 0x009.
6981 error:
6982 type.start = 1;
6983 type.end = 0;
6984 version_maj.start = 1;
6985 version_maj.end = 0;
6986 version_min.start = 1;
6987 version_min.end = 0;
6988 version = 0x009;
6989 this->interval.start = 1;
6990 this->interval.end = 0;
6991 return false;
6992 }
6993
// Resets all parts to the same state the error path produces, then
// invalidates the base parser.
6994 virtual void invalidate()
6995 {
6996 type.start = 1;
6997 type.end = 0;
6998 version_maj.start = 1;
6999 version_maj.end = 0;
7000 version_min.start = 1;
7001 version_min.end = 0;
7002 version = 0x009;
7003 parser::invalidate();
7004 }
7005
7006 public:
// Interval of the major version digits within the parsed text.
7008 stdex::interval<size_t> version_maj;
// Interval of the minor version digits within the parsed text
// (start = 1, end = 0 when no minor version was given).
7009 stdex::interval<size_t> version_min;
7011 };
7012
// Test for HTTP request.
//
// Scans a request line of the form `verb url [protocol] <line break>`.
// The verb and URL are mandatory; the protocol and the terminating line
// break are optional.
7016 class http_request : public parser
7017 {
7018 public:
// Passes the locale on to the base parser and to the url and protocol parsers.
7019 http_request(_In_ const std::locale& locale = std::locale()) :
7020 parser(locale),
7021 url(locale),
7022 protocol(locale)
7023 {}
7024
// Returns true when a verb and a URL were matched; false when a line break
// or the end of text is reached before both are found.
7025 virtual bool match(
7026 _In_reads_or_z_(end) const char* text,
7027 _In_ size_t start = 0,
7028 _In_ size_t end = (size_t)-1,
7029 _In_ int flags = match_default)
7030 {
7031 _Assume_(text || start >= end);
7032 this->interval.end = start;
7033
// Skip leading white space; a line break or end of text here is an error.
7034 for (;;) {
7035 if (m_line_break.match(text, this->interval.end, end, flags))
7036 goto error;
7037 else if (this->interval.end < end && text[this->interval.end]) {
7038 if (isspace(text[this->interval.end]))
7039 this->interval.end++;
7040 else
7041 break;
7042 }
7043 else
7044 goto error;
7045 }
// Verb: everything up to the next white space.
7046 verb.start = this->interval.end;
7047 for (;;) {
7048 if (m_line_break.match(text, this->interval.end, end, flags))
7049 goto error;
7050 else if (this->interval.end < end && text[this->interval.end]) {
7051 if (isspace(text[this->interval.end])) {
7052 verb.end = this->interval.end;
7053 this->interval.end++;
7054 break;
7055 }
7056 else
7057 this->interval.end++;
7058 }
7059 else
7060 goto error;
7061 }
7062
// Skip white space between the verb and the URL.
7063 for (;;) {
7064 if (m_line_break.match(text, this->interval.end, end, flags))
7065 goto error;
7066 else if (this->interval.end < end && text[this->interval.end]) {
7067 if (isspace(text[this->interval.end]))
7068 this->interval.end++;
7069 else
7070 break;
7071 }
7072 else
7073 goto error;
7074 }
// URL
7075 if (url.match(text, this->interval.end, end, flags))
7076 this->interval.end = url.interval.end;
7077 else
7078 goto error;
7079
// From here on the request is accepted; the protocol is optional, so it is
// reset first and only filled in when present.
7080 protocol.invalidate();
// Skip white space after the URL; a line break or end of text ends the request.
7081 for (;;) {
7082 if (m_line_break.match(text, this->interval.end, end, flags)) {
7083 this->interval.end = m_line_break.interval.end;
7084 goto end;
7085 }
7086 else if (this->interval.end < end && text[this->interval.end]) {
7087 if (isspace(text[this->interval.end]))
7088 this->interval.end++;
7089 else
7090 break;
7091 }
7092 else
7093 goto end;
7094 }
// Protocol (every branch leaves this loop on its first pass).
7095 for (;;) {
7096 if (m_line_break.match(text, this->interval.end, end, flags)) {
7097 this->interval.end = m_line_break.interval.end;
7098 goto end;
7099 }
7100 else if (protocol.match(text, this->interval.end, end, flags)) {
7101 this->interval.end = protocol.interval.end;
7102 break;
7103 }
7104 else
7105 goto end;
7106 }
7107
// Consume whatever remains on the line, including the line break.
7108 for (;;) {
7109 if (m_line_break.match(text, this->interval.end, end, flags)) {
7110 this->interval.end = m_line_break.interval.end;
7111 break;
7112 }
7113 else if (this->interval.end < end && text[this->interval.end])
7114 this->interval.end++;
7115 else
7116 goto end;
7117 }
7118
7119 end:
7120 this->interval.start = start;
7121 return true;
7122
// Failure: verb and the overall interval are set to start = 1, end = 0;
// url and protocol are invalidated.
7123 error:
7124 verb.start = 1;
7125 verb.end = 0;
7126 url.invalidate();
7127 protocol.invalidate();
7128 this->interval.start = 1;
7129 this->interval.end = 0;
7130 return false;
7131 }
7132
// Resets the verb interval and the url/protocol parsers, then invalidates
// the base parser.
7133 virtual void invalidate()
7134 {
7135 verb.start = 1;
7136 verb.end = 0;
7137 url.invalidate();
7138 protocol.invalidate();
7139 parser::invalidate();
7140 }
7141
7142 public:
// Parser holding the matched request URL.
7144 http_url url;
// Parser holding the matched protocol; invalidated when the request has none.
7145 http_protocol protocol;
7146
7147 protected:
// Helper parser used to detect line breaks.
7148 http_line_break m_line_break;
7149 };
7150
// Test for HTTP header.
//
// Scans `name [space] ':' value <line break>`. A line break followed by
// white space continues the value on the next line.
7154 class http_header : public parser
7155 {
7156 public:
// Returns true when a name terminated by ':' was found; the value may be
// empty. Returns false when the text starts with a line break or white
// space, or when a line break or the end of text precedes the ':'.
7157 virtual bool match(
7158 _In_reads_or_z_(end) const char* text,
7159 _In_ size_t start = 0,
7160 _In_ size_t end = (size_t)-1,
7161 _In_ int flags = match_default)
7162 {
7163 _Assume_(text || start >= end);
7164 this->interval.end = start;
7165
// A header may not begin with a line break or white space.
7166 if (m_line_break.match(text, this->interval.end, end, flags) ||
7167 (this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end])))
7168 goto error;
// Name: runs up to ':' or to white space that must itself be followed by ':'.
7169 name.start = this->interval.end;
7170 for (;;) {
7171 if (m_line_break.match(text, this->interval.end, end, flags))
7172 goto error;
7173 else if (this->interval.end < end && text[this->interval.end]) {
7174 if (isspace(text[this->interval.end])) {
7175 name.end = this->interval.end;
7176 this->interval.end++;
// Skip the remaining white space between the name and ':'.
7177 for (;;) {
7178 if (m_line_break.match(text, this->interval.end, end, flags))
7179 goto error;
7180 else if (this->interval.end < end && text[this->interval.end]) {
7181 if (isspace(text[this->interval.end]))
7182 this->interval.end++;
7183 else
7184 break;
7185 }
7186 else
7187 goto error;
7188 }
7189 if (this->interval.end < end && text[this->interval.end] == ':') {
7190 this->interval.end++;
7191 break;
7192 }
7193 else
7194 goto error;
// Not reached: both branches above leave this block.
7195 break;
7196 }
7197 else if (text[this->interval.end] == ':') {
7198 name.end = this->interval.end;
7199 this->interval.end++;
7200 break;
7201 }
7202 else
7203 this->interval.end++;
7204 }
7205 else
7206 goto error;
7207 }
// Value: start is set at the first non-space code unit, end just past the
// last one. (size_t)-1 marks "no value code unit seen yet".
7208 value.start = (size_t)-1;
7209 value.end = 0;
7210 for (;;) {
7211 if (m_line_break.match(text, this->interval.end, end, flags)) {
7212 this->interval.end = m_line_break.interval.end;
// A line break followed by white space (but not by another line break)
// continues the header on the next line; otherwise the header ends here.
7213 if (!m_line_break.match(text, this->interval.end, end, flags) &&
7214 this->interval.end < end && text[this->interval.end] && isspace(text[this->interval.end]))
7215 this->interval.end++;
7216 else
7217 break;
7218 }
7219 else if (this->interval.end < end && text[this->interval.end]) {
7220 if (isspace(text[this->interval.end]))
7221 this->interval.end++;
7222 else {
7223 if (value.start == (size_t)-1) value.start = this->interval.end;
7224 value.end = ++this->interval.end;
7225 }
7226 }
7227 else
7228 break;
7229 }
7230 this->interval.start = start;
7231 return true;
7232
// Failure: name, value and the overall interval are set to start = 1, end = 0.
7233 error:
7234 name.start = 1;
7235 name.end = 0;
7236 value.start = 1;
7237 value.end = 0;
7238 this->interval.start = 1;
7239 this->interval.end = 0;
7240 return false;
7241 }
7242
// Resets name and value to start = 1, end = 0, then invalidates the base parser.
7243 virtual void invalidate()
7244 {
7245 name.start = 1;
7246 name.end = 0;
7247 value.start = 1;
7248 value.end = 0;
7249 parser::invalidate();
7250 }
7251
7252 public:
7255
7256 protected:
// Helper parser used to detect line breaks.
7257 http_line_break m_line_break;
7258 };
7259
7263 template <class _Key, class T>
7264 class http_value_collection : public T
7265 {
7266 public:
7267 void insert(
7268 _In_reads_or_z_(end) const char* text,
7269 _In_ size_t start = 0,
7270 _In_ size_t end = (size_t)-1,
7271 _In_ int flags = match_default)
7272 {
7273 while (start < end) {
7274 while (start < end && text[start] && isspace(text[start])) start++;
7275 if (start < end && text[start] == ',') {
7276 start++;
7277 while (start < end&& text[start] && isspace(text[start])) start++;
7278 }
7279 _Key el;
7280 if (el.match(text, start, end, flags)) {
7281 start = el.interval.end;
7282 T::insert(std::move(el));
7283 }
7284 else
7285 break;
7286 }
7287 }
7288 };
7289
// Ordering predicate on weight factors: `a` goes before `b` when a's
// factor.value is strictly greater, i.e. descending order by weight.
7290 template <class T>
7292 constexpr bool operator()(const T& a, const T& b) const noexcept
7293 {
7294 return a.factor.value > b.factor.value;
7295 }
7296 };
7297
7301 template <class T, class _Alloc = std::allocator<T>>
7303
// Test for JSON string.
//
// The constructor receives one parser per syntactic element of a JSON
// string and stores each in the member of the matching name.
7307 template <class T>
7309 {
7310 public:
// quote  - string delimiter
// chr    - ordinary (unescaped) string content
// escape - escape introducer
// sol, bs, ff, lf, cr, htab - escape selectors that match() decodes to
//          '/', '\b', '\f', '\n', '\r' and '\t' respectively
// uni    - selector introducing a 4-digit hexadecimal code unit
// hex    - parser for the hexadecimal digits following `uni`
// locale - passed on to the base parser
7312 _In_ const std::shared_ptr<basic_parser<T>>& quote,
7313 _In_ const std::shared_ptr<basic_parser<T>>& chr,
7314 _In_ const std::shared_ptr<basic_parser<T>>& escape,
7315 _In_ const std::shared_ptr<basic_parser<T>>& sol,
7316 _In_ const std::shared_ptr<basic_parser<T>>& bs,
7317 _In_ const std::shared_ptr<basic_parser<T>>& ff,
7318 _In_ const std::shared_ptr<basic_parser<T>>& lf,
7319 _In_ const std::shared_ptr<basic_parser<T>>& cr,
7320 _In_ const std::shared_ptr<basic_parser<T>>& htab,
7321 _In_ const std::shared_ptr<basic_parser<T>>& uni,
7322 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
7323 _In_ const std::locale& locale = std::locale()) :
7324 basic_parser<T>(locale),
7325 m_quote(quote),
7326 m_chr(chr),
7327 m_escape(escape),
7328 m_sol(sol),
7329 m_bs(bs),
7330 m_ff(ff),
7331 m_lf(lf),
7332 m_cr(cr),
7333 m_htab(htab),
7334 m_uni(uni),
7335 m_hex(hex)
7336 {}
7337
7338 virtual bool match(
7339 _In_reads_or_z_(end) const T* text,
7340 _In_ size_t start = 0,
7341 _In_ size_t end = (size_t)-1,
7342 _In_ int flags = match_default)
7343 {
7344 _Assume_(text || start >= end);
7345 this->interval.end = start;
7346 if (m_quote->match(text, this->interval.end, end, flags)) {
7347 this->interval.end = m_quote->interval.end;
7348 value.clear();
7349 for (;;) {
7350 if (m_quote->match(text, this->interval.end, end, flags)) {
7351 this->interval.start = start;
7352 this->interval.end = m_quote->interval.end;
7353 return true;
7354 }
7355 if (m_escape->match(text, this->interval.end, end, flags)) {
7356 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
7357 value += '"'; this->interval.end = m_quote->interval.end;
7358 continue;
7359 }
7360 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
7361 value += '/'; this->interval.end = m_sol->interval.end;
7362 continue;
7363 }
7364 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
7365 value += '\b'; this->interval.end = m_bs->interval.end;
7366 continue;
7367 }
7368 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
7369 value += '\f'; this->interval.end = m_ff->interval.end;
7370 continue;
7371 }
7372 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
7373 value += '\n'; this->interval.end = m_lf->interval.end;
7374 continue;
7375 }
7376 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
7377 value += '\r'; this->interval.end = m_cr->interval.end;
7378 continue;
7379 }
7380 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
7381 value += '\t'; this->interval.end = m_htab->interval.end;
7382 continue;
7383 }
7384 if (
7385 m_uni->match(text, m_escape->interval.end, end, flags) &&
7386 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
7387 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
7388 {
7389 _Assume_(m_hex->value <= 0xffff);
7390 if (sizeof(T) == 1) {
7391 if (m_hex->value > 0x7ff) {
7392 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
7393 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
7394 value += (T)(0x80 | (m_hex->value & 0x3f));
7395 }
7396 else if (m_hex->value > 0x7f) {
7397 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
7398 value += (T)(0x80 | (m_hex->value & 0x3f));
7399 }
7400 else
7401 value += (T)(m_hex->value & 0x7f);
7402 }
7403 else
7404 value += (T)m_hex->value;
7405 this->interval.end = m_hex->interval.end;
7406 continue;
7407 }
7408 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
7409 value += '\\'; this->interval.end = m_escape->interval.end;
7410 continue;
7411 }
7412 }
7413 if (m_chr->match(text, this->interval.end, end, flags)) {
7414 value.Prilepi(text + m_chr->interval.start, m_chr->interval.size());
7415 this->interval.end = m_chr->interval.end;
7416 continue;
7417 }
7418 break;
7419 }
7420 }
7421 value.clear();
7422 this->interval.start = (this->interval.end = start) + 1;
7423 return false;
7424 }
7425
// Discards the decoded string content.
7426 virtual void invalidate()
7427 {
7428 value.clear();
7430 }
7431
7432 public:
7433 std::basic_string<T> value;
7434
7435 protected:
7436 std::shared_ptr<basic_parser<T>> m_quote;
7437 std::shared_ptr<basic_parser<T>> m_chr;
7438 std::shared_ptr<basic_parser<T>> m_escape;
7439 std::shared_ptr<basic_parser<T>> m_sol;
7440 std::shared_ptr<basic_parser<T>> m_bs;
7441 std::shared_ptr<basic_parser<T>> m_ff;
7442 std::shared_ptr<basic_parser<T>> m_lf;
7443 std::shared_ptr<basic_parser<T>> m_cr;
7444 std::shared_ptr<basic_parser<T>> m_htab;
7445 std::shared_ptr<basic_parser<T>> m_uni;
7446 std::shared_ptr<basic_integer16<T>> m_hex;
7447 };
7448
7451#ifdef _UNICODE
7452 using tjson_string = wjson_string;
7453#else
7454 using tjson_string = json_string;
7455#endif
7456 }
7457}
7458
7459#undef ENUM_FLAG_OPERATOR
7460#undef ENUM_FLAGS
7461
7462#ifdef _MSC_VER
7463#pragma warning(pop)
7464#endif
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4403
Test for any code unit.
Definition parser.hpp:229
Test for beginning of line.
Definition parser.hpp:623
Test for any.
Definition parser.hpp:1065
Test for chemical formula.
Definition parser.hpp:5547
Test for Creditor Reference.
Definition parser.hpp:4973
T reference[22]
Normalized national reference number.
Definition parser.hpp:5102
T check_digits[3]
Two check digits.
Definition parser.hpp:5101
bool is_valid
Is reference valid per ISO 7064.
Definition parser.hpp:5103
Test for any code unit from a given string of code units.
Definition parser.hpp:728
Test for specific code unit.
Definition parser.hpp:299
Test for date.
Definition parser.hpp:4033
Test for valid DNS domain character.
Definition parser.hpp:2814
bool allow_on_edge
Is character allowed at the beginning or an end of a DNS domain?
Definition parser.hpp:2852
Test for DNS domain/hostname.
Definition parser.hpp:2914
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2978
Test for e-mail address.
Definition parser.hpp:3802
Test for emoticon.
Definition parser.hpp:3910
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3999
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:4000
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:4002
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:4001
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3998
Test for end of line.
Definition parser.hpp:661
Test for fraction.
Definition parser.hpp:1694
Test for International Bank Account Number.
Definition parser.hpp:4679
T bban[31]
Normalized Basic Bank Account Number.
Definition parser.hpp:4950
T country[3]
ISO 3166-1 alpha-2 country code.
Definition parser.hpp:4948
T check_digits[3]
Two check digits.
Definition parser.hpp:4949
bool is_valid
Is IBAN valid per ISO 7064.
Definition parser.hpp:4951
Test for decimal integer.
Definition parser.hpp:1303
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1388
bool has_separators
Did integer have any separators?
Definition parser.hpp:1448
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1447
Test for hexadecimal integer.
Definition parser.hpp:1469
Base class for integer testing.
Definition parser.hpp:1281
size_t value
Calculated value of the numeral.
Definition parser.hpp:1295
Test for IPv4 address.
Definition parser.hpp:2354
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2469
struct in_addr value
IPv4 address value.
Definition parser.hpp:2470
Test for IPv6 address.
Definition parser.hpp:2573
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2777
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2775
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2776
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2501
Test for repeating.
Definition parser.hpp:918
bool m_greedy
Try to match as long a sequence as possible
Definition parser.hpp:957
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:954
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:955
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:956
Test for JSON string.
Definition parser.hpp:7309
Test for mixed numeral.
Definition parser.hpp:1930
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:2036
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2034
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2033
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2032
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2035
Test for monetary numeral.
Definition parser.hpp:2225
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2331
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2336
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2334
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2337
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2335
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2332
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2333
"No-op" match
Definition parser.hpp:197
Base template for all parsers.
Definition parser.hpp:78
interval< size_t > interval
Region of the last match.
Definition parser.hpp:177
Test for permutation.
Definition parser.hpp:1205
Test for phone number.
Definition parser.hpp:4526
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4652
Test for any punctuation code unit.
Definition parser.hpp:471
Test for Roman numeral.
Definition parser.hpp:1578
Test for scientific numeral.
Definition parser.hpp:2056
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2200
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2204
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2198
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2199
double value
Calculated value of the numeral.
Definition parser.hpp:2208
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2206
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2203
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2205
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2207
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2202
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2201
Test for match score.
Definition parser.hpp:1757
Test for sequence.
Definition parser.hpp:1014
Definition parser.hpp:696
Test for SI Reference delimiter.
Definition parser.hpp:5170
Test for SI Reference part.
Definition parser.hpp:5125
Test for SI Reference.
Definition parser.hpp:5208
basic_si_reference_part< T > part3
Reference data part 3 (P3)
Definition parser.hpp:5525
basic_si_reference_part< T > part1
Reference data part 1 (P1)
Definition parser.hpp:5523
bool is_valid
Is reference valid.
Definition parser.hpp:5526
T model[3]
Reference model.
Definition parser.hpp:5522
basic_si_reference_part< T > part2
Reference data part 2 (P2)
Definition parser.hpp:5524
Test for signed numeral.
Definition parser.hpp:1844
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1912
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1911
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1910
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1913
Test for any space code unit.
Definition parser.hpp:392
Test for any space or punctuation code unit.
Definition parser.hpp:545
Test for any string.
Definition parser.hpp:1133
Test for given string.
Definition parser.hpp:823
Test for time.
Definition parser.hpp:4300
Test for valid URL password character.
Definition parser.hpp:3096
Test for valid URL path character.
Definition parser.hpp:3196
Test for URL path.
Definition parser.hpp:3304
Test for valid URL username character.
Definition parser.hpp:2997
Test for URL.
Definition parser.hpp:3445
Test for HTTP agent.
Definition parser.hpp:6832
Test for HTTP any type.
Definition parser.hpp:5954
Test for HTTP asterisk.
Definition parser.hpp:6596
Test for HTTP header.
Definition parser.hpp:7155
Test for HTTP language (RFC1766)
Definition parser.hpp:6464
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:5628
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5986
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:6041
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5899
http_token name
Parameter name.
Definition parser.hpp:5943
http_value value
Parameter value.
Definition parser.hpp:5944
Test for HTTP protocol.
Definition parser.hpp:6908
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:7010
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:5789
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:5845
Test for HTTP request.
Definition parser.hpp:7017
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:5664
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:5701
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:5735
Test for HTTP URL parameter.
Definition parser.hpp:6281
Test for HTTP URL path segment.
Definition parser.hpp:6192
Test for HTTP URL path segment.
Definition parser.hpp:6225
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:6274
Test for HTTP URL port.
Definition parser.hpp:6136
Test for HTTP URL server.
Definition parser.hpp:6099
Test for HTTP URL.
Definition parser.hpp:6362
Collection of HTTP values.
Definition parser.hpp:7265
Test for HTTP value (RFC2616: value)
Definition parser.hpp:5855
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5891
http_token token
Value when matched as token.
Definition parser.hpp:5892
Test for HTTP weight factor.
Definition parser.hpp:6527
float value
Calculated value of the weight factor.
Definition parser.hpp:6589
Test for HTTP weighted value.
Definition parser.hpp:6619
Base template for collection-holding parsers.
Definition parser.hpp:974
Test for any SGML code point.
Definition parser.hpp:261
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:780
Test for specific SGML code point.
Definition parser.hpp:348
Test for valid DNS domain SGML character.
Definition parser.hpp:2870
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2539
Test for any SGML punctuation code point.
Definition parser.hpp:512
Test for any SGML space code point.
Definition parser.hpp:435
Test for any SGML space or punctuation code point.
Definition parser.hpp:588
Test for SGML given string.
Definition parser.hpp:870
Test for valid URL password SGML character.
Definition parser.hpp:3148
Test for valid URL path SGML character.
Definition parser.hpp:3252
Test for valid URL username SGML character.
Definition parser.hpp:3048
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
T start
interval start
Definition interval.hpp:19
Definition parser.hpp:7291