stdex
Additional custom or not Standard C++ covered algorithms
Loading...
Searching...
No Matches
parser.cpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#include "pch.h"
7
8using namespace std;
9using namespace stdex;
10using namespace stdex::parser;
11#ifdef _WIN32
12using namespace Microsoft::VisualStudio::CppUnitTestFramework;
13#endif
14
15namespace UnitTests
16{
// Unit tests exercising the stdex::parser matchers through the MSVC
// CppUnitTestFramework Assert API. Each inner `{ ... }` scope builds one
// matcher `p`, calls p.match(...), and checks the reported p.interval.
// NOTE(review): this listing skips several numbered lines (20, 40, 48, 56,
// 167, 179, 208, 228, 268, 277, 283); the test-method headers and some
// declarations are therefore not visible here.
17 TEST_CLASS(parser)
18 {
19 public:
// First test body (its header, listing line 20, is not visible).
// Wide-character matchers run against `text`, where offset 4 is a space,
// 14 is '.', 15 is '\n' and 16 is the first character of the second line.
21 {
22 static const wchar_t text[] = L"This is a test.\nSecond line.";
23
// wnoop: expected to succeed at offset 0 with an empty interval [0, 0).
24 {
25 wnoop p;
26 Assert::IsTrue(p.match(text));
27 Assert::AreEqual((size_t)0, p.interval.start);
28 Assert::AreEqual((size_t)0, p.interval.end);
29 }
30
// wcu(L't'): the text starts with an upper-case 'T', so the default match
// is expected to fail and the match_case_insensitive one to cover [0, 1).
31 {
32 wcu p(L't');
33 Assert::IsFalse(p.match(text));
34 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive));
35 Assert::AreEqual((size_t)0, p.interval.start);
36 Assert::AreEqual((size_t)1, p.interval.end);
37 }
38
// Declaration of `p` (listing line 40) is not visible. The matcher is
// expected to reject offset 0 ('T') and accept the space at offset 4.
39 {
41 Assert::IsFalse(p.match(text));
42 Assert::IsTrue(p.match(text, 4));
43 Assert::AreEqual((size_t)4, p.interval.start);
44 Assert::AreEqual((size_t)5, p.interval.end);
45 }
46
// Declaration of `p` (listing line 48) is not visible. The matcher is
// expected to reject offset 0 and accept the '.' at offset 14.
47 {
49 Assert::IsFalse(p.match(text));
50 Assert::IsTrue(p.match(text, 14));
51 Assert::AreEqual((size_t)14, p.interval.start);
52 Assert::AreEqual((size_t)15, p.interval.end);
53 }
54
// Declaration of `p` (listing line 56) is not visible. The matcher is
// expected to accept both the space at 4 and the '.' at 14.
55 {
57 Assert::IsFalse(p.match(text));
58 Assert::IsTrue(p.match(text, 4));
59 Assert::AreEqual((size_t)4, p.interval.start);
60 Assert::AreEqual((size_t)5, p.interval.end);
61 Assert::IsTrue(p.match(text, 14));
62 Assert::AreEqual((size_t)14, p.interval.start);
63 Assert::AreEqual((size_t)15, p.interval.end);
64 }
65
// wbol: empty matches are expected only at offsets 0 and 16, i.e. at the
// first character of each of the two lines in `text`.
66 {
67 wbol p;
68 Assert::IsTrue(p.match(text));
69 Assert::AreEqual((size_t)0, p.interval.start);
70 Assert::AreEqual((size_t)0, p.interval.end);
71 Assert::IsFalse(p.match(text, 1));
72 Assert::IsFalse(p.match(text, 15));
73 Assert::IsTrue(p.match(text, 16));
74 Assert::AreEqual((size_t)16, p.interval.start);
75 Assert::AreEqual((size_t)16, p.interval.end);
76 }
77
// weol: an empty match is expected only at offset 15, where the '\n' sits.
78 {
79 weol p;
80 Assert::IsFalse(p.match(text));
81 Assert::IsFalse(p.match(text, 1));
82 Assert::IsTrue(p.match(text, 15));
83 Assert::AreEqual((size_t)15, p.interval.start);
84 Assert::AreEqual((size_t)15, p.interval.end);
85 Assert::IsFalse(p.match(text, 16));
86 }
87
// wcu_set(L"abcD"): 'a' at offset 8 should hit set index 0; the lower-case
// 'd' at offset 21 should hit set index 3 ('D') only when matching
// case-insensitively.
88 {
89 wcu_set p(L"abcD");
90 Assert::IsFalse(p.match(text));
91 Assert::IsTrue(p.match(text, 8));
92 Assert::AreEqual((size_t)8, p.interval.start);
93 Assert::AreEqual((size_t)9, p.interval.end);
94 Assert::AreEqual((size_t)0, p.hit_offset);
95 Assert::IsFalse(p.match(text, 21));
96 Assert::IsTrue(p.match(text, 21, _countof(text), match_case_insensitive));
97 Assert::AreEqual((size_t)21, p.interval.start);
98 Assert::AreEqual((size_t)22, p.interval.end);
99 Assert::AreEqual((size_t)3, p.hit_offset);
100 }
101
// stdex::parser::wstring(L"this"): expected to match "This" at [0, 4) only
// when case-insensitive.
// NOTE(review): the call below passes sizeof(text) (a byte count) where
// every sibling call passes _countof(text) (an element count). For a
// wchar_t array these differ whenever sizeof(wchar_t) > 1 - confirm that
// _countof(text) was intended.
102 {
103 stdex::parser::wstring p(L"this");
104 Assert::IsFalse(p.match(text));
105 Assert::IsTrue(p.match(text, 0, sizeof(text), match_case_insensitive));
106 Assert::AreEqual((size_t)0, p.interval.start);
107 Assert::AreEqual((size_t)4, p.interval.end);
108 }
109
// witerations over wany_cu with arguments (1, 5): the match is expected to
// cover [0, 5). Presumably 1 and 5 are the minimum and maximum repeat
// counts - TODO confirm against parser.hpp.
110 {
111 wany_cu chr;
112 witerations p(make_shared_no_delete(&chr), 1, 5);
113 Assert::IsTrue(p.match(text));
114 Assert::AreEqual((size_t)0, p.interval.start);
115 Assert::AreEqual((size_t)5, p.interval.end);
116 }
117
// witerations over wspace_cu constructed with `true`: the match is expected
// to cover [0, 4), i.e. "This" up to the first space. Presumably the `true`
// argument inverts the space test - TODO confirm.
118 {
119 wspace_cu nospace(true);
120 witerations p(make_shared_no_delete(&nospace), 1);
121 Assert::IsTrue(p.match(text));
122 Assert::AreEqual((size_t)0, p.interval.start);
123 Assert::AreEqual((size_t)4, p.interval.end);
124 }
125
// wsequence of 't', 'h', 'i', 's', space: expected to match "This " at
// [0, 5) only when case-insensitive.
126 {
127 wcu chr_t(L't'), chr_h(L'h'), chr_i(L'i'), chr_s(L's');
128 wspace_cu space;
129 wsequence p({
130 make_shared_no_delete(&chr_t),
131 make_shared_no_delete(&chr_h),
132 make_shared_no_delete(&chr_i),
133 make_shared_no_delete(&chr_s),
134 make_shared_no_delete(&space) });
135 Assert::IsFalse(p.match(text));
136 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive));
137 Assert::AreEqual((size_t)0, p.interval.start);
138 Assert::AreEqual((size_t)5, p.interval.end);
139 }
140
// wbranch over "apple", "orange", "this", space: the case-insensitive match
// is expected to report hit_offset 2 (the "this" alternative) over [0, 4).
141 {
142 stdex::parser::wstring apple(L"apple"), orange(L"orange"), _this(L"this");
143 wspace_cu space;
144 wbranch p({
145 make_shared_no_delete(&apple),
146 make_shared_no_delete(&orange),
147 make_shared_no_delete(&_this),
148 make_shared_no_delete(&space) });
149 Assert::IsFalse(p.match(text));
150 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive));
151 Assert::AreEqual((size_t)2, p.hit_offset);
152 Assert::AreEqual((size_t)0, p.interval.start);
153 Assert::AreEqual((size_t)4, p.interval.end);
154 }
155
// wstring_branch built from a nullptr-terminated list of string literals;
// same expectations as the wbranch case above.
156 {
157 wstring_branch p(L"apple", L"orange", L"this", nullptr);
158 Assert::IsFalse(p.match(text));
159 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive));
160 Assert::AreEqual((size_t)2, p.hit_offset);
161 Assert::AreEqual((size_t)0, p.interval.start);
162 Assert::AreEqual((size_t)4, p.interval.end);
163 }
164
// Declaration of `p` (listing line 167) is not visible. The elements are
// listed as 's', 'h', 'i', 't' yet the case-insensitive match is expected
// to cover "This" at [0, 4), so the matcher presumably accepts its elements
// in any order - TODO confirm.
165 {
166 wcu chr_s(L's'), chr_h(L'h'), chr_i(L'i'), chr_t(L't');
168 make_shared_no_delete(&chr_s),
169 make_shared_no_delete(&chr_h),
170 make_shared_no_delete(&chr_i),
171 make_shared_no_delete(&chr_t) });
172 Assert::IsFalse(p.match(text));
173 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive));
174 Assert::AreEqual((size_t)0, p.interval.start);
175 Assert::AreEqual((size_t)4, p.interval.end);
176 }
177 }
178
// Second test body (its header, listing line 179, is not visible).
// SGML matchers run against a narrow-character `text` using the "sl_SI"
// locale.
// NOTE(review): the asserted offsets below (e.g. a single character
// spanning 4..12) do not fit the literal as displayed; presumably the real
// source spells the non-ASCII letters as SGML entities that this rendering
// decoded - verify against the original file.
180 {
181 std::locale locale_slSI("sl_SI");
182 static const char text[] = "V kožuščku zlobnega mizarja stopiclja fant\nin kliče 1234567890.";
183
// sgml_noop: expected to succeed at offset 0 with an empty interval.
184 {
185 sgml_noop p;
186 Assert::IsTrue(p.match(text));
187 Assert::AreEqual((size_t)0, p.interval.start);
188 Assert::AreEqual((size_t)0, p.interval.end);
189 }
190
// sgml_cp("v"): the text starts with an upper-case 'V', so only the
// case-insensitive match is expected to succeed, covering [0, 1).
191 {
192 sgml_cp p("v");
193 Assert::IsFalse(p.match(text));
194 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive));
195 Assert::AreEqual((size_t)0, p.interval.start);
196 Assert::AreEqual((size_t)1, p.interval.end);
197 }
198
// sgml_cp for an upper-case letter with the Slovenian locale: expected to
// match at offset 4 only when case-insensitive, covering [4, 12).
199 {
200 sgml_cp p("Ž", SIZE_MAX, false, locale_slSI);
201 Assert::IsFalse(p.match(text, 4));
202 Assert::IsTrue(p.match(text, 4, _countof(text), match_case_insensitive));
203 Assert::AreEqual((size_t)4, p.interval.start);
204 Assert::AreEqual((size_t)12, p.interval.end);
205 }
206
// Declaration of `p` (listing line 208) is not visible. The matcher is
// expected to reject offset 0, accept [1, 2) and accept [79, 85).
207 {
209 Assert::IsFalse(p.match(text));
210 Assert::IsTrue(p.match(text, 1));
211 Assert::AreEqual((size_t)1, p.interval.start);
212 Assert::AreEqual((size_t)2, p.interval.end);
213 Assert::IsTrue(p.match(text, 79));
214 Assert::AreEqual((size_t)79, p.interval.start);
215 Assert::AreEqual((size_t)85, p.interval.end);
216 }
217
// sgml_string_branch with a nullptr-terminated list: the case-insensitive
// match from offset 2 is expected to report hit_offset 2 (the third string)
// over [2, 31).
218 {
219 sgml_string_branch p(locale_slSI, "apple", "orange", "KoŽuŠčKu", nullptr);
220 Assert::IsFalse(p.match(text, 2));
221 Assert::IsTrue(p.match(text, 2, _countof(text), match_case_insensitive));
222 Assert::AreEqual((size_t)2, p.hit_offset);
223 Assert::AreEqual((size_t)2, p.interval.start);
224 Assert::AreEqual((size_t)31, p.interval.end);
225 }
226 }
227
// Third test body (its header, listing line 228, is not visible).
// HTTP matchers run against a captured request: a 14-character request line
// ("GET / HTTP/2\r\n") followed by 15 header lines and a blank line.
229 {
230 static const std::locale locale("en_US.UTF-8");
231 static const char request[] =
232 "GET / HTTP/2\r\n"
233 "Host: stackoverflow.com\r\n"
234 "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0\r\n"
235 "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8\r\n"
236 "Accept-Language: sl,en-US;q=0.8,en;q=0.6,de-DE;q=0.4,de;q=0.2\r\n"
237 "Accept-Encoding: gzip, deflate, br\r\n"
238 "DNT: 1\r\n"
239 "Connection: keep-alive\r\n"
240 "Cookie: prov=00000000-0000-0000-0000-000000000000; acct=t=00000000000000000%2f%2f0000%2b0000%2b000&s=00000000000000000000000000000000; OptanonConsent=isGpcEnabled=0&datestamp=Fri+Feb+03+2023+11%3A11%3A08+GMT%2B0100+(Srednjeevropski+standardni+%C4%8Das)&version=6.37.0&isIABGlobal=false&hosts=&consentId=00000000-0000-0000-0000-000000000000&interactionCount=1&landingPath=NotLandingPage&groups=00000%3A0%2C00000%3A0%2C00000%3A0%2C00000%3A0; OptanonAlertBoxClosed=2023-02-03T10:11:08.683Z\r\n"
241 "Upgrade-Insecure-Requests: 1\r\n"
242 "Sec-Fetch-Dest: document\r\n"
243 "Sec-Fetch-Mode: navigate\r\n"
244 "Sec-Fetch-Site: none\r\n"
245 "Sec-Fetch-User: ?1\r\n"
246 "Pragma: no-cache\r\n"
247 "Cache-Control: no-cache\r\n"
248 "\r\n";
249
// http_request: the request line is expected to span [0, 14) with the verb
// at [0, 3) ("GET"), the URL at [4, 5) ("/"), the protocol at [6, 12)
// ("HTTP/2") and protocol.version equal to 0x200.
250 {
251 http_request p(locale);
252 Assert::IsTrue(p.match(request));
253 Assert::AreEqual((size_t)0, p.interval.start);
254 Assert::AreEqual((size_t)14, p.interval.end);
255 Assert::AreEqual((size_t)0, p.verb.start);
256 Assert::AreEqual((size_t)3, p.verb.end);
257 Assert::AreEqual((size_t)4, p.url.interval.start);
258 Assert::AreEqual((size_t)5, p.url.interval.end);
259 Assert::AreEqual((size_t)6, p.protocol.interval.start);
260 Assert::AreEqual((size_t)12, p.protocol.interval.end);
261 Assert::AreEqual((uint16_t)0x200, p.protocol.version);
262 }
263
// Header walk: starting right after the request line (offset 14), match one
// header at a time, advancing to the end of each match, until a match
// fails; 15 headers are expected. The declarations of `h`, `langs` and
// `control` (listing lines 268, 277, 283) are not visible.
264 {
265 std::list<http_header> hdrs;
266 size_t offset = 14;
267 for (;;) {
269 if (h.match(request, offset)) {
270 offset = h.interval.end;
271 hdrs.push_back(std::move(h));
272 }
273 else
274 break;
275 }
276 Assert::AreEqual((size_t)15, hdrs.size());
// Feed the value range of every header named "Accept-Language" (compared
// with strnicmp under `locale`) into `langs`.
278 for (const auto& h : hdrs)
279 if (strnicmp(request + h.name.start, h.name.size(), "Accept-Language", (size_t)-1, locale) == 0)
280 langs.insert(request, h.value.start, h.value.end);
281 Assert::IsTrue(!langs.empty());
// Walk `langs` and `control` in lock-step; both must hold the same entries
// in the same order and run out together.
// NOTE(review): the expected order is not the order in the header itself
// ("sl,en-US;q=0.8,en;q=0.6,..."); presumably `langs` orders entries by
// descending q-value - TODO confirm.
282 {
284 "sl", "en-US", "en", "de-DE", "de"
285 };
286 auto c = control.cbegin();
287 auto l = langs.cbegin();
288 for (; c != control.cend() && l != langs.cend(); ++c, ++l)
289 Assert::IsTrue(strnicmp(request + l->value.interval.start, l->value.interval.size(), c->c_str(), c->size(), locale) == 0);
290 Assert::IsTrue(c == control.cend());
291 Assert::IsTrue(l == langs.cend());
292 }
293 }
294
// Sample HTTP response kept for reference; it is commented out and no
// visible test uses it.
295 //static const char response[] =
296 // "HTTP/2 200 OK\r\n"
297 // "cache-control: private\r\n"
298 // "content-type: text/html; charset=utf-8\r\n"
299 // "content-encoding: gzip\r\n"
300 // "strict-transport-security: max-age=15552000\r\n"
301 // "x-frame-options: SAMEORIGIN\r\n"
302 // "set-cookie: acct=t=00000000000000000%2f%2f0000%2b0000%2b000&s=00000000000000000000000000000000; expires=Sat, 16 Sep 2023 10:23:00 GMT; domain=.stackoverflow.com; path=/; secure; samesite=none; httponly\r\n"
303 // "set-cookie: prov_tgt=; expires=Tue, 14 Mar 2023 10:23:00 GMT; domain=.stackoverflow.com; path=/; secure; samesite=none; httponly\r\n"
304 // "x-request-guid: a6536a49-b473-4c6f-b313-c1e7c0d8f600\r\n"
305 // "feature-policy: microphone 'none'; speaker 'none'\r\n"
306 // "content-security-policy: upgrade-insecure-requests; frame-ancestors 'self' https://stackexchange.com\r\n"
307 // "accept-ranges: bytes\r\n"
308 // "date: Thu, 16 Mar 2023 10:23:00 GMT\r\n"
309 // "via: 1.1 varnish\r\n"
310 // "x-served-by: cache-vie6354-VIE\r\n"
311 // "x-cache: MISS\r\n"
312 // "x-cache-hits: 0\r\n"
313 // "x-timer: S1678962181.533907,VS0,VE144\r\n"
314 // "vary: Accept-Encoding,Fastly-SSL\r\n"
315 // "x-dns-prefetch-control: off\r\n"
316 // "X-Firefox-Spdy: h2\r\n"
317 // "\r\n";
318 }
319 };
320}
interval< size_t > interval
Region of the last match.
Definition parser.hpp:169
Test for given string.
Definition parser.hpp:815
Test for HTTP header.
Definition parser.hpp:6277
Test for HTTP request.
Definition parser.hpp:6139
Test for specific SGML code point.
Definition parser.hpp:340
Test for any SGML space code point.
Definition parser.hpp:427
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
T start
interval start
Definition interval.hpp:19