stdex
Additional custom algorithms and algorithms not covered by Standard C++
Loading...
Searching...
No Matches
parser.cpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023 Amebis
4*/
5
6#include "pch.h"
7
8using namespace std;
9using namespace stdex;
10using namespace stdex::parser;
11#ifdef _WIN32
12using namespace Microsoft::VisualStudio::CppUnitTestFramework;
13
14namespace Microsoft {
15 namespace VisualStudio {
16 namespace CppUnitTestFramework {
17 static std::wstring ToString(const stdex::interval<size_t>& q) // Renders an interval as the text "<start, end>"; presumably what lets Assert::AreEqual report interval operands — confirm against the test framework
18 {
19 return stdex::sprintf(L"<%zu, %zu>", nullptr, q.start, q.end); // NOTE(review): the nullptr argument looks like a "no locale" placeholder — confirm against stdex::sprintf
20 }
21 }
22 }
23}
24#endif
25
26namespace UnitTests
27{
28 TEST_CLASS(parser)
29 {
30 public:
32 {
33 static const wchar_t text[] = L"This is a test.\nSecond line.";
34
35 {
36 wnoop p;
37 Assert::IsTrue(p.match(text));
38 Assert::AreEqual((size_t)0, p.interval.start);
39 Assert::AreEqual((size_t)0, p.interval.end);
40 }
41
42 {
43 wcu p(L't');
44 Assert::IsFalse(p.match(text));
45 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive));
46 Assert::AreEqual((size_t)0, p.interval.start);
47 Assert::AreEqual((size_t)1, p.interval.end);
48 }
49
50 {
52 Assert::IsFalse(p.match(text));
53 Assert::IsTrue(p.match(text, 4));
54 Assert::AreEqual((size_t)4, p.interval.start);
55 Assert::AreEqual((size_t)5, p.interval.end);
56 }
57
58 {
60 Assert::IsFalse(p.match(text));
61 Assert::IsTrue(p.match(text, 14));
62 Assert::AreEqual((size_t)14, p.interval.start);
63 Assert::AreEqual((size_t)15, p.interval.end);
64 }
65
66 {
68 Assert::IsFalse(p.match(text));
69 Assert::IsTrue(p.match(text, 4));
70 Assert::AreEqual((size_t)4, p.interval.start);
71 Assert::AreEqual((size_t)5, p.interval.end);
72 Assert::IsTrue(p.match(text, 14));
73 Assert::AreEqual((size_t)14, p.interval.start);
74 Assert::AreEqual((size_t)15, p.interval.end);
75 }
76
77 {
78 wbol p;
79 Assert::IsTrue(p.match(text));
80 Assert::AreEqual((size_t)0, p.interval.start);
81 Assert::AreEqual((size_t)0, p.interval.end);
82 Assert::IsFalse(p.match(text, 1));
83 Assert::IsFalse(p.match(text, 15));
84 Assert::IsTrue(p.match(text, 16));
85 Assert::AreEqual((size_t)16, p.interval.start);
86 Assert::AreEqual((size_t)16, p.interval.end);
87 }
88
89 {
90 weol p;
91 Assert::IsFalse(p.match(text));
92 Assert::IsFalse(p.match(text, 1));
93 Assert::IsTrue(p.match(text, 15));
94 Assert::AreEqual((size_t)15, p.interval.start);
95 Assert::AreEqual((size_t)15, p.interval.end);
96 Assert::IsFalse(p.match(text, 16));
97 }
98
99 {
100 wcu_set p(L"abcD");
101 Assert::IsFalse(p.match(text));
102 Assert::IsTrue(p.match(text, 8));
103 Assert::AreEqual((size_t)8, p.interval.start);
104 Assert::AreEqual((size_t)9, p.interval.end);
105 Assert::AreEqual((size_t)0, p.hit_offset);
106 Assert::IsFalse(p.match(text, 21));
107 Assert::IsTrue(p.match(text, 21, _countof(text), match_case_insensitive));
108 Assert::AreEqual((size_t)21, p.interval.start);
109 Assert::AreEqual((size_t)22, p.interval.end);
110 Assert::AreEqual((size_t)3, p.hit_offset);
111 }
112
113 {
114 stdex::parser::wstring p(L"this");
115 Assert::IsFalse(p.match(text)); // without flags "this" must not match the leading "This"
116 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive)); // pass the element count like the sibling cases; sizeof(text) is the byte size of the wchar_t array and overstates the bound
117 Assert::AreEqual((size_t)0, p.interval.start);
118 Assert::AreEqual((size_t)4, p.interval.end);
119 }
120
121 {
122 wany_cu chr;
123 witerations p(make_shared_no_delete(&chr), 1, 5);
124 Assert::IsTrue(p.match(text));
125 Assert::AreEqual((size_t)0, p.interval.start);
126 Assert::AreEqual((size_t)5, p.interval.end);
127 }
128
129 {
130 wspace_cu nospace(true);
131 witerations p(make_shared_no_delete(&nospace), 1);
132 Assert::IsTrue(p.match(text));
133 Assert::AreEqual((size_t)0, p.interval.start);
134 Assert::AreEqual((size_t)4, p.interval.end);
135 }
136
137 {
138 wcu chr_t(L't'), chr_h(L'h'), chr_i(L'i'), chr_s(L's');
139 wspace_cu space;
140 wsequence p({
141 make_shared_no_delete(&chr_t),
142 make_shared_no_delete(&chr_h),
143 make_shared_no_delete(&chr_i),
144 make_shared_no_delete(&chr_s),
145 make_shared_no_delete(&space) });
146 Assert::IsFalse(p.match(text));
147 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive));
148 Assert::AreEqual((size_t)0, p.interval.start);
149 Assert::AreEqual((size_t)5, p.interval.end);
150 }
151
152 {
153 stdex::parser::wstring apple(L"apple"), orange(L"orange"), _this(L"this");
154 wspace_cu space;
155 wbranch p({
156 make_shared_no_delete(&apple),
157 make_shared_no_delete(&orange),
158 make_shared_no_delete(&_this),
159 make_shared_no_delete(&space) });
160 Assert::IsFalse(p.match(text));
161 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive));
162 Assert::AreEqual((size_t)2, p.hit_offset);
163 Assert::AreEqual((size_t)0, p.interval.start);
164 Assert::AreEqual((size_t)4, p.interval.end);
165 }
166
167 {
168 wstring_branch p(L"apple", L"orange", L"this", nullptr);
169 Assert::IsFalse(p.match(text));
170 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive));
171 Assert::AreEqual((size_t)2, p.hit_offset);
172 Assert::AreEqual((size_t)0, p.interval.start);
173 Assert::AreEqual((size_t)4, p.interval.end);
174 }
175
176 {
177 wcu chr_s(L's'), chr_h(L'h'), chr_i(L'i'), chr_t(L't');
179 make_shared_no_delete(&chr_s),
180 make_shared_no_delete(&chr_h),
181 make_shared_no_delete(&chr_i),
182 make_shared_no_delete(&chr_t) });
183 Assert::IsFalse(p.match(text));
184 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive));
185 Assert::AreEqual((size_t)0, p.interval.start);
186 Assert::AreEqual((size_t)4, p.interval.end);
187 }
188
189 {
190 std::locale locale_slSI("sl_SI");
191 wspace_cu space(false, locale_slSI);
192 wiban p(make_shared_no_delete(&space), locale_slSI);
193 Assert::IsTrue(p.match(L"SI56023120015226972", 0, SIZE_MAX));
194 Assert::IsTrue(p.is_valid);
195 Assert::AreEqual(L"SI", p.country);
196 Assert::AreEqual(L"56", p.check_digits);
197 Assert::AreEqual(L"023120015226972", p.bban);
198 Assert::IsTrue(p.match(L"SI56 0231 2001 5226 972", 0, SIZE_MAX));
199 Assert::IsTrue(p.is_valid);
200 Assert::AreEqual(L"SI", p.country);
201 Assert::AreEqual(L"56", p.check_digits);
202 Assert::AreEqual(L"023120015226972", p.bban);
203 Assert::IsFalse(p.match(L"si56 0231 2001 5226 972", 0, SIZE_MAX));
204 Assert::IsFalse(p.is_valid);
205 Assert::IsTrue(p.match(L"si56 0231 2001 5226 972", 0, SIZE_MAX, match_case_insensitive));
206 Assert::IsTrue(p.is_valid);
207 Assert::IsTrue(p.match(L"SI56 0231 2001 5226 9720", 0, SIZE_MAX));
208 Assert::IsFalse(p.is_valid);
209 Assert::IsTrue(p.match(L"...SI56 0231 2001 5226 972...", 3, SIZE_MAX));
210 Assert::IsTrue(p.is_valid);
211 Assert::IsTrue(p.match(L"SI56 0231 2001 5226 972", 0, SIZE_MAX)); // no-break space
212 Assert::IsTrue(p.is_valid);
213 }
214
215 {
216 std::locale locale_slSI("sl_SI");
217 wspace_cu space(false, locale_slSI);
218 wcreditor_reference p(make_shared_no_delete(&space), locale_slSI);
219 Assert::IsTrue(p.match(L"RF18539007547034", 0, SIZE_MAX));
220 Assert::IsTrue(p.is_valid);
221 Assert::AreEqual(L"18", p.check_digits);
222 Assert::AreEqual(L"000000000539007547034", p.reference);
223 Assert::IsTrue(p.match(L"RF18 5390 0754 7034", 0, SIZE_MAX));
224 Assert::IsTrue(p.is_valid);
225 Assert::AreEqual(L"18", p.check_digits);
226 Assert::AreEqual(L"000000000539007547034", p.reference);
227 Assert::IsFalse(p.match(L"rf18 5390 0754 7034", 0, SIZE_MAX));
228 Assert::IsFalse(p.is_valid);
229 Assert::IsTrue(p.match(L"rf18 5390 0754 7034", 0, SIZE_MAX, match_case_insensitive));
230 Assert::IsTrue(p.is_valid);
231 Assert::IsTrue(p.match(L"RF18 5390 0754 70340", 0, SIZE_MAX));
232 Assert::IsFalse(p.is_valid);
233 Assert::IsTrue(p.match(L"...RF18 5390 0754 7034...", 3, SIZE_MAX));
234 Assert::IsTrue(p.is_valid);
235 Assert::IsTrue(p.match(L"RF18 5390 0754 7034", 0, SIZE_MAX)); // no-break space
236 Assert::IsTrue(p.is_valid);
237 }
238
239 {
240 std::locale locale_slSI("sl_SI");
241 wspace_cu space(false, locale_slSI);
242 wsi_reference p(make_shared_no_delete(&space), locale_slSI);
243 Assert::IsTrue(p.match(L"SI121234567890120", 0, SIZE_MAX));
244 Assert::IsTrue(p.is_valid);
245 Assert::AreEqual(L"12", p.model);
246 Assert::AreEqual(stdex::interval<size_t>(4, 17), p.part1.interval);
247 Assert::IsTrue(p.match(L"SI12 1234567890120", 0, SIZE_MAX));
248 Assert::IsTrue(p.is_valid);
249 Assert::AreEqual(L"12", p.model);
250 Assert::AreEqual(stdex::interval<size_t>(5, 18), p.part1.interval);
251 Assert::IsFalse(p.match(L"si12 1234567890120", 0, SIZE_MAX));
252 Assert::IsTrue(p.match(L"si12 1234567890120", 0, SIZE_MAX, match_case_insensitive));
253 Assert::IsTrue(p.match(L"...SI12 1234567890120...", 3, SIZE_MAX));
254 Assert::IsTrue(p.match(L"SI12 1234567890120", 0, SIZE_MAX)); // no-break space
255 }
256 }
257
259 {
260 std::locale locale_slSI("sl_SI");
261 static const char text[] = "V ko&zcaron;u&scaron;&ccaron;ku zlobnega mizarja stopiclja fant\nin kli&ccaron;e&nbsp;1234567890.";
262
263 {
264 sgml_noop p;
265 Assert::IsTrue(p.match(text));
266 Assert::AreEqual((size_t)0, p.interval.start);
267 Assert::AreEqual((size_t)0, p.interval.end);
268 }
269
270 {
271 sgml_cp p("v");
272 Assert::IsFalse(p.match(text));
273 Assert::IsTrue(p.match(text, 0, _countof(text), match_case_insensitive));
274 Assert::AreEqual((size_t)0, p.interval.start);
275 Assert::AreEqual((size_t)1, p.interval.end);
276 }
277
278 {
279 sgml_cp p("&Zcaron;", SIZE_MAX, false, locale_slSI);
280 Assert::IsFalse(p.match(text, 4));
281 Assert::IsTrue(p.match(text, 4, _countof(text), match_case_insensitive));
282 Assert::AreEqual((size_t)4, p.interval.start);
283 Assert::AreEqual((size_t)12, p.interval.end);
284 }
285
286 {
288 Assert::IsFalse(p.match(text));
289 Assert::IsTrue(p.match(text, 1));
290 Assert::AreEqual((size_t)1, p.interval.start);
291 Assert::AreEqual((size_t)2, p.interval.end);
292 Assert::IsTrue(p.match(text, 79));
293 Assert::AreEqual((size_t)79, p.interval.start);
294 Assert::AreEqual((size_t)85, p.interval.end);
295 }
296
297 {
298 sgml_string_branch p(locale_slSI, "apple", "orange", "Ko&Zcaron;u&Scaron;&ccaron;Ku", nullptr);
299 Assert::IsFalse(p.match(text, 2));
300 Assert::IsTrue(p.match(text, 2, _countof(text), match_case_insensitive));
301 Assert::AreEqual((size_t)2, p.hit_offset);
302 Assert::AreEqual((size_t)2, p.interval.start);
303 Assert::AreEqual((size_t)31, p.interval.end);
304 }
305
306 {
307 std::locale locale_slSI("sl_SI");
308 sgml_space_cp space(false, locale_slSI);
309 sgml_iban p(make_shared_no_delete(&space), locale_slSI);
310 Assert::IsTrue(p.match("SI56023120015226972", 0, SIZE_MAX));
311 Assert::IsTrue(p.is_valid);
312 Assert::AreEqual("SI", p.country);
313 Assert::AreEqual("56", p.check_digits);
314 Assert::AreEqual("023120015226972", p.bban);
315 Assert::IsTrue(p.match("SI56 0231 2001 5226 972", 0, SIZE_MAX));
316 Assert::IsTrue(p.is_valid);
317 Assert::AreEqual("SI", p.country);
318 Assert::AreEqual("56", p.check_digits);
319 Assert::AreEqual("023120015226972", p.bban);
320 Assert::IsFalse(p.match("si56 0231 2001 5226 972", 0, SIZE_MAX));
321 Assert::IsFalse(p.is_valid);
322 Assert::IsTrue(p.match("si56 0231 2001 5226 972", 0, SIZE_MAX, match_case_insensitive));
323 Assert::IsTrue(p.is_valid);
324 Assert::IsTrue(p.match("SI56 0231 2001 5226 9720", 0, SIZE_MAX));
325 Assert::IsFalse(p.is_valid);
326 Assert::IsTrue(p.match("...SI56 0231 2001 5226 972...", 3, SIZE_MAX));
327 Assert::IsTrue(p.is_valid);
328 Assert::IsTrue(p.match("SI56&nbsp;0231&nbsp;2001&nbsp;5226&nbsp;972", 0, SIZE_MAX));
329 Assert::IsTrue(p.is_valid);
330 }
331
332 {
333 std::locale locale_slSI("sl_SI");
334 sgml_space_cp space(false, locale_slSI);
335 sgml_creditor_reference p(make_shared_no_delete(&space), locale_slSI);
336 Assert::IsTrue(p.match("RF18539007547034", 0, SIZE_MAX));
337 Assert::IsTrue(p.is_valid);
338 Assert::AreEqual("18", p.check_digits);
339 Assert::AreEqual("000000000539007547034", p.reference);
340 Assert::IsTrue(p.match("RF18 5390 0754 7034", 0, SIZE_MAX));
341 Assert::IsTrue(p.is_valid);
342 Assert::AreEqual("18", p.check_digits);
343 Assert::AreEqual("000000000539007547034", p.reference);
344 Assert::IsFalse(p.match("rf18 5390 0754 7034", 0, SIZE_MAX));
345 Assert::IsFalse(p.is_valid);
346 Assert::IsTrue(p.match("rf18 5390 0754 7034", 0, SIZE_MAX, match_case_insensitive));
347 Assert::IsTrue(p.is_valid);
348 Assert::IsTrue(p.match("RF18 5390 0754 70340", 0, SIZE_MAX));
349 Assert::IsFalse(p.is_valid);
350 Assert::IsTrue(p.match("...RF18 5390 0754 7034...", 3, SIZE_MAX));
351 Assert::IsTrue(p.is_valid);
352 Assert::IsTrue(p.match("RF18&nbsp;5390&nbsp;0754&nbsp;7034", 0, SIZE_MAX));
353 Assert::IsTrue(p.is_valid);
354 }
355
356 {
357 std::locale locale_slSI("sl_SI");
358 sgml_space_cp space(false, locale_slSI);
359 sgml_si_reference p(make_shared_no_delete(&space), locale_slSI);
360 Assert::IsTrue(p.match("SI121234567890120", 0, SIZE_MAX));
361 Assert::IsTrue(p.is_valid);
362 Assert::AreEqual("12", p.model);
363 Assert::AreEqual(stdex::interval<size_t>(4, 17), p.part1.interval);
364 Assert::IsTrue(p.match("SI12 1234567890120", 0, SIZE_MAX));
365 Assert::IsTrue(p.is_valid);
366 Assert::AreEqual("12", p.model);
367 Assert::AreEqual(stdex::interval<size_t>(5, 18), p.part1.interval);
368 Assert::IsFalse(p.match("si12 1234567890120", 0, SIZE_MAX));
369 Assert::IsTrue(p.match("si12 1234567890120", 0, SIZE_MAX, match_case_insensitive));
370 Assert::IsTrue(p.match("...SI12 1234567890120...", 3, SIZE_MAX));
371 Assert::IsTrue(p.match("SI12&nbsp;1234567890120", 0, SIZE_MAX));
372 }
373 }
374
376 {
377 static const std::locale locale("en_US.UTF-8");
378 static const char request[] =
379 "GET / HTTP/2\r\n"
380 "Host: stackoverflow.com\r\n"
381 "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0\r\n"
382 "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8\r\n"
383 "Accept-Language: sl,en-US;q=0.8,en;q=0.6,de-DE;q=0.4,de;q=0.2\r\n"
384 "Accept-Encoding: gzip, deflate, br\r\n"
385 "DNT: 1\r\n"
386 "Connection: keep-alive\r\n"
387 "Cookie: prov=00000000-0000-0000-0000-000000000000; acct=t=00000000000000000%2f%2f0000%2b0000%2b000&s=00000000000000000000000000000000; OptanonConsent=isGpcEnabled=0&datestamp=Fri+Feb+03+2023+11%3A11%3A08+GMT%2B0100+(Srednjeevropski+standardni+%C4%8Das)&version=6.37.0&isIABGlobal=false&hosts=&consentId=00000000-0000-0000-0000-000000000000&interactionCount=1&landingPath=NotLandingPage&groups=00000%3A0%2C00000%3A0%2C00000%3A0%2C00000%3A0; OptanonAlertBoxClosed=2023-02-03T10:11:08.683Z\r\n"
388 "Upgrade-Insecure-Requests: 1\r\n"
389 "Sec-Fetch-Dest: document\r\n"
390 "Sec-Fetch-Mode: navigate\r\n"
391 "Sec-Fetch-Site: none\r\n"
392 "Sec-Fetch-User: ?1\r\n"
393 "Pragma: no-cache\r\n"
394 "Cache-Control: no-cache\r\n"
395 "\r\n";
396
397 {
398 http_request p(locale);
399 Assert::IsTrue(p.match(request));
400 Assert::AreEqual((size_t)0, p.interval.start);
401 Assert::AreEqual((size_t)14, p.interval.end);
402 Assert::AreEqual((size_t)0, p.verb.start);
403 Assert::AreEqual((size_t)3, p.verb.end);
404 Assert::AreEqual((size_t)4, p.url.interval.start);
405 Assert::AreEqual((size_t)5, p.url.interval.end);
406 Assert::AreEqual((size_t)6, p.protocol.interval.start);
407 Assert::AreEqual((size_t)12, p.protocol.interval.end);
408 Assert::AreEqual((uint16_t)0x200, p.protocol.version);
409 }
410
411 {
412 std::list<http_header> hdrs;
413 size_t offset = 14;
414 for (;;) {
416 if (h.match(request, offset)) {
417 offset = h.interval.end;
418 hdrs.push_back(std::move(h));
419 }
420 else
421 break;
422 }
423 Assert::AreEqual((size_t)15, hdrs.size());
425 for (const auto& h : hdrs)
426 if (strnicmp(request + h.name.start, h.name.size(), "Accept-Language", (size_t)-1, locale) == 0)
427 langs.insert(request, h.value.start, h.value.end);
428 Assert::IsTrue(!langs.empty());
429 {
431 "sl", "en-US", "en", "de-DE", "de"
432 };
433 auto c = control.cbegin();
434 auto l = langs.cbegin();
435 for (; c != control.cend() && l != langs.cend(); ++c, ++l)
436 Assert::IsTrue(strnicmp(request + l->value.interval.start, l->value.interval.size(), c->c_str(), c->size(), locale) == 0);
437 Assert::IsTrue(c == control.cend());
438 Assert::IsTrue(l == langs.cend());
439 }
440 }
441
442 //static const char response[] =
443 // "HTTP/2 200 OK\r\n"
444 // "cache-control: private\r\n"
445 // "content-type: text/html; charset=utf-8\r\n"
446 // "content-encoding: gzip\r\n"
447 // "strict-transport-security: max-age=15552000\r\n"
448 // "x-frame-options: SAMEORIGIN\r\n"
449 // "set-cookie: acct=t=00000000000000000%2f%2f0000%2b0000%2b000&s=00000000000000000000000000000000; expires=Sat, 16 Sep 2023 10:23:00 GMT; domain=.stackoverflow.com; path=/; secure; samesite=none; httponly\r\n"
450 // "set-cookie: prov_tgt=; expires=Tue, 14 Mar 2023 10:23:00 GMT; domain=.stackoverflow.com; path=/; secure; samesite=none; httponly\r\n"
451 // "x-request-guid: a6536a49-b473-4c6f-b313-c1e7c0d8f600\r\n"
452 // "feature-policy: microphone 'none'; speaker 'none'\r\n"
453 // "content-security-policy: upgrade-insecure-requests; frame-ancestors 'self' https://stackexchange.com\r\n"
454 // "accept-ranges: bytes\r\n"
455 // "date: Thu, 16 Mar 2023 10:23:00 GMT\r\n"
456 // "via: 1.1 varnish\r\n"
457 // "x-served-by: cache-vie6354-VIE\r\n"
458 // "x-cache: MISS\r\n"
459 // "x-cache-hits: 0\r\n"
460 // "x-timer: S1678962181.533907,VS0,VE144\r\n"
461 // "vary: Accept-Encoding,Fastly-SSL\r\n"
462 // "x-dns-prefetch-control: off\r\n"
463 // "X-Firefox-Spdy: h2\r\n"
464 // "\r\n";
465 }
466 };
467}
interval< size_t > interval
Region of the last match.
Definition parser.hpp:170
Test for given string.
Definition parser.hpp:816
Test for HTTP header.
Definition parser.hpp:7156
Test for HTTP request.
Definition parser.hpp:7018
Test for specific SGML code point.
Definition parser.hpp:341
Test for any SGML space code point.
Definition parser.hpp:428
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
T start
interval start
Definition interval.hpp:19