parser: detect spaces, characters and newline faster where appropriate

No need to use locale-specific character type detection when testing for ASCII characters.
The locale-specific implementation on Windows is not very fast.

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2023-11-28 13:48:54 +01:00
parent 7685818bf7
commit e17fa1d8c2
2 changed files with 18 additions and 7 deletions

View File

@ -400,7 +400,7 @@ namespace stdex
_Assume_(text || start >= end); _Assume_(text || start >= end);
if (start < end && text[start]) { if (start < end && text[start]) {
bool r = bool r =
((flags & match_multiline) || !islbreak(text[start])) && ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]); std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
if ((r && !m_invert) || (!r && m_invert)) { if ((r && !m_invert) || (!r && m_invert)) {
this->interval.end = (this->interval.start = start) + 1; this->interval.end = (this->interval.start = start) + 1;
@ -445,7 +445,7 @@ namespace stdex
const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf); const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
const wchar_t* chr_end = chr + stdex::strlen(chr); const wchar_t* chr_end = chr + stdex::strlen(chr);
bool r = bool r =
((flags & match_multiline) || !islbreak(chr, SIZE_MAX)) && ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end; std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
if ((r && !m_invert) || (!r && m_invert)) { if ((r && !m_invert) || (!r && m_invert)) {
this->interval.start = start; this->interval.start = start;
@ -553,7 +553,7 @@ namespace stdex
_Assume_(text || start >= end); _Assume_(text || start >= end);
if (start < end && text[start]) { if (start < end && text[start]) {
bool r = bool r =
((flags & match_multiline) || !islbreak(text[start])) && ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]); std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
if ((r && !m_invert) || (!r && m_invert)) { if ((r && !m_invert) || (!r && m_invert)) {
this->interval.end = (this->interval.start = start) + 1; this->interval.end = (this->interval.start = start) + 1;
@ -598,7 +598,7 @@ namespace stdex
const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf); const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
const wchar_t* chr_end = chr + stdex::strlen(chr); const wchar_t* chr_end = chr + stdex::strlen(chr);
bool r = bool r =
((flags & match_multiline) || !islbreak(chr, SIZE_MAX)) && ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end; std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
if ((r && !m_invert) || (!r && m_invert)) { if ((r && !m_invert) || (!r && m_invert)) {
this->interval.start = start; this->interval.start = start;
@ -626,7 +626,7 @@ namespace stdex
_In_ int flags = match_default) _In_ int flags = match_default)
{ {
_Assume_(text || start >= end); _Assume_(text || start >= end);
bool r = start == 0 || (start <= end && islbreak(text[start - 1])); bool r = start == 0 || (start <= end && stdex::islbreak(text[start - 1]));
if ((r && !m_invert) || (!r && m_invert)) { if ((r && !m_invert) || (!r && m_invert)) {
this->interval.end = this->interval.start = start; this->interval.end = this->interval.start = start;
return true; return true;
@ -664,7 +664,7 @@ namespace stdex
_In_ int flags = match_default) _In_ int flags = match_default)
{ {
_Assume_(text || start >= end); _Assume_(text || start >= end);
bool r = islbreak(text[start]); bool r = stdex::islbreak(text[start]);
if ((r && !m_invert) || (!r && m_invert)) { if ((r && !m_invert) || (!r && m_invert)) {
this->interval.end = this->interval.start = start; this->interval.end = this->interval.start = start;
return true; return true;
@ -6471,7 +6471,7 @@ namespace stdex
k.end = this->interval.end; k.end = this->interval.end;
for (;;) { for (;;) {
if (k.end < end && text[k.end]) { if (k.end < end && text[k.end]) {
if (isalpha(text[k.end])) if (stdex::isalpha(text[k.end]))
k.end++; k.end++;
else else
break; break;

View File

@ -177,6 +177,17 @@ namespace stdex
return '0' <= chr && chr <= '9'; return '0' <= chr && chr <= '9';
} }
///
/// Test if the given code unit is an ASCII letter (lowercase or uppercase)
///
/// \param[in] chr  Code unit
///
/// \returns
/// - \c true when islower() or isupper() accepts \p chr
/// - \c false otherwise
///
template <class T>
inline bool isalpha(_In_ T chr)
{
	// Lowercase is tested first; uppercase is only consulted when that fails.
	if (islower(chr))
		return true;
	return isupper(chr);
}
/// ///
/// Return number of code units the glyph represents /// Return number of code units the glyph represents
/// ///