parser: detect spaces, characters and newline faster where appropriate
There is no need to use locale-specific character type detection for ASCII characters. The locale-specific implementation on Windows is not very fast. Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
parent
7685818bf7
commit
e17fa1d8c2
@ -400,7 +400,7 @@ namespace stdex
|
|||||||
_Assume_(text || start >= end);
|
_Assume_(text || start >= end);
|
||||||
if (start < end && text[start]) {
|
if (start < end && text[start]) {
|
||||||
bool r =
|
bool r =
|
||||||
((flags & match_multiline) || !islbreak(text[start])) &&
|
((flags & match_multiline) || !stdex::islbreak(text[start])) &&
|
||||||
std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
|
std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
|
||||||
if ((r && !m_invert) || (!r && m_invert)) {
|
if ((r && !m_invert) || (!r && m_invert)) {
|
||||||
this->interval.end = (this->interval.start = start) + 1;
|
this->interval.end = (this->interval.start = start) + 1;
|
||||||
@ -445,7 +445,7 @@ namespace stdex
|
|||||||
const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
|
const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
|
||||||
const wchar_t* chr_end = chr + stdex::strlen(chr);
|
const wchar_t* chr_end = chr + stdex::strlen(chr);
|
||||||
bool r =
|
bool r =
|
||||||
((flags & match_multiline) || !islbreak(chr, SIZE_MAX)) &&
|
((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
|
||||||
std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
|
std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
|
||||||
if ((r && !m_invert) || (!r && m_invert)) {
|
if ((r && !m_invert) || (!r && m_invert)) {
|
||||||
this->interval.start = start;
|
this->interval.start = start;
|
||||||
@ -553,7 +553,7 @@ namespace stdex
|
|||||||
_Assume_(text || start >= end);
|
_Assume_(text || start >= end);
|
||||||
if (start < end && text[start]) {
|
if (start < end && text[start]) {
|
||||||
bool r =
|
bool r =
|
||||||
((flags & match_multiline) || !islbreak(text[start])) &&
|
((flags & match_multiline) || !stdex::islbreak(text[start])) &&
|
||||||
std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
|
std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
|
||||||
if ((r && !m_invert) || (!r && m_invert)) {
|
if ((r && !m_invert) || (!r && m_invert)) {
|
||||||
this->interval.end = (this->interval.start = start) + 1;
|
this->interval.end = (this->interval.start = start) + 1;
|
||||||
@ -598,7 +598,7 @@ namespace stdex
|
|||||||
const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
|
const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
|
||||||
const wchar_t* chr_end = chr + stdex::strlen(chr);
|
const wchar_t* chr_end = chr + stdex::strlen(chr);
|
||||||
bool r =
|
bool r =
|
||||||
((flags & match_multiline) || !islbreak(chr, SIZE_MAX)) &&
|
((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
|
||||||
std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
|
std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
|
||||||
if ((r && !m_invert) || (!r && m_invert)) {
|
if ((r && !m_invert) || (!r && m_invert)) {
|
||||||
this->interval.start = start;
|
this->interval.start = start;
|
||||||
@ -626,7 +626,7 @@ namespace stdex
|
|||||||
_In_ int flags = match_default)
|
_In_ int flags = match_default)
|
||||||
{
|
{
|
||||||
_Assume_(text || start >= end);
|
_Assume_(text || start >= end);
|
||||||
bool r = start == 0 || (start <= end && islbreak(text[start - 1]));
|
bool r = start == 0 || (start <= end && stdex::islbreak(text[start - 1]));
|
||||||
if ((r && !m_invert) || (!r && m_invert)) {
|
if ((r && !m_invert) || (!r && m_invert)) {
|
||||||
this->interval.end = this->interval.start = start;
|
this->interval.end = this->interval.start = start;
|
||||||
return true;
|
return true;
|
||||||
@ -664,7 +664,7 @@ namespace stdex
|
|||||||
_In_ int flags = match_default)
|
_In_ int flags = match_default)
|
||||||
{
|
{
|
||||||
_Assume_(text || start >= end);
|
_Assume_(text || start >= end);
|
||||||
bool r = islbreak(text[start]);
|
bool r = stdex::islbreak(text[start]);
|
||||||
if ((r && !m_invert) || (!r && m_invert)) {
|
if ((r && !m_invert) || (!r && m_invert)) {
|
||||||
this->interval.end = this->interval.start = start;
|
this->interval.end = this->interval.start = start;
|
||||||
return true;
|
return true;
|
||||||
@ -6471,7 +6471,7 @@ namespace stdex
|
|||||||
k.end = this->interval.end;
|
k.end = this->interval.end;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (k.end < end && text[k.end]) {
|
if (k.end < end && text[k.end]) {
|
||||||
if (isalpha(text[k.end]))
|
if (stdex::isalpha(text[k.end]))
|
||||||
k.end++;
|
k.end++;
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
|
@ -177,6 +177,17 @@ namespace stdex
|
|||||||
return '0' <= chr && chr <= '9';
|
return '0' <= chr && chr <= '9';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Test if the given code unit is an ASCII letter (lower or upper case)
|
||||||
|
///
|
||||||
|
/// \param[in] chr Code unit
|
||||||
|
///
|
||||||
|
template <class T>
|
||||||
|
inline bool isalpha(_In_ T chr)
|
||||||
|
{
|
||||||
|
return islower(chr) || isupper(chr);
|
||||||
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
/// Return number of code units the glyph represents
|
/// Return number of code units the glyph represents
|
||||||
///
|
///
|
||||||
|
Loading…
x
Reference in New Issue
Block a user