string: add is7bit

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2023-12-23 07:52:20 +01:00
parent e1f53f31ad
commit fdf16a65b6
2 changed files with 72 additions and 14 deletions

View File

@ -478,7 +478,7 @@ namespace stdex
for (size_t i = 0; i < count_src;) { for (size_t i = 0; i < count_src;) {
size_t n = glyphlen(src + i, count_src - i); size_t n = glyphlen(src + i, count_src - i);
if (n == 1 && if (n == 1 &&
do_ascii && (unsigned int)src[i] < 128 && do_ascii && is7bit(src[i]) &&
src[i] != L'&' && src[i] != L'&' &&
(do_quot || (src[i] != L'"')) && (do_quot || (src[i] != L'"')) &&
(do_apos || (src[i] != L'\'')) && (do_apos || (src[i] != L'\'')) &&
@ -505,7 +505,7 @@ namespace stdex
} }
else if (n == 1) { else if (n == 1) {
// Trivial character (1 code unit, 1 glyph), no entity available. // Trivial character (1 code unit, 1 glyph), no entity available.
if ((unsigned int)src[i] < 128) if (is7bit(src[i]))
dst.append(1, static_cast<char>(src[i++])); dst.append(1, static_cast<char>(src[i++]));
else { else {
char tmp[3 + 8 + 1 + 1]; char tmp[3 + 8 + 1 + 1];
@ -523,7 +523,7 @@ namespace stdex
dst.append(1, ';'); dst.append(1, ';');
i++; i++;
} }
else if ((unsigned int)src[i] < 128) else if (is7bit(src[i]))
dst.append(1, static_cast<char>(src[i++])); dst.append(1, static_cast<char>(src[i++]));
else { else {
uint32_t unicode; uint32_t unicode;
@ -602,7 +602,7 @@ namespace stdex
for (size_t i = 0; i < count_src;) { for (size_t i = 0; i < count_src;) {
size_t n = glyphlen(src + i, count_src - i); size_t n = glyphlen(src + i, count_src - i);
if (n == 1 && if (n == 1 &&
do_ascii && (unsigned int)src[i] < 128 && do_ascii && is7bit(src[i]) &&
src[i] != L'&' && src[i] != L'&' &&
(do_quot || (src[i] != L'"')) && (do_quot || (src[i] != L'"')) &&
(do_apos || (src[i] != L'\'')) && (do_apos || (src[i] != L'\'')) &&
@ -634,7 +634,7 @@ namespace stdex
} }
else if (n == 1) { else if (n == 1) {
// Trivial character (1 code unit, 1 glyph), no entity available. // Trivial character (1 code unit, 1 glyph), no entity available.
if ((unsigned int)src[i] < 128) { if (is7bit(src[i])) {
if (j + 1 >= count_dst) if (j + 1 >= count_dst)
throw buffer_overrun; throw buffer_overrun;
dst[j++] = static_cast<char>(src[i++]); dst[j++] = static_cast<char>(src[i++]);
@ -661,7 +661,7 @@ namespace stdex
dst[j++] = ';'; dst[j++] = ';';
i++; i++;
} }
else if ((unsigned int)src[i] < 128) { else if (is7bit(src[i])) {
if (j + 1 >= count_dst) if (j + 1 >= count_dst)
throw buffer_overrun; throw buffer_overrun;
dst[j++] = static_cast<char>(src[i++]); dst[j++] = static_cast<char>(src[i++]);

View File

@ -188,6 +188,17 @@ namespace stdex
return islower(chr) || isupper(chr); return islower(chr) || isupper(chr);
} }
///
/// Tests whether a single code unit lies in the 7-bit ASCII range
///
/// \param[in] chr  Code unit to test
///
/// \return `true` when `chr` is between 0x00 and 0x7f inclusive; `false` otherwise (negative values included).
///
template <class T>
inline bool is7bit(_In_ T chr)
{
    // Lower bound first: for signed code-unit types this rejects negative values.
    if (!('\x00' <= chr))
        return false;
    return chr <= '\x7f';
}
/// ///
/// Return number of code units the glyph represents /// Return number of code units the glyph represents
/// ///
@ -737,8 +748,7 @@ namespace stdex
/// ///
template <class T> template <class T>
inline bool isblank( inline bool isblank(
_In_reads_or_z_opt_(count) const T* str, _In_reads_or_z_opt_(count) const T* str, _In_ size_t count,
_In_ size_t count,
_In_ const std::locale& locale) _In_ const std::locale& locale)
{ {
_Assume_(str || !count); _Assume_(str || !count);
@ -778,6 +788,54 @@ namespace stdex
return isblank(str, N, locale); return isblank(str, N, locale);
} }
// ///
// /// Checks if string contains all-ASCII characters
// ///
// /// \param[in] str String
// ///
// /// \return `true` if all characters are ASCII or `false` when any non-ASCII character is found in string.
// ///
// template <class T>
// inline bool is7bit(_In_z_ const T* str)
// {
// _Assume_(str);
// for (size_t i = 0; str[i]; i++)
// if (!is7bit(str[i]))
// return false;
// return true;
// }
///
/// Tests whether every code unit of a string lies in the 7-bit ASCII range
///
/// Scanning stops at the first zero code unit or after `count` code units, whichever comes first.
///
/// \param[in] str    String to examine; expected to be non-null unless `count` is zero
/// \param[in] count  Maximum number of code units to examine
///
/// \return `false` as soon as a non-ASCII code unit is found; `true` otherwise (also when nothing is scanned).
///
template <class T>
inline bool is7bit(_In_reads_or_z_opt_(count) const T* str, _In_ size_t count)
{
    _Assume_(str || !count);
    size_t pos = 0;
    while (pos < count && str[pos]) {
        if (!is7bit(str[pos]))
            return false;
        ++pos;
    }
    return true;
}
///
/// Tests whether every code unit of a fixed-size character array lies in the 7-bit ASCII range
///
/// \param[in] str  Character array; at most `N` code units are examined
///
/// \return Result of the pointer-and-count overload called with the array and its size `N`.
///
template <class T, size_t N>
inline bool is7bit(_In_ const T (&str)[N])
{
    // Decay the array explicitly and let the pointer-and-count overload do the scan.
    const T* const first = str;
    return is7bit(first, N);
}
/// ///
/// Binary compare two strings /// Binary compare two strings
/// ///
@ -1465,7 +1523,7 @@ namespace stdex
_Assume_(dst); _Assume_(dst);
_Assume_(src); _Assume_(src);
for (size_t i = 0; ; ++i) { for (size_t i = 0; ; ++i) {
if ((dst[i] = src[i]) == 0) if ((dst[i] = static_cast<T1>(src[i])) == 0)
return i; return i;
} }
} }
@ -1489,7 +1547,7 @@ namespace stdex
for (size_t i = 0; ; ++i) { for (size_t i = 0; ; ++i) {
if (i >= count) if (i >= count)
return i; return i;
if ((dst[i] = src[i]) == 0) if ((dst[i] = static_cast<T1>(src[i])) == 0)
return i; return i;
} }
} }
@ -1519,7 +1577,7 @@ namespace stdex
dst[i] = 0; dst[i] = 0;
return i; return i;
} }
if ((dst[i] = src[i]) == 0) if ((dst[i] = static_cast<T1>(src[i])) == 0)
return i; return i;
} }
} }
@ -1556,7 +1614,7 @@ namespace stdex
_Assume_(dst); _Assume_(dst);
_Assume_(src); _Assume_(src);
for (size_t i = 0, j = stdex::strlen<T1>(dst); ; ++i, ++j) { for (size_t i = 0, j = stdex::strlen<T1>(dst); ; ++i, ++j) {
if ((dst[j] = src[i]) == 0) if ((dst[j] = static_cast<T1>(src[i])) == 0)
return j; return j;
} }
} }
@ -1580,7 +1638,7 @@ namespace stdex
for (size_t i = 0, j = stdex::strlen<T1>(dst); ; ++i, ++j) { for (size_t i = 0, j = stdex::strlen<T1>(dst); ; ++i, ++j) {
if (i >= count) if (i >= count)
return j; return j;
if ((dst[j] = src[i]) == 0) if ((dst[j] = static_cast<T1>(src[i])) == 0)
return j; return j;
} }
} }
@ -1610,7 +1668,7 @@ namespace stdex
dst[j] = 0; dst[j] = 0;
return j; return j;
} }
if ((dst[j] = src[i]) == 0) if ((dst[j] = static_cast<T1>(src[i])) == 0)
return j; return j;
} }
} }