From fdf16a65b63f09d3ddee3fe17a6e2738c75e5d4e Mon Sep 17 00:00:00 2001 From: Simon Rozman Date: Sat, 23 Dec 2023 07:52:20 +0100 Subject: [PATCH] string: add is7bit Signed-off-by: Simon Rozman --- include/stdex/sgml.hpp | 12 +++---- include/stdex/string.hpp | 74 +++++++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 14 deletions(-) diff --git a/include/stdex/sgml.hpp b/include/stdex/sgml.hpp index 529f56551..50924d878 100644 --- a/include/stdex/sgml.hpp +++ b/include/stdex/sgml.hpp @@ -478,7 +478,7 @@ namespace stdex for (size_t i = 0; i < count_src;) { size_t n = glyphlen(src + i, count_src - i); if (n == 1 && - do_ascii && (unsigned int)src[i] < 128 && + do_ascii && is7bit(src[i]) && src[i] != L'&' && (do_quot || (src[i] != L'"')) && (do_apos || (src[i] != L'\'')) && @@ -505,7 +505,7 @@ namespace stdex } else if (n == 1) { // Trivial character (1 code unit, 1 glyph), no entity available. - if ((unsigned int)src[i] < 128) + if (is7bit(src[i])) dst.append(1, static_cast(src[i++])); else { char tmp[3 + 8 + 1 + 1]; @@ -523,7 +523,7 @@ namespace stdex dst.append(1, ';'); i++; } - else if ((unsigned int)src[i] < 128) + else if (is7bit(src[i])) dst.append(1, static_cast(src[i++])); else { uint32_t unicode; @@ -602,7 +602,7 @@ namespace stdex for (size_t i = 0; i < count_src;) { size_t n = glyphlen(src + i, count_src - i); if (n == 1 && - do_ascii && (unsigned int)src[i] < 128 && + do_ascii && is7bit(src[i]) && src[i] != L'&' && (do_quot || (src[i] != L'"')) && (do_apos || (src[i] != L'\'')) && @@ -634,7 +634,7 @@ namespace stdex } else if (n == 1) { // Trivial character (1 code unit, 1 glyph), no entity available. - if ((unsigned int)src[i] < 128) { + if (is7bit(src[i])) { if (j + 1 >= count_dst) throw buffer_overrun; dst[j++] = static_cast(src[i++]); @@ -661,7 +661,7 @@ namespace stdex dst[j++] = ';'; i++; } - else if ((unsigned int)src[i] < 128) { + else if (is7bit(src[i])) { if (j + 1 >= count_dst) throw buffer_overrun; dst[j++] = static_cast(src[i++]); diff --git a/include/stdex/string.hpp b/include/stdex/string.hpp index 97536ae08..4cc57a3a8 100644 --- a/include/stdex/string.hpp +++ b/include/stdex/string.hpp @@ -188,6 +188,17 @@ namespace stdex return islower(chr) || isupper(chr); } + /// + /// Test if the given code unit is ASCII + /// + /// \param[in] chr Code unit + /// + template + inline bool is7bit(_In_ T chr) + { + return '\x00' <= chr && chr <= '\x7f'; + } + /// /// Return number of code units the glyph represents /// @@ -737,8 +748,7 @@ namespace stdex /// template inline bool isblank( - _In_reads_or_z_opt_(count) const T* str, - _In_ size_t count, + _In_reads_or_z_opt_(count) const T* str, _In_ size_t count, _In_ const std::locale& locale) { _Assume_(str || !count); @@ -778,6 +788,54 @@ namespace stdex return isblank(str, N, locale); } + // /// + // /// Checks if string contains all-ASCII characters + // /// + // /// \param[in] str String + // /// + // /// \return `true` if all characters are ASCII or `false` when any non-ASCII character is found in string. + // /// + // template + // inline bool is7bit(_In_z_ const T* str) + // { + // _Assume_(str); + // for (size_t i = 0; str[i]; i++) + // if (!is7bit(str[i])) + // return false; + // return true; + // } + + /// + /// Checks if string contains all-ASCII characters + /// + /// \param[in] str String + /// \param[in] count Code unit count limit + /// + /// \return `true` if all characters are ASCII or `false` when any non-ASCII character is found in string. + /// + template + inline bool is7bit(_In_reads_or_z_opt_(count) const T* str, _In_ size_t count) + { + _Assume_(str || !count); + for (size_t i = 0; i < count && str[i]; i++) + if (!is7bit(str[i])) + return false; + return true; + } + + /// + /// Checks if string contains all-ASCII characters + /// + /// \param[in] str String + /// + /// \return `true` if all characters are ASCII or `false` when any non-ASCII character is found in string. + /// + template + inline bool is7bit(_In_ const T (&str)[N]) + { + return is7bit(str, N); + } + /// /// Binary compare two strings /// @@ -1465,7 +1523,7 @@ namespace stdex _Assume_(dst); _Assume_(src); for (size_t i = 0; ; ++i) { - if ((dst[i] = src[i]) == 0) + if ((dst[i] = static_cast(src[i])) == 0) return i; } } @@ -1489,7 +1547,7 @@ namespace stdex for (size_t i = 0; ; ++i) { if (i >= count) return i; - if ((dst[i] = src[i]) == 0) + if ((dst[i] = static_cast(src[i])) == 0) return i; } } @@ -1519,7 +1577,7 @@ namespace stdex dst[i] = 0; return i; } - if ((dst[i] = src[i]) == 0) + if ((dst[i] = static_cast(src[i])) == 0) return i; } } @@ -1556,7 +1614,7 @@ namespace stdex _Assume_(dst); _Assume_(src); for (size_t i = 0, j = stdex::strlen(dst); ; ++i, ++j) { - if ((dst[j] = src[i]) == 0) + if ((dst[j] = static_cast(src[i])) == 0) return j; } } @@ -1580,7 +1638,7 @@ namespace stdex for (size_t i = 0, j = stdex::strlen(dst); ; ++i, ++j) { if (i >= count) return j; - if ((dst[j] = src[i]) == 0) + if ((dst[j] = static_cast(src[i])) == 0) return j; } } @@ -1610,7 +1668,7 @@ namespace stdex dst[j] = 0; return j; } - if ((dst[j] = src[i]) == 0) + if ((dst[j] = static_cast(src[i])) == 0) return j; } }