From 2674bb0e3299cac53a834ca9a3b5828c5dd7e338 Mon Sep 17 00:00:00 2001 From: Simon Rozman Date: Mon, 13 Jan 2025 10:56:38 +0100 Subject: [PATCH] string: add variants for char16_t/char32_t strings char16_t is not the same type as wchar_t on Windows, and char32_t is not the same type as wchar_t on POSIX. Rather than selecting the appropriate UTF-16/UTF-32 variant, overload resolution picked the generic template implementation of strncmp, strcpy and strncpy: the one that does not map UTF-16 surrogate pairs to their UTF-32 representation. Signed-off-by: Simon Rozman --- UnitTests/string.cpp | 4 +- include/stdex/string.hpp | 165 ++++++++++++++++++++++++++++----------- 2 files changed, 122 insertions(+), 47 deletions(-) diff --git a/UnitTests/string.cpp b/UnitTests/string.cpp index b1d023bbb..6973a32a2 100644 --- a/UnitTests/string.cpp +++ b/UnitTests/string.cpp @@ -1,4 +1,4 @@ -/* +/* SPDX-License-Identifier: MIT Copyright © 2023-2025 Amebis */ @@ -16,7 +16,7 @@ namespace UnitTests { stdex::utf32_t tmp[0x100]; stdex::strncpy(tmp, u"This is a 🐔Test🐮."); - Assert::AreEqual(reinterpret_cast(U"This is a 🐔Test🐮."), tmp); + Assert::IsTrue(stdex::strcmp(U"This is a 🐔Test🐮.", tmp) == 0); } void string::sprintf() diff --git a/include/stdex/string.hpp b/include/stdex/string.hpp index 243436dcb..79134f8e4 100644 --- a/include/stdex/string.hpp +++ b/include/stdex/string.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef __APPLE__ #include #endif @@ -346,7 +347,7 @@ namespace stdex /// \return Number of code units excluding zero terminator in the string. 
/// template - size_t strnlen(_In_ const T (&str)[N]) + size_t strnlen(_In_ const T(&str)[N]) { return strnlen(str, N); } @@ -401,7 +402,7 @@ namespace stdex /// template size_t strnchr( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _In_ T chr) { return strnchr(str, N, chr); @@ -459,7 +460,7 @@ namespace stdex /// template size_t strrnchr( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _In_ T chr) { return strrnchr(str, N, chr); @@ -565,7 +566,7 @@ namespace stdex /// template size_t strnichr( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _In_ T chr) { return strnichr(str, N, chr); @@ -582,7 +583,7 @@ namespace stdex /// template size_t strnichr( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _In_ T chr, _In_ const std::locale& locale) { @@ -693,7 +694,7 @@ namespace stdex /// template size_t strrnichr( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _In_ T chr) { return strrnichr(str, N, chr); @@ -710,7 +711,7 @@ namespace stdex /// template size_t strrnichr( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _In_ T chr, _In_ const std::locale& locale) { @@ -805,7 +806,7 @@ namespace stdex /// \return `true` if all characters are white-space or `false` when any non-white-space character is found in string. /// template - bool isblank(_In_ const T (&str)[N]) + bool isblank(_In_ const T(&str)[N]) { return isblank(str, N); } @@ -820,7 +821,7 @@ namespace stdex /// template bool isblank( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _In_ const std::locale& locale) { return isblank(str, N, locale); @@ -869,7 +870,7 @@ namespace stdex /// \return `true` if all characters are ASCII or `false` when any non-ASCII character is found in string. 
/// template - bool is7bit(_In_ const T (&str)[N]) + bool is7bit(_In_ const T(&str)[N]) { return is7bit(str, N); } @@ -961,8 +962,8 @@ namespace stdex /// template int strncmp( - _In_ const T1 (&str1)[N1], - _In_ const T2 (&str2)[N2]) + _In_ const T1(&str1)[N1], + _In_ const T2(&str2)[N2]) { return strncmp(str1, N1, str2, N2); } @@ -1006,6 +1007,25 @@ namespace stdex return 0; } + /// + /// Binary compare two strings + /// + /// \param[in] str1 String 1 + /// \param[in] count1 String 1 code unit count limit + /// \param[in] str2 String 2 + /// \param[in] count2 String 2 code unit count limit + /// + /// \return Negative if str1str2; zero if str1==str2 + /// + inline int strncmp( + _In_reads_or_z_opt_(count1) const char32_t* str1, _In_ size_t count1, + _In_reads_or_z_opt_(count2) const char16_t* str2, _In_ size_t count2) + { + return strncmp( + reinterpret_cast(str1), count1, + reinterpret_cast(str2), count2); + } + /// /// Binary compare two strings /// @@ -1016,12 +1036,30 @@ namespace stdex /// template int strncmp( - _In_ const utf32_t (&str1)[N1], - _In_ const utf16_t (&str2)[N2]) + _In_ const utf32_t(&str1)[N1], + _In_ const utf16_t(&str2)[N2]) { return strncmp(str1, N1, str2, N2); } + /// + /// Binary compare two strings + /// + /// \param[in] str1 String 1 + /// \param[in] str2 String 2 + /// + /// \return Negative if str1str2; zero if str1==str2 + /// + template + int strncmp( + _In_ const char32_t(&str1)[N1], + _In_ const char16_t(&str2)[N2]) + { + return strncmp( + reinterpret_cast(str1), N1, + reinterpret_cast(str2), N2); + } + /// /// Binary compare two strings in reverse direction /// @@ -1118,8 +1156,8 @@ namespace stdex /// template int strrncmp( - _In_ const T1 (&str1)[N1], - _In_ const T2 (&str2)[N2]) + _In_ const T1(&str1)[N1], + _In_ const T2(&str2)[N2]) { return strrncmp(str1, N1, str2, N2); } @@ -1304,8 +1342,8 @@ namespace stdex /// template int strnicmp( - _In_ const T1 (&str1)[N1], - _In_ const T2 (&str2)[N2]) + _In_ const T1(&str1)[N1], + 
_In_ const T2(&str2)[N2]) { strnicmp(str1, N1, str2, N2); } @@ -1321,8 +1359,8 @@ namespace stdex /// template int strnicmp( - _In_ const T1 (&str1)[N1], - _In_ const T2 (&str2)[N2], + _In_ const T1(&str1)[N1], + _In_ const T2(&str2)[N2], _In_ const std::locale& locale) { strnicmp(str1, N1, str2, N2, locale); @@ -1383,8 +1421,8 @@ namespace stdex /// template int strncoll( - _In_ const T (&str1)[N1], - _In_ const T (&str2)[N2], + _In_ const T(&str1)[N1], + _In_ const T(&str2)[N2], _In_ const std::locale& locale) { return strncoll(str1, N1, str2, N2, locale); @@ -1455,7 +1493,7 @@ namespace stdex /// template size_t strnstr( - _In_ const T1 (&str)[N1], + _In_ const T1(&str)[N1], _In_z_ const T2* sample) { return strnstr(str, N1, sample); @@ -1591,7 +1629,7 @@ namespace stdex /// template size_t strnistr( - _In_ const T1 (&str)[N1], + _In_ const T1(&str)[N1], _In_z_ const T2* sample) { return strnistr(str, N1, sample); @@ -1608,7 +1646,7 @@ namespace stdex /// template size_t strnistr( - _In_ const T1 (&str)[N1], + _In_ const T1(&str)[N1], _In_z_ const T2* sample, _In_ const std::locale& locale) { @@ -1659,6 +1697,23 @@ namespace stdex } } + /// + /// Recode UTF-16 zero-terminated string to UTF-32 + /// + /// \param[in] dst Destination string + /// \param[in] src Source string + /// + /// \return Number of code units excluding zero terminator in the dst string after the operation. 
+ /// + inline size_t strcpy( + _Out_writes_z_(_String_length_(src) + 1) char32_t* dst, + _In_z_ const char16_t* src) + { + return strcpy( + reinterpret_cast(dst), + reinterpret_cast(src)); + } + /// /// Copy zero-terminated string /// @@ -1742,6 +1797,25 @@ namespace stdex } } + /// + /// Recode UTF-16 zero-terminated string to UTF-32 + /// + /// \param[in] dst Destination string + /// \param[in] count_dst Destination string code unit count limit + /// \param[in] src Source string + /// \param[in] count_src Source string code unit count limit + /// + /// \return Number of code units excluding zero terminator in the dst string after the operation. + /// + inline size_t strncpy( + _Out_writes_(count_dst) _Post_maybez_ char32_t* dst, _In_ size_t count_dst, + _In_reads_or_z_opt_(count_src) const char16_t* src, _In_ size_t count_src) + { + return strncpy( + reinterpret_cast(dst), count_dst, + reinterpret_cast(src), count_src); + } + /// /// Copy zero-terminated string /// @@ -1752,8 +1826,8 @@ namespace stdex /// template size_t strncpy( - _Out_ _Post_maybez_ T1 (&dst)[N1], - _In_ const T2 (&src)[N2]) + _Out_ _Post_maybez_ T1(&dst)[N1], + _In_ const T2(&src)[N2]) { return strncpy(dst, N1, src, N2); } @@ -1886,7 +1960,7 @@ namespace stdex /// \return Pointer to duplicated string; or nullptr if str is nullptr. Use delete[] operator to free the memory. 
/// template - _Check_return_ _Ret_maybenull_z_ T* strndup(_In_ const T (&str)[N]) + _Check_return_ _Ret_maybenull_z_ T* strndup(_In_ const T(&str)[N]) { return strndup(str, N); } @@ -2131,7 +2205,7 @@ namespace stdex /// template T_bin strtoint( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _Out_opt_ size_t* end, _In_ int radix) { @@ -2182,7 +2256,7 @@ namespace stdex /// template T_bin strtouint( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _Out_opt_ size_t* end, _In_ int radix) { @@ -2219,7 +2293,7 @@ namespace stdex /// template int8_t strto8( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _Out_opt_ size_t* end, _In_ int radix) { @@ -2256,7 +2330,7 @@ namespace stdex /// template int16_t strto16( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _Out_opt_ size_t* end, _In_ int radix) { @@ -2293,7 +2367,7 @@ namespace stdex /// template int32_t strto32( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _Out_opt_ size_t* end, _In_ int radix) { @@ -2330,7 +2404,7 @@ namespace stdex /// template int64_t strto64( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _Out_opt_ size_t* end, _In_ int radix) { @@ -2373,7 +2447,7 @@ namespace stdex /// template ptrdiff_t strtoi( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _Out_opt_ size_t* end, _In_ int radix) { @@ -2410,7 +2484,7 @@ namespace stdex /// template uint8_t strtou8( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _Out_opt_ size_t* end, _In_ int radix) { @@ -2447,7 +2521,7 @@ namespace stdex /// template uint16_t strtou16( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _Out_opt_ size_t* end, _In_ int radix) { @@ -2484,7 +2558,7 @@ namespace stdex /// template uint32_t strtou32( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _Out_opt_ size_t* end, _In_ int radix) { @@ -2521,7 +2595,7 @@ namespace stdex /// template uint64_t strtou64( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _Out_opt_ size_t* end, _In_ int radix) { @@ -2564,7 +2638,7 @@ namespace stdex /// template 
size_t strtoui( - _In_ const T (&str)[N], + _In_ const T(&str)[N], _Out_opt_ size_t* end, _In_ int radix) { @@ -2682,7 +2756,8 @@ namespace stdex case EILSEQ: throw std::runtime_error("encoding error"); default: throw std::runtime_error("failed to format string"); } - } else + } + else return capacity; #else switch (errno) { @@ -2987,7 +3062,7 @@ namespace stdex /// \param[in,out] str String /// template - void strlwr(_Inout_ T (&str)[N]) + void strlwr(_Inout_ T(&str)[N]) { strlwr(str, N); } @@ -2999,7 +3074,7 @@ namespace stdex /// \param[in] locale C++ locale to use /// template - void strlwr(_Inout_ T (&str)[N], _In_ const std::locale& locale) + void strlwr(_Inout_ T(&str)[N], _In_ const std::locale& locale) { strlwr(str, N, locale); } @@ -3094,7 +3169,7 @@ namespace stdex /// \param[in,out] str String /// template - void strupr(_Inout_ T (&str)[N]) + void strupr(_Inout_ T(&str)[N]) { return strupr(str, N); } @@ -3106,7 +3181,7 @@ namespace stdex /// \param[in] locale C++ locale to use /// template - void strupr(_Inout_ T (&str)[N], _In_ const std::locale& locale) + void strupr(_Inout_ T(&str)[N], _In_ const std::locale& locale) { return strupr(str, N, locale); }