string: add variants for char16_t/char32_t strings

char16_t is not the same type as wchar_t on Windows, and char32_t is not
the same type as wchar_t on POSIX. As a result, overload resolution did
not select the dedicated UTF-16/UTF-32 variant for such arguments; it
picked the generic template implementation of strncmp, strcpy and
strncpy instead, i.e. the one that does not map UTF-16 surrogate pairs
to their UTF-32 representation.

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2025-01-13 10:56:38 +01:00
parent a7543cf9ab
commit 2674bb0e32
2 changed files with 122 additions and 47 deletions

View File

@ -1,4 +1,4 @@
/* /*
SPDX-License-Identifier: MIT SPDX-License-Identifier: MIT
Copyright © 2023-2025 Amebis Copyright © 2023-2025 Amebis
*/ */
@ -16,7 +16,7 @@ namespace UnitTests
{ {
stdex::utf32_t tmp[0x100]; stdex::utf32_t tmp[0x100];
stdex::strncpy(tmp, u"This is a 🐔Test🐮."); stdex::strncpy(tmp, u"This is a 🐔Test🐮.");
Assert::AreEqual(reinterpret_cast<const stdex::utf32_t*>(U"This is a 🐔Test🐮."), tmp); Assert::IsTrue(stdex::strcmp(U"This is a 🐔Test🐮.", tmp) == 0);
} }
void string::sprintf() void string::sprintf()

View File

@ -14,6 +14,7 @@
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <time.h> #include <time.h>
#include <uchar.h>
#ifdef __APPLE__ #ifdef __APPLE__
#include <xlocale.h> #include <xlocale.h>
#endif #endif
@ -346,7 +347,7 @@ namespace stdex
/// \return Number of code units excluding zero terminator in the string. /// \return Number of code units excluding zero terminator in the string.
/// ///
template <class T, size_t N> template <class T, size_t N>
size_t strnlen(_In_ const T (&str)[N]) size_t strnlen(_In_ const T(&str)[N])
{ {
// Array overload: forwards to the two-argument strnlen, passing the array extent N as the second argument.
return strnlen(str, N); return strnlen(str, N);
} }
@ -401,7 +402,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
size_t strnchr( size_t strnchr(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_In_ T chr) _In_ T chr)
{ {
return strnchr(str, N, chr); return strnchr(str, N, chr);
@ -459,7 +460,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
size_t strrnchr( size_t strrnchr(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_In_ T chr) _In_ T chr)
{ {
return strrnchr(str, N, chr); return strrnchr(str, N, chr);
@ -565,7 +566,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
size_t strnichr( size_t strnichr(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_In_ T chr) _In_ T chr)
{ {
return strnichr(str, N, chr); return strnichr(str, N, chr);
@ -582,7 +583,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
size_t strnichr( size_t strnichr(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_In_ T chr, _In_ T chr,
_In_ const std::locale& locale) _In_ const std::locale& locale)
{ {
@ -693,7 +694,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
size_t strrnichr( size_t strrnichr(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_In_ T chr) _In_ T chr)
{ {
return strrnichr(str, N, chr); return strrnichr(str, N, chr);
@ -710,7 +711,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
size_t strrnichr( size_t strrnichr(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_In_ T chr, _In_ T chr,
_In_ const std::locale& locale) _In_ const std::locale& locale)
{ {
@ -805,7 +806,7 @@ namespace stdex
/// \return `true` if all characters are white-space or `false` when any non-white-space character is found in string. /// \return `true` if all characters are white-space or `false` when any non-white-space character is found in string.
/// ///
template <class T, size_t N> template <class T, size_t N>
bool isblank(_In_ const T (&str)[N]) bool isblank(_In_ const T(&str)[N])
{ {
// Array overload: forwards to the two-argument isblank, passing the array extent N as the second argument.
return isblank(str, N); return isblank(str, N);
} }
@ -820,7 +821,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
bool isblank( bool isblank(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_In_ const std::locale& locale) _In_ const std::locale& locale)
{ {
return isblank(str, N, locale); return isblank(str, N, locale);
@ -869,7 +870,7 @@ namespace stdex
/// \return `true` if all characters are ASCII or `false` when any non-ASCII character is found in string. /// \return `true` if all characters are ASCII or `false` when any non-ASCII character is found in string.
/// ///
template <class T, size_t N> template <class T, size_t N>
bool is7bit(_In_ const T (&str)[N]) bool is7bit(_In_ const T(&str)[N])
{ {
// Array overload: forwards to the two-argument is7bit, passing the array extent N as the second argument.
return is7bit(str, N); return is7bit(str, N);
} }
@ -961,8 +962,8 @@ namespace stdex
/// ///
template <class T1, size_t N1, class T2, size_t N2> template <class T1, size_t N1, class T2, size_t N2>
int strncmp( int strncmp(
_In_ const T1 (&str1)[N1], _In_ const T1(&str1)[N1],
_In_ const T2 (&str2)[N2]) _In_ const T2(&str2)[N2])
{ {
// Array overload: forwards to the four-argument strncmp, passing the array extents N1 and N2 after their strings.
return strncmp(str1, N1, str2, N2); return strncmp(str1, N1, str2, N2);
} }
@ -1006,6 +1007,25 @@ namespace stdex
return 0; return 0;
} }
///
/// Binary compare a char32_t string against a char16_t string
///
/// The pointers are re-typed as utf32_t/utf16_t and handed to the
/// four-argument strncmp, so that call resolves on the stdex character
/// types rather than on char32_t/char16_t.
///
/// \param[in] str1    String 1
/// \param[in] count1  String 1 code unit count limit
/// \param[in] str2    String 2
/// \param[in] count2  String 2 code unit count limit
///
/// \return Negative if str1<str2; positive if str1>str2; zero if str1==str2
///
inline int strncmp(
    _In_reads_or_z_opt_(count1) const char32_t* str1, _In_ size_t count1,
    _In_reads_or_z_opt_(count2) const char16_t* str2, _In_ size_t count2)
{
    // Same addresses, viewed through the stdex UTF character types.
    const utf32_t* const lhs = reinterpret_cast<const utf32_t*>(str1);
    const utf16_t* const rhs = reinterpret_cast<const utf16_t*>(str2);
    return strncmp(lhs, count1, rhs, count2);
}
/// ///
/// Binary compare two strings /// Binary compare two strings
/// ///
@ -1016,12 +1036,30 @@ namespace stdex
/// ///
template <size_t N1, size_t N2> template <size_t N1, size_t N2>
int strncmp( int strncmp(
_In_ const utf32_t (&str1)[N1], _In_ const utf32_t(&str1)[N1],
_In_ const utf16_t (&str2)[N2]) _In_ const utf16_t(&str2)[N2])
{ {
// utf32_t/utf16_t array overload: forwards to the four-argument strncmp, passing the array extents N1 and N2 after their strings.
return strncmp(str1, N1, str2, N2); return strncmp(str1, N1, str2, N2);
} }
///
/// Binary compare a char32_t array against a char16_t array
///
/// The arrays are re-typed as utf32_t/utf16_t pointers and handed to the
/// four-argument strncmp together with their extents.
///
/// \param[in] str1  String 1
/// \param[in] str2  String 2
///
/// \return Negative if str1<str2; positive if str1>str2; zero if str1==str2
///
template <size_t N1, size_t N2>
int strncmp(
    _In_ const char32_t(&str1)[N1],
    _In_ const char16_t(&str2)[N2])
{
    // The arrays decay to pointers before the cast; N1/N2 carry the extents along.
    const utf32_t* const lhs = reinterpret_cast<const utf32_t*>(str1);
    const utf16_t* const rhs = reinterpret_cast<const utf16_t*>(str2);
    return strncmp(lhs, N1, rhs, N2);
}
/// ///
/// Binary compare two strings in reverse direction /// Binary compare two strings in reverse direction
/// ///
@ -1118,8 +1156,8 @@ namespace stdex
/// ///
template <class T1, size_t N1, class T2, size_t N2> template <class T1, size_t N1, class T2, size_t N2>
int strrncmp( int strrncmp(
_In_ const T1 (&str1)[N1], _In_ const T1(&str1)[N1],
_In_ const T2 (&str2)[N2]) _In_ const T2(&str2)[N2])
{ {
// Array overload: forwards to the four-argument strrncmp, passing the array extents N1 and N2 after their strings.
return strrncmp(str1, N1, str2, N2); return strrncmp(str1, N1, str2, N2);
} }
@ -1304,8 +1342,8 @@ namespace stdex
/// ///
template <class T1, size_t N1, class T2, size_t N2> template <class T1, size_t N1, class T2, size_t N2>
int strnicmp( int strnicmp(
_In_ const T1 (&str1)[N1], _In_ const T1(&str1)[N1],
_In_ const T2 (&str2)[N2]) _In_ const T2(&str2)[N2])
{ {
strnicmp(str1, N1, str2, N2); strnicmp(str1, N1, str2, N2);
} }
@ -1321,8 +1359,8 @@ namespace stdex
/// ///
template <class T1, size_t N1, class T2, size_t N2> template <class T1, size_t N1, class T2, size_t N2>
int strnicmp( int strnicmp(
_In_ const T1 (&str1)[N1], _In_ const T1(&str1)[N1],
_In_ const T2 (&str2)[N2], _In_ const T2(&str2)[N2],
_In_ const std::locale& locale) _In_ const std::locale& locale)
{ {
strnicmp(str1, N1, str2, N2, locale); strnicmp(str1, N1, str2, N2, locale);
@ -1383,8 +1421,8 @@ namespace stdex
/// ///
template <class T, size_t N1, size_t N2> template <class T, size_t N1, size_t N2>
int strncoll( int strncoll(
_In_ const T (&str1)[N1], _In_ const T(&str1)[N1],
_In_ const T (&str2)[N2], _In_ const T(&str2)[N2],
_In_ const std::locale& locale) _In_ const std::locale& locale)
{ {
return strncoll(str1, N1, str2, N2, locale); return strncoll(str1, N1, str2, N2, locale);
@ -1455,7 +1493,7 @@ namespace stdex
/// ///
template <class T1, size_t N1, class T2> template <class T1, size_t N1, class T2>
size_t strnstr( size_t strnstr(
_In_ const T1 (&str)[N1], _In_ const T1(&str)[N1],
_In_z_ const T2* sample) _In_z_ const T2* sample)
{ {
return strnstr(str, N1, sample); return strnstr(str, N1, sample);
@ -1591,7 +1629,7 @@ namespace stdex
/// ///
template <class T1, size_t N1, class T2> template <class T1, size_t N1, class T2>
size_t strnistr( size_t strnistr(
_In_ const T1 (&str)[N1], _In_ const T1(&str)[N1],
_In_z_ const T2* sample) _In_z_ const T2* sample)
{ {
return strnistr(str, N1, sample); return strnistr(str, N1, sample);
@ -1608,7 +1646,7 @@ namespace stdex
/// ///
template <class T1, size_t N1, class T2> template <class T1, size_t N1, class T2>
size_t strnistr( size_t strnistr(
_In_ const T1 (&str)[N1], _In_ const T1(&str)[N1],
_In_z_ const T2* sample, _In_z_ const T2* sample,
_In_ const std::locale& locale) _In_ const std::locale& locale)
{ {
@ -1659,6 +1697,23 @@ namespace stdex
} }
} }
///
/// Recode UTF-16 zero-terminated string to UTF-32
///
/// The char32_t/char16_t pointers are re-typed as utf32_t/utf16_t and
/// handed to the two-argument strcpy.
///
/// \param[out] dst  Destination string
/// \param[in]  src  Source string
///
/// \return Number of code units excluding zero terminator in the dst string after the operation.
///
inline size_t strcpy(
    _Out_writes_z_(_String_length_(src) + 1) char32_t* dst,
    _In_z_ const char16_t* src)
{
    // Same buffers, viewed through the stdex UTF character types.
    utf32_t* const target = reinterpret_cast<utf32_t*>(dst);
    const utf16_t* const source = reinterpret_cast<const utf16_t*>(src);
    return strcpy(target, source);
}
/// ///
/// Copy zero-terminated string /// Copy zero-terminated string
/// ///
@ -1742,6 +1797,25 @@ namespace stdex
} }
} }
///
/// Recode UTF-16 zero-terminated string to UTF-32
///
/// The char32_t/char16_t pointers are re-typed as utf32_t/utf16_t and
/// handed, with both count limits, to the four-argument strncpy.
///
/// \param[out] dst        Destination string
/// \param[in]  count_dst  Destination string code unit count limit
/// \param[in]  src        Source string
/// \param[in]  count_src  Source string code unit count limit
///
/// \return Number of code units excluding zero terminator in the dst string after the operation.
///
inline size_t strncpy(
    _Out_writes_(count_dst) _Post_maybez_ char32_t* dst, _In_ size_t count_dst,
    _In_reads_or_z_opt_(count_src) const char16_t* src, _In_ size_t count_src)
{
    // Same buffers, viewed through the stdex UTF character types.
    utf32_t* const target = reinterpret_cast<utf32_t*>(dst);
    const utf16_t* const source = reinterpret_cast<const utf16_t*>(src);
    return strncpy(target, count_dst, source, count_src);
}
/// ///
/// Copy zero-terminated string /// Copy zero-terminated string
/// ///
@ -1752,8 +1826,8 @@ namespace stdex
/// ///
template <class T1, size_t N1, class T2, size_t N2> template <class T1, size_t N1, class T2, size_t N2>
size_t strncpy( size_t strncpy(
_Out_ _Post_maybez_ T1 (&dst)[N1], _Out_ _Post_maybez_ T1(&dst)[N1],
_In_ const T2 (&src)[N2]) _In_ const T2(&src)[N2])
{ {
// Array overload: forwards to the four-argument strncpy, passing extent N1 after dst and extent N2 after src.
return strncpy(dst, N1, src, N2); return strncpy(dst, N1, src, N2);
} }
@ -1886,7 +1960,7 @@ namespace stdex
/// \return Pointer to duplicated string; or nullptr if str is nullptr. Use delete[] operator to free the memory. /// \return Pointer to duplicated string; or nullptr if str is nullptr. Use delete[] operator to free the memory.
/// ///
template <class T, size_t N> template <class T, size_t N>
_Check_return_ _Ret_maybenull_z_ T* strndup(_In_ const T (&str)[N]) _Check_return_ _Ret_maybenull_z_ T* strndup(_In_ const T(&str)[N])
{ {
// Array overload: forwards to the two-argument strndup, passing the array extent N as the second argument.
return strndup(str, N); return strndup(str, N);
} }
@ -2131,7 +2205,7 @@ namespace stdex
/// ///
template <class T, size_t N, class T_bin> template <class T, size_t N, class T_bin>
T_bin strtoint( T_bin strtoint(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_Out_opt_ size_t* end, _Out_opt_ size_t* end,
_In_ int radix) _In_ int radix)
{ {
@ -2182,7 +2256,7 @@ namespace stdex
/// ///
template <class T, size_t N, class T_bin> template <class T, size_t N, class T_bin>
T_bin strtouint( T_bin strtouint(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_Out_opt_ size_t* end, _Out_opt_ size_t* end,
_In_ int radix) _In_ int radix)
{ {
@ -2219,7 +2293,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
int8_t strto8( int8_t strto8(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_Out_opt_ size_t* end, _Out_opt_ size_t* end,
_In_ int radix) _In_ int radix)
{ {
@ -2256,7 +2330,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
int16_t strto16( int16_t strto16(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_Out_opt_ size_t* end, _Out_opt_ size_t* end,
_In_ int radix) _In_ int radix)
{ {
@ -2293,7 +2367,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
int32_t strto32( int32_t strto32(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_Out_opt_ size_t* end, _Out_opt_ size_t* end,
_In_ int radix) _In_ int radix)
{ {
@ -2330,7 +2404,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
int64_t strto64( int64_t strto64(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_Out_opt_ size_t* end, _Out_opt_ size_t* end,
_In_ int radix) _In_ int radix)
{ {
@ -2373,7 +2447,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
ptrdiff_t strtoi( ptrdiff_t strtoi(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_Out_opt_ size_t* end, _Out_opt_ size_t* end,
_In_ int radix) _In_ int radix)
{ {
@ -2410,7 +2484,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
uint8_t strtou8( uint8_t strtou8(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_Out_opt_ size_t* end, _Out_opt_ size_t* end,
_In_ int radix) _In_ int radix)
{ {
@ -2447,7 +2521,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
uint16_t strtou16( uint16_t strtou16(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_Out_opt_ size_t* end, _Out_opt_ size_t* end,
_In_ int radix) _In_ int radix)
{ {
@ -2484,7 +2558,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
uint32_t strtou32( uint32_t strtou32(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_Out_opt_ size_t* end, _Out_opt_ size_t* end,
_In_ int radix) _In_ int radix)
{ {
@ -2521,7 +2595,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
uint64_t strtou64( uint64_t strtou64(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_Out_opt_ size_t* end, _Out_opt_ size_t* end,
_In_ int radix) _In_ int radix)
{ {
@ -2564,7 +2638,7 @@ namespace stdex
/// ///
template <class T, size_t N> template <class T, size_t N>
size_t strtoui( size_t strtoui(
_In_ const T (&str)[N], _In_ const T(&str)[N],
_Out_opt_ size_t* end, _Out_opt_ size_t* end,
_In_ int radix) _In_ int radix)
{ {
@ -2682,7 +2756,8 @@ namespace stdex
case EILSEQ: throw std::runtime_error("encoding error"); case EILSEQ: throw std::runtime_error("encoding error");
default: throw std::runtime_error("failed to format string"); default: throw std::runtime_error("failed to format string");
} }
} else }
else
return capacity; return capacity;
#else #else
switch (errno) { switch (errno) {
@ -2987,7 +3062,7 @@ namespace stdex
/// \param[in,out] str String /// \param[in,out] str String
/// ///
template<class T, size_t N> template<class T, size_t N>
void strlwr(_Inout_ T (&str)[N]) void strlwr(_Inout_ T(&str)[N])
{ {
// Array overload: forwards to the two-argument strlwr, passing the array extent N as the second argument.
strlwr(str, N); strlwr(str, N);
} }
@ -2999,7 +3074,7 @@ namespace stdex
/// \param[in] locale C++ locale to use /// \param[in] locale C++ locale to use
/// ///
template<class T, size_t N> template<class T, size_t N>
void strlwr(_Inout_ T (&str)[N], _In_ const std::locale& locale) void strlwr(_Inout_ T(&str)[N], _In_ const std::locale& locale)
{ {
// Array overload: forwards to the three-argument strlwr, passing the array extent N and the caller's locale.
strlwr(str, N, locale); strlwr(str, N, locale);
} }
@ -3094,7 +3169,7 @@ namespace stdex
/// \param[in,out] str String /// \param[in,out] str String
/// ///
template<class T, size_t N> template<class T, size_t N>
void strupr(_Inout_ T (&str)[N]) void strupr(_Inout_ T(&str)[N])
{ {
// Array overload: forwards to the two-argument strupr, passing the array extent N as the second argument.
return strupr(str, N); return strupr(str, N);
} }
@ -3106,7 +3181,7 @@ namespace stdex
/// \param[in] locale C++ locale to use /// \param[in] locale C++ locale to use
/// ///
template<class T, size_t N> template<class T, size_t N>
void strupr(_Inout_ T (&str)[N], _In_ const std::locale& locale) void strupr(_Inout_ T(&str)[N], _In_ const std::locale& locale)
{ {
// Array overload: forwards to the three-argument strupr, passing the array extent N and the caller's locale.
return strupr(str, N, locale); return strupr(str, N, locale);
} }