string: add variants for char16_t/char32_t strings

char16_t is not the same type as wchar_t on Windows, and char32_t is not
the same type as wchar_t on POSIX. As a result, instead of selecting the
dedicated UTF-16/UTF-32 variant, overload resolution picked the generic
template implementation of strncmp, strcpy and strncpy: the one that
does not convert UTF-16 surrogate pairs to their UTF-32 representation.

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2025-01-13 10:56:38 +01:00
parent a7543cf9ab
commit 2674bb0e32
2 changed files with 122 additions and 47 deletions

View File

@ -1,4 +1,4 @@
/*
/*
SPDX-License-Identifier: MIT
Copyright © 2023-2025 Amebis
*/
@ -16,7 +16,7 @@ namespace UnitTests
{
stdex::utf32_t tmp[0x100];
stdex::strncpy(tmp, u"This is a 🐔Test🐮.");
Assert::AreEqual(reinterpret_cast<const stdex::utf32_t*>(U"This is a 🐔Test🐮."), tmp);
Assert::IsTrue(stdex::strcmp(U"This is a 🐔Test🐮.", tmp) == 0);
}
void string::sprintf()

View File

@ -14,6 +14,7 @@
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <uchar.h>
#ifdef __APPLE__
#include <xlocale.h>
#endif
@ -346,7 +347,7 @@ namespace stdex
/// \return Number of code units excluding zero terminator in the string.
///
template <class T, size_t N>
size_t strnlen(_In_ const T (&str)[N])
size_t strnlen(_In_ const T(&str)[N])
{
// Fixed-size array overload: forwards to the two-argument strnlen,
// passing the array extent N as the second argument.
return strnlen(str, N);
}
@ -401,7 +402,7 @@ namespace stdex
///
template <class T, size_t N>
size_t strnchr(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_In_ T chr)
{
return strnchr(str, N, chr);
@ -459,7 +460,7 @@ namespace stdex
///
template <class T, size_t N>
size_t strrnchr(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_In_ T chr)
{
return strrnchr(str, N, chr);
@ -565,7 +566,7 @@ namespace stdex
///
template <class T, size_t N>
size_t strnichr(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_In_ T chr)
{
return strnichr(str, N, chr);
@ -582,7 +583,7 @@ namespace stdex
///
template <class T, size_t N>
size_t strnichr(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_In_ T chr,
_In_ const std::locale& locale)
{
@ -693,7 +694,7 @@ namespace stdex
///
template <class T, size_t N>
size_t strrnichr(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_In_ T chr)
{
return strrnichr(str, N, chr);
@ -710,7 +711,7 @@ namespace stdex
///
template <class T, size_t N>
size_t strrnichr(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_In_ T chr,
_In_ const std::locale& locale)
{
@ -805,7 +806,7 @@ namespace stdex
/// \return `true` if all characters are white-space or `false` when any non-white-space character is found in string.
///
template <class T, size_t N>
bool isblank(_In_ const T (&str)[N])
bool isblank(_In_ const T(&str)[N])
{
// Fixed-size array overload: forwards to the two-argument isblank,
// passing the array extent N as the second argument.
return isblank(str, N);
}
@ -820,7 +821,7 @@ namespace stdex
///
template <class T, size_t N>
bool isblank(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_In_ const std::locale& locale)
{
return isblank(str, N, locale);
@ -869,7 +870,7 @@ namespace stdex
/// \return `true` if all characters are ASCII or `false` when any non-ASCII character is found in string.
///
template <class T, size_t N>
bool is7bit(_In_ const T (&str)[N])
bool is7bit(_In_ const T(&str)[N])
{
// Fixed-size array overload: forwards to the two-argument is7bit,
// passing the array extent N as the second argument.
return is7bit(str, N);
}
@ -961,8 +962,8 @@ namespace stdex
///
template <class T1, size_t N1, class T2, size_t N2>
int strncmp(
_In_ const T1 (&str1)[N1],
_In_ const T2 (&str2)[N2])
_In_ const T1(&str1)[N1],
_In_ const T2(&str2)[N2])
{
// Fixed-size array overload: forwards to the four-argument strncmp,
// passing the array extents N1 and N2 as the respective counts.
return strncmp(str1, N1, str2, N2);
}
@ -1006,6 +1007,25 @@ namespace stdex
return 0;
}
///
/// Binary compare a char32_t string against a char16_t string
///
/// The pointers are reinterpreted as utf32_t and utf16_t respectively and the
/// comparison is delegated to the strncmp overload accepting those types.
///
/// \param[in] str1    String 1
/// \param[in] count1  String 1 code unit count limit
/// \param[in] str2    String 2
/// \param[in] count2  String 2 code unit count limit
///
/// \return Negative if str1<str2; positive if str1>str2; zero if str1==str2
///
inline int strncmp(
    _In_reads_or_z_opt_(count1) const char32_t* str1, _In_ size_t count1,
    _In_reads_or_z_opt_(count2) const char16_t* str2, _In_ size_t count2)
{
    const utf32_t* lhs = reinterpret_cast<const utf32_t*>(str1);
    const utf16_t* rhs = reinterpret_cast<const utf16_t*>(str2);
    return strncmp(lhs, count1, rhs, count2);
}
///
/// Binary compare two strings
///
@ -1016,12 +1036,30 @@ namespace stdex
///
template <size_t N1, size_t N2>
int strncmp(
_In_ const utf32_t (&str1)[N1],
_In_ const utf16_t (&str2)[N2])
_In_ const utf32_t(&str1)[N1],
_In_ const utf16_t(&str2)[N2])
{
// Fixed-size array overload for a utf32_t/utf16_t pair: forwards to the
// four-argument strncmp, passing the array extents N1 and N2 as the counts.
return strncmp(str1, N1, str2, N2);
}
///
/// Binary compare a char32_t array against a char16_t array
///
/// The arrays are reinterpreted as utf32_t and utf16_t strings respectively;
/// their extents N1 and N2 are passed on as the code unit counts.
///
/// \param[in] str1  String 1
/// \param[in] str2  String 2
///
/// \return Negative if str1<str2; positive if str1>str2; zero if str1==str2
///
template <size_t N1, size_t N2>
int strncmp(
    _In_ const char32_t(&str1)[N1],
    _In_ const char16_t(&str2)[N2])
{
    const utf32_t* lhs = reinterpret_cast<const utf32_t*>(str1);
    const utf16_t* rhs = reinterpret_cast<const utf16_t*>(str2);
    return strncmp(lhs, N1, rhs, N2);
}
///
/// Binary compare two strings in reverse direction
///
@ -1118,8 +1156,8 @@ namespace stdex
///
template <class T1, size_t N1, class T2, size_t N2>
int strrncmp(
_In_ const T1 (&str1)[N1],
_In_ const T2 (&str2)[N2])
_In_ const T1(&str1)[N1],
_In_ const T2(&str2)[N2])
{
// Fixed-size array overload: forwards to the four-argument strrncmp,
// passing the array extents N1 and N2 as the respective counts.
return strrncmp(str1, N1, str2, N2);
}
@ -1304,8 +1342,8 @@ namespace stdex
///
template <class T1, size_t N1, class T2, size_t N2>
int strnicmp(
_In_ const T1 (&str1)[N1],
_In_ const T2 (&str2)[N2])
_In_ const T1(&str1)[N1],
_In_ const T2(&str2)[N2])
{
// Fixed-size array overload: forwards to the four-argument strnicmp,
// passing the array extents N1 and N2 as the respective counts.
// The result must be returned: flowing off the end of a non-void function
// is undefined behavior, and callers would otherwise see a garbage value.
return strnicmp(str1, N1, str2, N2);
}
@ -1321,8 +1359,8 @@ namespace stdex
///
template <class T1, size_t N1, class T2, size_t N2>
int strnicmp(
_In_ const T1 (&str1)[N1],
_In_ const T2 (&str2)[N2],
_In_ const T1(&str1)[N1],
_In_ const T2(&str2)[N2],
_In_ const std::locale& locale)
{
strnicmp(str1, N1, str2, N2, locale);
@ -1383,8 +1421,8 @@ namespace stdex
///
template <class T, size_t N1, size_t N2>
int strncoll(
_In_ const T (&str1)[N1],
_In_ const T (&str2)[N2],
_In_ const T(&str1)[N1],
_In_ const T(&str2)[N2],
_In_ const std::locale& locale)
{
return strncoll(str1, N1, str2, N2, locale);
@ -1455,7 +1493,7 @@ namespace stdex
///
template <class T1, size_t N1, class T2>
size_t strnstr(
_In_ const T1 (&str)[N1],
_In_ const T1(&str)[N1],
_In_z_ const T2* sample)
{
return strnstr(str, N1, sample);
@ -1591,7 +1629,7 @@ namespace stdex
///
template <class T1, size_t N1, class T2>
size_t strnistr(
_In_ const T1 (&str)[N1],
_In_ const T1(&str)[N1],
_In_z_ const T2* sample)
{
return strnistr(str, N1, sample);
@ -1608,7 +1646,7 @@ namespace stdex
///
template <class T1, size_t N1, class T2>
size_t strnistr(
_In_ const T1 (&str)[N1],
_In_ const T1(&str)[N1],
_In_z_ const T2* sample,
_In_ const std::locale& locale)
{
@ -1659,6 +1697,23 @@ namespace stdex
}
}
///
/// Recode UTF-16 zero-terminated string to UTF-32
///
/// Variant for char32_t destination and char16_t source: both pointers are
/// reinterpreted as utf32_t/utf16_t and passed to the strcpy overload
/// accepting those types.
///
/// \param[out] dst  Destination string
/// \param[in]  src  Source string
///
/// \return Number of code units excluding zero terminator in the dst string after the operation.
///
inline size_t strcpy(
    _Out_writes_z_(_String_length_(src) + 1) char32_t* dst,
    _In_z_ const char16_t* src)
{
    utf32_t* target = reinterpret_cast<utf32_t*>(dst);
    const utf16_t* source = reinterpret_cast<const utf16_t*>(src);
    return strcpy(target, source);
}
///
/// Copy zero-terminated string
///
@ -1742,6 +1797,25 @@ namespace stdex
}
}
///
/// Recode UTF-16 zero-terminated string to UTF-32
///
/// Variant for char32_t destination and char16_t source: both pointers are
/// reinterpreted as utf32_t/utf16_t and passed, together with the unchanged
/// count limits, to the strncpy overload accepting those types.
///
/// \param[out] dst        Destination string
/// \param[in]  count_dst  Destination string code unit count limit
/// \param[in]  src        Source string
/// \param[in]  count_src  Source string code unit count limit
///
/// \return Number of code units excluding zero terminator in the dst string after the operation.
///
inline size_t strncpy(
    _Out_writes_(count_dst) _Post_maybez_ char32_t* dst, _In_ size_t count_dst,
    _In_reads_or_z_opt_(count_src) const char16_t* src, _In_ size_t count_src)
{
    utf32_t* target = reinterpret_cast<utf32_t*>(dst);
    const utf16_t* source = reinterpret_cast<const utf16_t*>(src);
    return strncpy(target, count_dst, source, count_src);
}
///
/// Copy zero-terminated string
///
@ -1752,8 +1826,8 @@ namespace stdex
///
template <class T1, size_t N1, class T2, size_t N2>
size_t strncpy(
_Out_ _Post_maybez_ T1 (&dst)[N1],
_In_ const T2 (&src)[N2])
_Out_ _Post_maybez_ T1(&dst)[N1],
_In_ const T2(&src)[N2])
{
// Fixed-size array overload: forwards to the four-argument strncpy,
// passing the array extents N1 and N2 as the destination and source counts.
return strncpy(dst, N1, src, N2);
}
@ -1886,7 +1960,7 @@ namespace stdex
/// \return Pointer to duplicated string; or nullptr if str is nullptr. Use delete[] operator to free the memory.
///
template <class T, size_t N>
_Check_return_ _Ret_maybenull_z_ T* strndup(_In_ const T (&str)[N])
_Check_return_ _Ret_maybenull_z_ T* strndup(_In_ const T(&str)[N])
{
// Fixed-size array overload: forwards to the two-argument strndup,
// passing the array extent N as the second argument.
return strndup(str, N);
}
@ -2131,7 +2205,7 @@ namespace stdex
///
template <class T, size_t N, class T_bin>
T_bin strtoint(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_Out_opt_ size_t* end,
_In_ int radix)
{
@ -2182,7 +2256,7 @@ namespace stdex
///
template <class T, size_t N, class T_bin>
T_bin strtouint(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_Out_opt_ size_t* end,
_In_ int radix)
{
@ -2219,7 +2293,7 @@ namespace stdex
///
template <class T, size_t N>
int8_t strto8(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_Out_opt_ size_t* end,
_In_ int radix)
{
@ -2256,7 +2330,7 @@ namespace stdex
///
template <class T, size_t N>
int16_t strto16(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_Out_opt_ size_t* end,
_In_ int radix)
{
@ -2293,7 +2367,7 @@ namespace stdex
///
template <class T, size_t N>
int32_t strto32(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_Out_opt_ size_t* end,
_In_ int radix)
{
@ -2330,7 +2404,7 @@ namespace stdex
///
template <class T, size_t N>
int64_t strto64(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_Out_opt_ size_t* end,
_In_ int radix)
{
@ -2373,7 +2447,7 @@ namespace stdex
///
template <class T, size_t N>
ptrdiff_t strtoi(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_Out_opt_ size_t* end,
_In_ int radix)
{
@ -2410,7 +2484,7 @@ namespace stdex
///
template <class T, size_t N>
uint8_t strtou8(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_Out_opt_ size_t* end,
_In_ int radix)
{
@ -2447,7 +2521,7 @@ namespace stdex
///
template <class T, size_t N>
uint16_t strtou16(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_Out_opt_ size_t* end,
_In_ int radix)
{
@ -2484,7 +2558,7 @@ namespace stdex
///
template <class T, size_t N>
uint32_t strtou32(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_Out_opt_ size_t* end,
_In_ int radix)
{
@ -2521,7 +2595,7 @@ namespace stdex
///
template <class T, size_t N>
uint64_t strtou64(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_Out_opt_ size_t* end,
_In_ int radix)
{
@ -2564,7 +2638,7 @@ namespace stdex
///
template <class T, size_t N>
size_t strtoui(
_In_ const T (&str)[N],
_In_ const T(&str)[N],
_Out_opt_ size_t* end,
_In_ int radix)
{
@ -2682,7 +2756,8 @@ namespace stdex
case EILSEQ: throw std::runtime_error("encoding error");
default: throw std::runtime_error("failed to format string");
}
} else
}
else
return capacity;
#else
switch (errno) {
@ -2987,7 +3062,7 @@ namespace stdex
/// \param[in,out] str String
///
template<class T, size_t N>
void strlwr(_Inout_ T (&str)[N])
void strlwr(_Inout_ T(&str)[N])
{
// Fixed-size array overload: forwards to the two-argument strlwr,
// passing the array extent N as the second argument.
strlwr(str, N);
}
@ -2999,7 +3074,7 @@ namespace stdex
/// \param[in] locale C++ locale to use
///
template<class T, size_t N>
void strlwr(_Inout_ T (&str)[N], _In_ const std::locale& locale)
void strlwr(_Inout_ T(&str)[N], _In_ const std::locale& locale)
{
// Fixed-size array overload: forwards to the three-argument strlwr,
// passing the array extent N as the second argument and the locale unchanged.
strlwr(str, N, locale);
}
@ -3094,7 +3169,7 @@ namespace stdex
/// \param[in,out] str String
///
template<class T, size_t N>
void strupr(_Inout_ T (&str)[N])
void strupr(_Inout_ T(&str)[N])
{
// Fixed-size array overload: forwards to the two-argument strupr,
// passing the array extent N as the second argument.
return strupr(str, N);
}
@ -3106,7 +3181,7 @@ namespace stdex
/// \param[in] locale C++ locale to use
///
template<class T, size_t N>
void strupr(_Inout_ T (&str)[N], _In_ const std::locale& locale)
void strupr(_Inout_ T(&str)[N], _In_ const std::locale& locale)
{
// Fixed-size array overload: forwards to the three-argument strupr,
// passing the array extent N as the second argument and the locale unchanged.
return strupr(str, N, locale);
}