string: add variants for char16_t/char32_t strings

char16_t is not exactly the same type as wchar_t on Windows, and char32_t
is not exactly the same type as wchar_t on POSIX. As a result, rather than
selecting the appropriate UTF-16/UTF-32 variant, overload resolution picked
the generic template implementation of strncmp, strcpy and strncpy: the one
that does not map UTF-16 surrogate pairs to their UTF-32 representation.

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2025-01-13 10:56:38 +01:00
parent a7543cf9ab
commit 2674bb0e32
2 changed files with 122 additions and 47 deletions

View File

@ -1,4 +1,4 @@
/* /*
SPDX-License-Identifier: MIT SPDX-License-Identifier: MIT
Copyright © 2023-2025 Amebis Copyright © 2023-2025 Amebis
*/ */
@ -16,7 +16,7 @@ namespace UnitTests
{ {
stdex::utf32_t tmp[0x100]; stdex::utf32_t tmp[0x100];
stdex::strncpy(tmp, u"This is a 🐔Test🐮."); stdex::strncpy(tmp, u"This is a 🐔Test🐮.");
Assert::AreEqual(reinterpret_cast<const stdex::utf32_t*>(U"This is a 🐔Test🐮."), tmp); Assert::IsTrue(stdex::strcmp(U"This is a 🐔Test🐮.", tmp) == 0);
} }
void string::sprintf() void string::sprintf()

View File

@ -14,6 +14,7 @@
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <time.h> #include <time.h>
#include <uchar.h>
#ifdef __APPLE__ #ifdef __APPLE__
#include <xlocale.h> #include <xlocale.h>
#endif #endif
@ -1006,6 +1007,25 @@ namespace stdex
return 0; return 0;
} }
///
/// Binary compare a char32_t string against a char16_t string
///
/// Both pointers are reinterpreted as utf32_t/utf16_t strings and the call is
/// forwarded to the strncmp overload taking those types.
///
/// \param[in] str1 String 1
/// \param[in] count1 String 1 code unit count limit
/// \param[in] str2 String 2
/// \param[in] count2 String 2 code unit count limit
///
/// \return Negative if str1<str2; positive if str1>str2; zero if str1==str2
///
inline int strncmp(
    _In_reads_or_z_opt_(count1) const char32_t* str1, _In_ size_t count1,
    _In_reads_or_z_opt_(count2) const char16_t* str2, _In_ size_t count2)
{
    // Re-type the inputs so the call below resolves on utf32_t/utf16_t arguments.
    const utf32_t* lhs = reinterpret_cast<const utf32_t*>(str1);
    const utf16_t* rhs = reinterpret_cast<const utf16_t*>(str2);
    return strncmp(lhs, count1, rhs, count2);
}
/// ///
/// Binary compare two strings /// Binary compare two strings
/// ///
@ -1022,6 +1042,24 @@ namespace stdex
return strncmp(str1, N1, str2, N2); return strncmp(str1, N1, str2, N2);
} }
///
/// Binary compare a char32_t array against a char16_t array
///
/// The array extents N1 and N2 are passed on as the code unit count limits.
///
/// \param[in] str1 String 1
/// \param[in] str2 String 2
///
/// \return Negative if str1<str2; positive if str1>str2; zero if str1==str2
///
template <size_t N1, size_t N2>
int strncmp(
    _In_ const char32_t(&str1)[N1],
    _In_ const char16_t(&str2)[N2])
{
    // Re-type the inputs so the call below resolves on utf32_t/utf16_t arguments.
    const utf32_t* lhs = reinterpret_cast<const utf32_t*>(str1);
    const utf16_t* rhs = reinterpret_cast<const utf16_t*>(str2);
    return strncmp(lhs, N1, rhs, N2);
}
/// ///
/// Binary compare two strings in reverse direction /// Binary compare two strings in reverse direction
/// ///
@ -1659,6 +1697,23 @@ namespace stdex
} }
} }
///
/// Recode UTF-16 zero-terminated string to UTF-32
///
/// Both pointers are reinterpreted as utf32_t/utf16_t strings and the call is
/// forwarded to the strcpy overload taking those types.
///
/// \param[out] dst Destination string
/// \param[in] src Source string
///
/// \return Number of code units excluding zero terminator in the dst string after the operation.
///
inline size_t strcpy(
    _Out_writes_z_(_String_length_(src) + 1) char32_t* dst,
    _In_z_ const char16_t* src)
{
    // Re-type the buffers so the call below resolves on utf32_t/utf16_t arguments.
    utf32_t* target = reinterpret_cast<utf32_t*>(dst);
    const utf16_t* source = reinterpret_cast<const utf16_t*>(src);
    return strcpy(target, source);
}
/// ///
/// Copy zero-terminated string /// Copy zero-terminated string
/// ///
@ -1742,6 +1797,25 @@ namespace stdex
} }
} }
///
/// Recode UTF-16 zero-terminated string to UTF-32
///
/// Both pointers are reinterpreted as utf32_t/utf16_t strings and the call is
/// forwarded to the strncpy overload taking those types.
///
/// \param[out] dst Destination string
/// \param[in] count_dst Destination string code unit count limit
/// \param[in] src Source string
/// \param[in] count_src Source string code unit count limit
///
/// \return Number of code units excluding zero terminator in the dst string after the operation.
///
inline size_t strncpy(
    _Out_writes_(count_dst) _Post_maybez_ char32_t* dst, _In_ size_t count_dst,
    _In_reads_or_z_opt_(count_src) const char16_t* src, _In_ size_t count_src)
{
    // Re-type the buffers so the call below resolves on utf32_t/utf16_t arguments.
    utf32_t* target = reinterpret_cast<utf32_t*>(dst);
    const utf16_t* source = reinterpret_cast<const utf16_t*>(src);
    return strncpy(target, count_dst, source, count_src);
}
/// ///
/// Copy zero-terminated string /// Copy zero-terminated string
/// ///
@ -2682,7 +2756,8 @@ namespace stdex
case EILSEQ: throw std::runtime_error("encoding error"); case EILSEQ: throw std::runtime_error("encoding error");
default: throw std::runtime_error("failed to format string"); default: throw std::runtime_error("failed to format string");
} }
} else }
else
return capacity; return capacity;
#else #else
switch (errno) { switch (errno) {