string: add recoding from UTF-16 to UTF-32

I know there are system functions for this (and libiconv), but this is
so trivial and quick in our implementation.
This commit is contained in:
Simon Rozman 2024-09-27 14:11:11 +02:00
parent da11495282
commit 5675f8b139
5 changed files with 70 additions and 1 deletions

View File

@ -54,6 +54,17 @@ namespace Assert
throw std::runtime_error("not equal"); throw std::runtime_error("not equal");
} }
///
/// Asserts that two zero-terminated UTF-32 strings are identical
///
/// The strings are compared one code unit at a time, so the check needs no
/// platform-specific string routine and makes no assumption about the width of wchar_t.
///
/// \param[in] a  First string
/// \param[in] b  Second string
///
/// \throws std::runtime_error  When the strings differ, or when exactly one of them is null.
///
inline void AreEqual(const char32_t* a, const char32_t* b)
{
	// The very same pointer (this also covers two nulls) is trivially equal.
	if (a == b)
		return;
	// A null string never equals a non-null one; fail instead of dereferencing it.
	if (!a || !b)
		throw std::runtime_error("not equal");
	for (;; ++a, ++b) {
		if (*a != *b)
			throw std::runtime_error("not equal");
		// Both strings reached their terminator at the same position.
		if (*a == 0)
			return;
	}
}
template <class T> template <class T>
void AreNotEqual(const T& a, const T& b) void AreNotEqual(const T& a, const T& b)
{ {

View File

@ -26,6 +26,7 @@ int main(int, const char *[])
UnitTests::stream::file_stat(); UnitTests::stream::file_stat();
UnitTests::stream::open_close(); UnitTests::stream::open_close();
UnitTests::stream::replicator(); UnitTests::stream::replicator();
UnitTests::string::strncpy();
UnitTests::string::sprintf(); UnitTests::string::sprintf();
UnitTests::unicode::charset_encoder(); UnitTests::unicode::charset_encoder();
UnitTests::unicode::normalize(); UnitTests::unicode::normalize();

View File

@ -107,6 +107,7 @@ namespace UnitTests
TEST_CLASS(string) TEST_CLASS(string)
{ {
public: public:
TEST_METHOD(strncpy);
TEST_METHOD(sprintf); TEST_METHOD(sprintf);
}; };

View File

@ -12,6 +12,13 @@ using namespace Microsoft::VisualStudio::CppUnitTestFramework;
namespace UnitTests namespace UnitTests
{ {
void string::strncpy()
{
stdex::utf32_t tmp[0x100];
stdex::strncpy(tmp, u"This is a 🐔Test🐮.");
Assert::AreEqual(reinterpret_cast<const stdex::utf32_t*>(U"This is a 🐔Test🐮."), tmp);
}
void string::sprintf() void string::sprintf()
{ {
stdex::locale locale(stdex::create_locale(LC_ALL, "en_US.UTF-8")); stdex::locale locale(stdex::create_locale(LC_ALL, "en_US.UTF-8"));

View File

@ -1,4 +1,4 @@
/* /*
SPDX-License-Identifier: MIT SPDX-License-Identifier: MIT
Copyright © 2016-2024 Amebis Copyright © 2016-2024 Amebis
*/ */
@ -1636,6 +1636,26 @@ namespace stdex
} }
} }
///
/// Recodes a zero-terminated UTF-16 string into UTF-32
///
/// Each surrogate pair recognized by is_surrogate_pair() is collapsed into a single
/// destination code unit using surrogate_pair_to_ucs4(). Every other source code unit
/// is widened as-is. The zero terminator is copied as well.
///
/// \param[out] dst  Destination string. No size limit is passed in, so the caller must provide enough room for the recoded string and its terminator.
/// \param[in]  src  Zero-terminated source string
///
/// \return Number of code units written to dst, not counting the zero terminator.
///
inline size_t strcpy(
	_Out_ _Post_maybez_ utf32_t* dst,
	_In_z_ const utf16_t* src)
{
	stdex_assert(dst);
	stdex_assert(src);
	size_t written = 0; // index of the next code unit to store in dst
	size_t read = 0;    // index of the next code unit to fetch from src
	for (;;) {
		utf32_t cp;
		if (is_surrogate_pair(&src[read])) {
			// Two UTF-16 code units produce one UTF-32 code unit.
			cp = surrogate_pair_to_ucs4(&src[read]);
			read += 2;
		}
		else {
			cp = static_cast<utf32_t>(src[read]);
			++read;
		}
		dst[written] = cp;
		// The terminator is stored, but not counted.
		if (cp == 0)
			return written;
		++written;
	}
}
/// ///
/// Copy zero-terminated string /// Copy zero-terminated string
/// ///
@ -1690,6 +1710,35 @@ namespace stdex
} }
} }
///
/// Recodes a UTF-16 string into UTF-32, honoring source and destination limits
///
/// Copying stops at whichever comes first:
/// - count_dst code units have been written (dst is then left without a zero terminator),
/// - count_src source code units have been consumed (a zero terminator is appended to dst),
/// - a zero code unit has been copied from src.
///
/// A surrogate pair is only looked for while at least two source code units remain
/// within count_src. Any source code unit not recognized as part of a pair by
/// is_surrogate_pair() is widened as-is.
///
/// \param[out] dst        Destination string
/// \param[in]  count_dst  Destination string code unit count limit
/// \param[in]  src        Source string
/// \param[in]  count_src  Source string code unit count limit
///
/// \return Number of code units written to dst, not counting the zero terminator. Equals count_dst when the destination limit was hit.
///
inline size_t strncpy(
	_Out_writes_(count_dst) _Post_maybez_ utf32_t* dst, _In_ size_t count_dst,
	_In_reads_or_z_opt_(count_src) const utf16_t* src, _In_ size_t count_src)
{
	stdex_assert(dst || !count_dst);
	stdex_assert(src || !count_src);
	size_t written = 0; // index of the next code unit to store in dst
	size_t read = 0;    // index of the next code unit to fetch from src
	while (written < count_dst) {
		if (read >= count_src) {
			// Source exhausted with room to spare: terminate the destination.
			dst[written] = 0;
			break;
		}
		utf32_t cp;
		if (read + 1 < count_src && is_surrogate_pair(&src[read])) {
			// Two UTF-16 code units produce one UTF-32 code unit.
			cp = surrogate_pair_to_ucs4(&src[read]);
			read += 2;
		}
		else {
			cp = static_cast<utf32_t>(src[read]);
			++read;
		}
		dst[written] = cp;
		// The terminator is stored, but not counted.
		if (cp == 0)
			break;
		++written;
	}
	return written;
}
/// ///
/// Copy zero-terminated string /// Copy zero-terminated string
/// ///