string: add recoding from UTF-16 to UTF-32

I know there are system functions for this (and libiconv), but this is
so trivial and quick in our implementation.
This commit is contained in:
2024-09-27 14:11:11 +02:00
parent da11495282
commit 5675f8b139
5 changed files with 70 additions and 1 deletions

View File

@@ -1,4 +1,4 @@
/*
/*
SPDX-License-Identifier: MIT
Copyright © 2016-2024 Amebis
*/
@@ -1636,6 +1636,26 @@ namespace stdex
}
}
///
/// Recode UTF-16 zero-terminated string to UTF-32
///
/// Every surrogate pair recognized by is_surrogate_pair() is replaced with the
/// value returned by surrogate_pair_to_ucs4(); any other code unit (unpaired
/// surrogates included) is widened to utf32_t unchanged. The zero terminator is
/// copied too. No bounds checking is done on dst.
///
/// \param[out] dst  Destination string
/// \param[in]  src  Source string
///
/// \return Number of code units excluding zero terminator in the dst string after the operation.
///
inline size_t strcpy(
    _Out_ _Post_maybez_ utf32_t* dst,
    _In_z_ const utf16_t* src)
{
    stdex_assert(dst);
    stdex_assert(src);
    size_t dst_pos = 0;
    size_t src_pos = 0;
    for (;;) {
        if (is_surrogate_pair(&src[src_pos])) {
            dst[dst_pos] = surrogate_pair_to_ucs4(&src[src_pos]);
            // A pair occupies two source code units: skip the first one here,
            // the second one is skipped by the common advance below.
            ++src_pos;
        }
        else
            dst[dst_pos] = static_cast<utf32_t>(src[src_pos]);

        // The terminator has just been stored; it is not counted in the result.
        if (dst[dst_pos] == 0)
            return dst_pos;

        ++dst_pos;
        ++src_pos;
    }
}
///
/// Copy zero-terminated string
///
@@ -1690,6 +1710,35 @@ namespace stdex
}
}
///
/// Recode UTF-16 zero-terminated string to UTF-32
///
/// Copying stops at whichever comes first: the destination limit, the source
/// limit, or a zero code unit in the source. When the destination limit is
/// reached first, dst is left without a zero terminator. A surrogate pair is
/// combined only when both of its code units lie within count_src; otherwise
/// the code unit is widened to utf32_t unchanged.
///
/// \param[out] dst        Destination string
/// \param[in]  count_dst  Destination string code unit count limit
/// \param[in]  src        Source string
/// \param[in]  count_src  Source string code unit count limit
///
/// \return Number of code units excluding zero terminator in the dst string after the operation.
///
inline size_t strncpy(
    _Out_writes_(count_dst) _Post_maybez_ utf32_t* dst, _In_ size_t count_dst,
    _In_reads_or_z_opt_(count_src) const utf16_t* src, _In_ size_t count_src)
{
    stdex_assert(dst || !count_dst);
    stdex_assert(src || !count_src);
    size_t dst_pos = 0;
    size_t src_pos = 0;
    while (dst_pos < count_dst) {
        if (src_pos >= count_src) {
            // Source exhausted with room left in dst: terminate the output.
            dst[dst_pos] = 0;
            break;
        }

        if (src_pos + 1 < count_src && is_surrogate_pair(&src[src_pos])) {
            dst[dst_pos] = surrogate_pair_to_ucs4(&src[src_pos]);
            // A pair occupies two source code units: skip the first one here,
            // the second one is skipped by the common advance below.
            ++src_pos;
        }
        else
            dst[dst_pos] = static_cast<utf32_t>(src[src_pos]);

        // The source terminator has just been stored; it is not counted.
        if (dst[dst_pos] == 0)
            break;

        ++dst_pos;
        ++src_pos;
    }
    return dst_pos;
}
///
/// Copy zero-terminated string
///