unicode: add charset_from_name

This makes name->charset_id conversion available to others.

Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
Simon Rozman 2023-11-17 14:52:24 +01:00
parent 52d9956891
commit 6c8d6f182c

View File

@ -62,6 +62,53 @@ namespace stdex
constexpr charset_id system_charset = charset_id::system;
#endif
///
/// Looks up a charset by its textual name
///
/// \param[in] name  Zero-terminated charset name, e.g. "UTF-8" or "CP1250"
///
/// \returns The charset_id registered for the name; charset_id::system when the name is not in the table
///
inline charset_id charset_from_name(_In_z_ const char* name)
{
	// Orders keys by string content via stdex::stricmp() under stdex::std_locale_C
	// (presumably a case-insensitive compare - confirm against stdex::stricmp),
	// rather than by pointer value.
	struct name_order {
		inline bool operator()(_In_z_ const char* lhs, _In_z_ const char* rhs) const
		{
			return stdex::stricmp(lhs, rhs, stdex::std_locale_C) < 0;
		}
	};

	// Alias table; function-local static, so it is built only once.
	static const std::map<const char*, charset_id, name_order> known_names = {
		// UTF-7
		{ "UNICODE-1-1-UTF-7", charset_id::utf7 },
		{ "UTF-7", charset_id::utf7 },
		{ "CSUNICODE11UTF7", charset_id::utf7 },

		// UTF-8
		{ "UTF-8", charset_id::utf8 },
		{ "UTF8", charset_id::utf8 },

		// UTF-16 and UTF-32
		{ "UTF-16", charset_id::utf16 },
		{ "UTF-32", charset_id::utf32 },
		// Only the explicit-endianness aliases selected by BYTE_ORDER are registered.
#if BYTE_ORDER == BIG_ENDIAN
		{ "UTF-16BE", charset_id::utf16 },
		{ "UTF-32BE", charset_id::utf32 },
#else
		{ "UTF-16LE", charset_id::utf16 },
		{ "UTF-32LE", charset_id::utf32 },
#endif

		// Windows-1250
		{ "CP1250", charset_id::windows1250 },
		{ "MS-EE", charset_id::windows1250 },
		{ "WINDOWS-1250", charset_id::windows1250 },

		// Windows-1251
		{ "CP1251", charset_id::windows1251 },
		{ "MS-CYRL", charset_id::windows1251 },
		{ "WINDOWS-1251", charset_id::windows1251 },

		// Windows-1252
		{ "CP1252", charset_id::windows1252 },
		{ "MS-ANSI", charset_id::windows1252 },
		{ "WINDOWS-1252", charset_id::windows1252 },
	};

	const auto match = known_names.find(name);
	return match != known_names.end() ? match->second : charset_id::system;
}
///
/// Encoding converter context
///
@ -105,7 +152,7 @@ namespace stdex
///
template <class _Traits_to = std::char_traits<T_to>, class _Alloc_to = std::allocator<T_to>>
void strcat(
_Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to> &dst,
_Inout_ std::basic_string<T_to, _Traits_to, _Alloc_to>& dst,
_In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src)
{
_Assume_(src || !count_src);
@ -359,44 +406,7 @@ namespace stdex
#ifdef _WIN32
return static_cast<charset_id>(GetACP());
#else
static const std::map<const char*, charset_id> charsets = {
{ "UNICODE-1-1-UTF-7", charset_id::utf7 },
{ "UTF-7", charset_id::utf7 },
{ "CSUNICODE11UTF7", charset_id::utf7 },
{ "UTF-8", charset_id::utf8 },
{ "UTF8", charset_id::utf8 },
{ "UTF-16", charset_id::utf16 },
#if BYTE_ORDER == BIG_ENDIAN
{ "UTF-16BE", charset_id::utf16 },
#else
{ "UTF-16LE", charset_id::utf16 },
#endif
{ "UTF-32", charset_id::utf32 },
#if BYTE_ORDER == BIG_ENDIAN
{ "UTF-32BE", charset_id::utf32 },
#else
{ "UTF-32LE", charset_id::utf32 },
#endif
{ "CP1250", charset_id::windows1250 },
{ "MS-EE", charset_id::windows1250 },
{ "WINDOWS-1250", charset_id::windows1250 },
{ "CP1251", charset_id::windows1251 },
{ "MS-CYRL", charset_id::windows1251 },
{ "WINDOWS-1251", charset_id::windows1251 },
{ "CP1252", charset_id::windows1252 },
{ "MS-ANSI", charset_id::windows1252 },
{ "WINDOWS-1252", charset_id::windows1252 },
};
const char* lctype = nl_langinfo(CODESET);
if (auto el = charsets.find(lctype); el != charsets.end())
return el->second;
return charset_id::system;
return charset_from_name(nl_langinfo(CODESET));
#endif
}