From fea0ed7754b49ba8775edb3de52ea60359b536f7 Mon Sep 17 00:00:00 2001
From: Simon Rozman
Date: Fri, 15 Sep 2023 15:32:14 +0200
Subject: [PATCH] unicode: add system charset detection

Signed-off-by: Simon Rozman
---
 include/stdex/unicode.hpp | 43 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/include/stdex/unicode.hpp b/include/stdex/unicode.hpp
index 2ecb42ed0..082fcd693 100644
--- a/include/stdex/unicode.hpp
+++ b/include/stdex/unicode.hpp
@@ -1,4 +1,4 @@
-/*
+/*
 	SPDX-License-Identifier: MIT
 	Copyright © 2023 Amebis
 */
@@ -329,6 +329,47 @@ namespace stdex
 #endif
 	}
 
+	///
+	/// Detects the character set used by the system for narrow (char) strings
+	///
+	/// On Windows, the ANSI code page returned by GetACP() is converted to charset_id.
+	/// Elsewhere, the codeset name of the current locale is obtained with nl_langinfo(CODESET)
+	/// and compared against a fixed list of names. UTF-16/UTF-32 names carrying an explicit
+	/// byte order are accepted only when that order matches BYTE_ORDER.
+	///
+	/// \note nl_langinfo() reflects the locale currently set in the process; call
+	/// setlocale(LC_CTYPE, "") or similar first to pick up the user's environment.
+	///
+	/// \returns Detected charset, or charset_id::system when the codeset name is not recognized
+	///
+	static charset_id system_charset()
+	{
+#ifdef _WIN32
+		return static_cast<charset_id>(GetACP());
+#else
+		// nl_langinfo() takes an nl_item: CODESET names the locale's encoding, whereas
+		// LC_CTYPE is a setlocale() category and yields an unrelated string here.
+		const char* codeset = nl_langinfo(CODESET);
+		if (strcmp(codeset, "UTF-8") == 0) return charset_id::utf8;
+		if (strcmp(codeset, "UTF-16") == 0) return charset_id::utf16;
+#if BYTE_ORDER == BIG_ENDIAN
+		if (strcmp(codeset, "UTF-16BE") == 0) return charset_id::utf16;
+#else
+		if (strcmp(codeset, "UTF-16LE") == 0) return charset_id::utf16;
+#endif
+		if (strcmp(codeset, "UTF-32") == 0) return charset_id::utf32;
+#if BYTE_ORDER == BIG_ENDIAN
+		if (strcmp(codeset, "UTF-32BE") == 0) return charset_id::utf32;
+#else
+		if (strcmp(codeset, "UTF-32LE") == 0) return charset_id::utf32;
+#endif
+		if (strcmp(codeset, "CP1250") == 0) return charset_id::windows1250;
+		if (strcmp(codeset, "CP1251") == 0) return charset_id::windows1251;
+		if (strcmp(codeset, "CP1252") == 0) return charset_id::windows1252;
+		return charset_id::system;
+#endif
+	}
+
 #ifdef _WIN32
 	protected:
 		static UINT to_encoding(_In_ charset_id charset)