stdex
Additional custom algorithms, or algorithms not covered by Standard C++
Loading...
Searching...
No Matches
langid.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2024 Amebis
4*/
5
6#pragma once
7
8#include "compat.hpp"
9#include "string.hpp"
10#include "unicode.hpp"
11#ifdef _WIN32
12#include "windows.h"
13#endif
14#include <stddef.h>
15#include <stdint.h>
16#include <map>
17#include <string>
18
19#if defined(__GNUC__)
20#pragma GCC diagnostic push
21#pragma GCC diagnostic ignored "-Wexit-time-destructors"
22#endif
23
24namespace stdex
25{
/// Language identifier type: an alias for the Windows LANGID type when building
/// for Windows, and a 16-bit unsigned integer on every other platform.
26#ifdef _WIN32
27 using langid = LANGID;
28#else
29 using langid = uint16_t;
30#endif
31
/// Language identifier returned by the non-Windows langid_from_rfc1766() when
/// the language tag is not found in its lookup table.
32 constexpr langid langid_unknown = 127;
33
34#ifdef _WIN32
42 langid langid_from_rfc1766(_In_z_ const char *rfc1766)
43 {
44 return LANGIDFROMLCID(LocaleNameToLCID(str2wstr(rfc1766, langid::utf8).c_str(), 0));
45 }
46
54 langid langid_from_rfc1766(_In_z_ const wchar_t *rfc1766)
55 {
56 return LANGIDFROMLCID(LocaleNameToLCID(rfc1766, 0));
57 }
58#else
66 inline langid langid_from_rfc1766(_In_z_ const char *rfc1766)
67 {
68 struct stricmp_less
69 {
70 bool operator()(_In_z_ const char *str1, _In_z_ const char *str2) const
71 {
72 stdex_assert(str1);
73 stdex_assert(str2);
74 size_t i;
75 for (i = 0; ; ++i) {
76 auto a = stdex::tolower(str1[i]);
77 auto b = stdex::tolower(str2[i]);
78 auto a_end = !a || stdex::ispunct(a);
79 auto b_end = !b || stdex::ispunct(b);
80 if (a_end && b_end) return false;
81 if (b_end || a > b) return false;
82 if (a_end || a < b) return true;
83 }
84 }
85 };
86 struct language_mapping
87 {
88 langid id;
89 std::map<const char *, langid, stricmp_less> sublanguages;
90 };
91 static const std::map<const char *, language_mapping, stricmp_less> languages = {
92 {"af", {1078, {}}}, // Afrikaans
93 {"ar", {0x01, // Arabic
94 {
95 {"ae", 14337}, // Arabic(U.A.E.)
96 {"bh", 15361}, // Arabic(Bahrain)
97 {"dz", 5121}, // Arabic(Algeria)
98 {"eg", 3073}, // Arabic(Egypt)
99 {"iq", 2049}, // Arabic(Iraq)
100 {"jo", 11265}, // Arabic(Jordan)
101 {"kw", 13313}, // Arabic(Kuwait)
102 {"lb", 12289}, // Arabic(Lebanon)
103 {"ly", 4097}, // Arabic(Libya)
104 {"ma", 6145}, // Arabic(Morocco)
105 {"om", 8193}, // Arabic(Oman)
106 {"qa", 16385}, // Arabic(Qatar)
107 {"sa", 1025}, // Arabic(Saudi Arabia)
108 {"sy", 10241}, // Arabic(Syria)
109 {"tn", 7169}, // Arabic(Tunisia)
110 {"ye", 9217}, // Arabic(Yemen)
111 }}},
112 {"be", {1059, {}}}, // Belarusian
113 {"bg", {1026, {}}}, // Bulgarian
114 {"ca", {1027, {}}}, // Catalan
115 {"cs", {1029, {}}}, // Czech
116 {"da", {1030, {}}}, // Danish
117 {"de", {0x07, // German
118 {
119 {"at", 3079}, // German(Austrian)
120 {"ch", 2055}, // German(Swiss)
121 {"de", 1031}, // German(Germany)
122 {"li", 5127}, // German(Liechtenstein)
123 {"lu", 4103}, // German(Luxembourg)
124 }}},
125 {"el", {1032, {}}}, // Greek
126 {"en", {0x09, // English
127 {
128 {"au", 3081}, // English(Australian)
129 {"bz", 10249}, // English(Belize)
130 {"ca", 4105}, // English(Canadian)
131 {"ca", 9225}, // English(Caribbean)
132 {"gb", 2057}, // English(British)
133 {"ie", 6153}, // English(Ireland)
134 {"jm", 8201}, // English(Jamaica)
135 {"nz", 5129}, // English(New Zealand)
136 {"tt", 11273}, // English(Trinidad)
137 {"us", 1033}, // English(United States)
138 {"za", 7177}, // English(South Africa)
139 }}},
140 {"es", {0x0a, // Spanish
141 {
142 {"ar", 11274}, // Spanish(Argentina)
143 {"bo", 16394}, // Spanish(Bolivia)
144 {"c", 13322}, // Spanish(Chile)
145 {"co", 9226}, // Spanish(Colombia)
146 {"cr", 5130}, // Spanish(Costa Rica)
147 {"do", 7178}, // Spanish(Dominican Republic)
148 {"ec", 12298}, // Spanish(Ecuador)
149 {"es", 1034}, // Spanish(Spain)
150 {"gt", 4106}, // Spanish(Guatemala)
151 {"hn", 18442}, // Spanish(Honduras)
152 {"mx", 2058}, // Spanish(Mexican)
153 {"ni", 19466}, // Spanish(Nicaragua)
154 {"pa", 6154}, // Spanish(Panama)
155 {"pe", 10250}, // Spanish(Peru)
156 {"pr", 20490}, // Spanish(Puerto Rico)
157 {"py", 15370}, // Spanish(Paraguay)
158 {"sv", 17418}, // Spanish(El Salvador)
159 {"uy", 14346}, // Spanish(Uruguay)
160 {"ve", 8202}, // Spanish(Venezuela)
161 }}},
162 {"et", {1061, {}}}, // Estonian
163 {"eu", {1069, {}}}, // Basque
164 {"fa", {1065, {}}}, // Farsi
165 {"fi", {1035, {}}}, // Finnish
166 {"fo", {1080, {}}}, // Faeroese
167 {"fr", {0x0c, // French
168 {
169 {"be", 2060}, // French(Belgian)
170 {"ca", 3084}, // French(Canadian)
171 {"ch", 4108}, // French(Swiss)
172 {"fr", 1036}, // French(Luxembourg)
173 {"lu", 5132}, // French(Luxembourg)
174 }}},
175 {"gd", {1084, {}}}, // Gaelic(Scots)
176 {"he", {1037, {}}}, // Hebrew
177 {"hi", {1081, {}}}, // Hindi
178 {"hr", {1050, {}}}, // Croatian
179 {"hu", {1038, {}}}, // Hungarian
180 {"in", {1057, {}}}, // Indonesian
181 {"is", {1039, {}}}, // Icelandic
182 {"it", {0x10, // Italian
183 {
184 {"ch", 2064}, // Italian(Swiss)
185 {"it", 1040}, // Italian(Italy)
186 }}},
187 {"ja", {1041, {}}}, // Japanese
188 {"ji", {1085, {}}}, // Yiddish
189 {"ko", {0x12, // Korean
190 {
191 {"johab", 2066}, // Korean(Johab)
192 {"kr", 1042}, // Korean(Korea)
193 }}},
194 {"lt", {1063, {}}}, // Lithuanian
195 {"lv", {1062, {}}}, // Latvian
196 {"mk", {1071, {}}}, // Macedonian (FYROM)
197 {"ms", {1086, {}}}, // Malaysian
198 {"mt", {1082, {}}}, // Maltese
199 {"nl", {0x13, // Dutch
200 {
201 {"be", 2067}, // Dutch(Belgian)
202 {"nl", 1043}, // Dutch(Netherland)
203 }}},
204 {"no", {0x14, // Norwegian
205 {
206 {"bokmaal", 1044}, // Norwegian(Bokmaal)
207 {"nynorsk", 2068}, // Norwegian(Nynorsk)
208 }}},
209 {"pl", {1045, {}}}, // Polish
210 {"pt", {0x16, // Portuguese
211 {
212 {"br", 1046}, // Portuguese(Brazil)
213 {"pt", 2070}, // Portuguese(Portugal)
214 }}},
215 {"rm", {1047, {}}}, // Rhaeto-Romanic
216 {"ro", {0x18, // Romanian
217 {
218 {"mo", 2072}, // Romanian(Moldavia)
219 {"ro", 1048}, // Romanian(Romania)
220 }}},
221 {"ru", {0x19, // Russian
222 {
223 {"mo", 2073}, // Russian(Moldavia)
224 {"ru", 1049}, // Russian(Russia)
225 }}},
226 {"sb", {1070, {}}}, // Sorbian
227 {"sk", {1051, {}}}, // Slovak
228 {"sl", {1060, {}}}, // Slovenian
229 {"sq", {1052, {}}}, // Albanian
230 {"sr", {0x1a, // Serbian
231 {
232 {"cyrillic", 3098}, // Serbian(Cyrillic)
233 {"latin", 2074}, // Serbian(Latin)
234 }}},
235 {"sv", {0x1d, // Swedish
236 {
237 {"fi", 2077}, // Swedish(Finland)
238 {"se", 1053}, // Swedish(Sweden)
239 }}},
240 {"sx", {1072, {}}}, // Sutu
241 {"sz", {1083, {}}}, // Sami(Lappish)
242 {"th", {1054, {}}}, // Thai
243 {"tn", {1074, {}}}, // Tswana
244 {"tr", {1055, {}}}, // Turkish
245 {"ts", {1073, {}}}, // Tsonga
246 {"uk", {1058, {}}}, // Ukrainian
247 {"ur", {1056, {}}}, // Urdu
248 {"ve", {1075, {}}}, // Venda
249 {"vi", {1066, {}}}, // Vietnamese
250 {"xh", {1076, {}}}, // Xhosa
251 {"zh", {0x04, // Chinese
252 {
253 {"cn", 2052}, // Chinese(PRC)
254 {"hk", 3076}, // Chinese(Hong Kong)
255 {"sg", 4100}, // Chinese(Singapore)
256 {"tw", 1028}, // Chinese(Taiwan)
257 }}},
258 {"zu", {1077, {}}}, // Zulu
259 };
260
261 if (auto el = languages.find(rfc1766); el != languages.end()) {
262 if (!el->second.sublanguages.empty()) {
263 if (auto n = stdex::strlen(el->first); ispunct(rfc1766[n])) {
264 n++;
265 if (auto el_sub = el->second.sublanguages.find(&rfc1766[n]); el_sub != el->second.sublanguages.end())
266 return el_sub->second;
267 }
268 }
269 return el->second.id;
270 }
271 return langid_unknown;
272 }
273#endif
274}
275
276#if defined(__GNUC__)
277#pragma GCC diagnostic pop
278#endif