20#pragma GCC diagnostic push
21#pragma GCC diagnostic ignored "-Wexit-time-destructors"
// Language identifier type. Two alternative aliases are listed: one onto LANGID
// and one onto uint16_t.
// NOTE(review): they are presumably selected by a preprocessor conditional that
// is not visible in this excerpt - confirm.
27 using langid = LANGID;
29 using langid = uint16_t;
// Sentinel returned by langid_from_rfc1766() when the tag matches no table entry.
32 constexpr langid langid_unknown = 127;
// Maps a language tag to a language identifier.
//
// rfc1766  Zero-terminated language tag. The language part is looked up first
//          (case-insensitively, up to the first punctuation character); when
//          the matched language defines sublanguages, the part after the
//          punctuation is looked up among them.
//
// Returns the sublanguage identifier when one matches, otherwise the
// identifier of the language itself, or langid_unknown when the language is
// not in the table.
41 inline langid langid_from_rfc1766(_In_z_
const char* rfc1766)
// Ordering predicate used as the comparator of the lookup maps: returns true
// when str1 sorts before str2. Characters are compared after lowercasing, and
// a string is considered finished at its terminating zero or at the first
// punctuation character, whichever comes first.
45 bool operator()(_In_z_
const char* str1, _In_z_
const char* str2)
const
// Lowercase the characters at position i of both strings.
// NOTE(review): i is presumably advanced by an enclosing loop that is not
// visible in this excerpt - confirm.
51 auto a = stdex::tolower(str1[i]);
52 auto b = stdex::tolower(str2[i]);
// Terminating zero and punctuation both count as end-of-string.
53 auto a_end = !a || stdex::ispunct(a);
54 auto b_end = !b || stdex::ispunct(b);
// Both strings ended at the same position: they are equal, so str1 is not less.
55 if (a_end && b_end)
return false;
// str2 ended first, or str1 holds the greater character: str1 is not less.
56 if (b_end || a > b)
return false;
// str1 ended first, or str1 holds the smaller character: str1 is less.
57 if (a_end || a < b)
return true;
// Value type of the `languages` table: the data stored for one language.
// NOTE(review): the lookup code also reads an `id` member (el->second.id);
// its declaration is not visible in this excerpt.
61 struct language_mapping
// Sublanguage identifiers keyed by the tag part that follows the language;
// keys are compared with stricmp_less.
64 std::map<const char*, langid, stricmp_less> sublanguages;
// Lookup table from language tag to language_mapping. Keys are compared with
// stricmp_less. The {key, identifier} pairs below are the entries of the
// per-language `sublanguages` maps; the identifiers are plain numeric values
// cast to langid.
// NOTE(review): the outer language keys that separate the groups of pairs are
// not visible in this excerpt.
66 static const std::map<const char*, language_mapping, stricmp_less> languages = {
70 {
"ae",
static_cast<langid
>(14337)},
71 {
"bh",
static_cast<langid
>(15361)},
72 {
"dz",
static_cast<langid
>(5121)},
73 {
"eg",
static_cast<langid
>(3073)},
74 {
"iq",
static_cast<langid
>(2049)},
75 {
"jo",
static_cast<langid
>(11265)},
76 {
"kw",
static_cast<langid
>(13313)},
77 {
"lb",
static_cast<langid
>(12289)},
78 {
"ly",
static_cast<langid
>(4097)},
79 {
"ma",
static_cast<langid
>(6145)},
80 {
"om",
static_cast<langid
>(8193)},
81 {
"qa",
static_cast<langid
>(16385)},
82 {
"sa",
static_cast<langid
>(1025)},
83 {
"sy",
static_cast<langid
>(10241)},
84 {
"tn",
static_cast<langid
>(7169)},
85 {
"ye",
static_cast<langid
>(9217)},
94 {
"at",
static_cast<langid
>(3079)},
95 {
"ch",
static_cast<langid
>(2055)},
96 {
"de",
static_cast<langid
>(1031)},
97 {
"li",
static_cast<langid
>(5127)},
98 {
"lu",
static_cast<langid
>(4103)},
103 {
"au",
static_cast<langid
>(3081)},
104 {
"bz",
static_cast<langid
>(10249)},
105 {
"ca",
static_cast<langid
>(4105)},
// This entry used to repeat the key "ca" of the previous pair. A std::map
// keeps a single element per key, so the identifier 9225 (0x2409) could never
// be returned. It is keyed "cb" instead, the legacy region code of the
// "en-cb" tag that 0x2409 is known by.
106 {
"cb",
static_cast<langid
>(9225)},
107 {
"gb",
static_cast<langid
>(2057)},
108 {
"ie",
static_cast<langid
>(6153)},
109 {
"jm",
static_cast<langid
>(8201)},
110 {
"nz",
static_cast<langid
>(5129)},
111 {
"tt",
static_cast<langid
>(11273)},
112 {
"us",
static_cast<langid
>(1033)},
113 {
"za",
static_cast<langid
>(7177)},
117 {
"ar",
static_cast<langid
>(11274)},
118 {
"bo",
static_cast<langid
>(16394)},
119 {
"cl",
static_cast<langid
>(13322)},
120 {
"co",
static_cast<langid
>(9226)},
121 {
"cr",
static_cast<langid
>(5130)},
122 {
"do",
static_cast<langid
>(7178)},
123 {
"ec",
static_cast<langid
>(12298)},
124 {
"es",
static_cast<langid
>(1034)},
125 {
"gt",
static_cast<langid
>(4106)},
126 {
"hn",
static_cast<langid
>(18442)},
127 {
"mx",
static_cast<langid
>(2058)},
128 {
"ni",
static_cast<langid
>(19466)},
129 {
"pa",
static_cast<langid
>(6154)},
130 {
"pe",
static_cast<langid
>(10250)},
131 {
"pr",
static_cast<langid
>(20490)},
132 {
"py",
static_cast<langid
>(15370)},
133 {
"sv",
static_cast<langid
>(17418)},
134 {
"uy",
static_cast<langid
>(14346)},
135 {
"ve",
static_cast<langid
>(8202)},
144 {
"be",
static_cast<langid
>(2060)},
145 {
"ca",
static_cast<langid
>(3084)},
146 {
"ch",
static_cast<langid
>(4108)},
147 {
"fr",
static_cast<langid
>(1036)},
148 {
"lu",
static_cast<langid
>(5132)},
159 {
"ch",
static_cast<langid
>(2064)},
160 {
"it",
static_cast<langid
>(1040)},
166 {
"johab",
static_cast<langid
>(2066)},
167 {
"kr",
static_cast<langid
>(1042)},
176 {
"be",
static_cast<langid
>(2067)},
177 {
"nl",
static_cast<langid
>(1043)},
181 {
"bokmaal",
static_cast<langid
>(1044)},
182 {
"nynorsk",
static_cast<langid
>(2068)},
187 {
"br",
static_cast<langid
>(1046)},
188 {
"pt",
static_cast<langid
>(2070)},
193 {
"mo",
static_cast<langid
>(2072)},
194 {
"ro",
static_cast<langid
>(1048)},
198 {
"mo",
static_cast<langid
>(2073)},
199 {
"ru",
static_cast<langid
>(1049)},
207 {
"cyrillic",
static_cast<langid
>(3098)},
208 {
"latin",
static_cast<langid
>(2074)},
212 {
"fi",
static_cast<langid
>(2077)},
213 {
"se",
static_cast<langid
>(1053)},
228 {
"cn",
static_cast<langid
>(2052)},
229 {
"hk",
static_cast<langid
>(3076)},
230 {
"sg",
static_cast<langid
>(4100)},
231 {
"tw",
static_cast<langid
>(1028)},
// Look the tag up in the language table. stricmp_less stops comparing at the
// first punctuation character, so only the leading language part of the tag
// takes part in this match.
236 if (
auto el = languages.find(rfc1766); el != languages.end()) {
// A second lookup is only needed for languages that define sublanguages.
237 if (!el->second.sublanguages.empty()) {
// n is the length of the matched language key; the tag must continue with a
// punctuation character at that position to carry a sublanguage part.
238 if (
auto n = stdex::strlen(el->first); ispunct(rfc1766[n])) {
// NOTE(review): the sublanguage search starts at rfc1766[n]. Unless n is
// advanced past the punctuation character on a line not visible in this
// excerpt, stricmp_less would treat the search key as ending immediately -
// confirm.
240 if (
auto el_sub = el->second.sublanguages.find(&rfc1766[n]); el_sub != el->second.sublanguages.end())
241 return el_sub->second;
// No sublanguage part, or none that matched: use the language's own identifier.
244 return el->second.id;
// The language part is not in the table.
246 return langid_unknown;
251#pragma GCC diagnostic pop