New tables, test, compile for Oct 2014 small table release
git-svn-id: https://cld2.googlecode.com/svn/trunk@176 b252ecd4-b096-bf77-eb8e-91563289f87e
This commit is contained in:
4615
internal/cld2_generated_deltaoctachrome20141015.cc
Normal file
4615
internal/cld2_generated_deltaoctachrome20141015.cc
Normal file
File diff suppressed because it is too large
Load Diff
2206
internal/cld2_generated_distinctoctachrome20141015.cc
Normal file
2206
internal/cld2_generated_distinctoctachrome20141015.cc
Normal file
File diff suppressed because it is too large
Load Diff
84504
internal/cld2_generated_quadchrome20141015_2.cc
Normal file
84504
internal/cld2_generated_quadchrome20141015_2.cc
Normal file
File diff suppressed because it is too large
Load Diff
329
internal/cld2_unittest_20141015.cc
Normal file
329
internal/cld2_unittest_20141015.cc
Normal file
@@ -0,0 +1,329 @@
|
|||||||
|
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
//
|
||||||
|
// Author: dsites@google.com (Dick Sites)
|
||||||
|
//
|
||||||
|
// Unit test compact language detector, CLD2
|
||||||
|
// Compile with -Davoid_utf8_string_constants if your compiler cannot
|
||||||
|
// handle UTF-8 string constants
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "../public/compact_lang_det.h"
|
||||||
|
#include "../public/encodings.h"
|
||||||
|
#include "unittest_data.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace CLD2 {
|
||||||
|
|
||||||
|
// Test strings.
|
||||||
|
// Plain ASCII English sample used as the first, simplest test case.
// The adjacent literals are concatenated by the compiler into one string.
const char* kTeststr_en =
    "confiscation of goods is assigned as the penalty part "
    "most of the courts consist of members and when it is necessary "
    "to bring public cases before a jury of members "
    "two courts combine for the purpose "
    "the most important cases of all are brought jurors or";
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
Language lang;
|
||||||
|
const char* text;
|
||||||
|
} TestPair;
|
||||||
|
|
||||||
|
|
||||||
|
// Table of (expected language, sample text) pairs walked by RunTests.
// The kTeststr_* samples other than kTeststr_en are declared outside this
// file. The table is terminated by an entry whose text is NULL.
static const TestPair kTestPair[] = {
  // A simple case to begin
  { ENGLISH,          kTeststr_en },

  // 20 languages recognized via Unicode script
  { ARMENIAN,         kTeststr_hy_Armn },
  { CHEROKEE,         kTeststr_chr_Cher },
  { DHIVEHI,          kTeststr_dv_Thaa },
  { GEORGIAN,         kTeststr_ka_Geor },
  { GREEK,            kTeststr_el_Grek },
  { GUJARATI,         kTeststr_gu_Gujr },
  { INUKTITUT,        kTeststr_iu_Cans },
  { KANNADA,          kTeststr_kn_Knda },
  { KHMER,            kTeststr_km_Khmr },
  { LAOTHIAN,         kTeststr_lo_Laoo },
  { LIMBU,            kTeststr_lif_Limb },
  { MALAYALAM,        kTeststr_ml_Mlym },
  { ORIYA,            kTeststr_or_Orya },
  { PUNJABI,          kTeststr_pa_Guru },
  { SINHALESE,        kTeststr_si_Sinh },
  { SYRIAC,           kTeststr_syr_Syrc },
  { TAGALOG,          kTeststr_tl_Tglg },       // Also in quadgram list below
  { TAMIL,            kTeststr_ta_Taml },
  { TELUGU,           kTeststr_te_Telu },
  { THAI,             kTeststr_th_Thai },

  // 4 languages recognized via single letters
  { CHINESE,          kTeststr_zh_Hans },
  { CHINESE_T,        kTeststr_zh_Hant },
  { JAPANESE,         kTeststr_ja_Hani },
  { KOREAN,           kTeststr_ko_Hani },

  // 60 languages recognized via combinations of four letters
  { AFRIKAANS,        kTeststr_af_Latn },
  { ALBANIAN,         kTeststr_sq_Latn },
  { ARABIC,           kTeststr_ar_Arab },
  { AZERBAIJANI,      kTeststr_az_Latn },
  { BASQUE,           kTeststr_eu_Latn },
  { BELARUSIAN,       kTeststr_be_Cyrl },
  { BENGALI,          kTeststr_bn_Beng },       // No Assamese in subset
  { BIHARI,           kTeststr_bh_Deva },
  { BULGARIAN,        kTeststr_bg_Cyrl },
  { CATALAN,          kTeststr_ca_Latn },
  { CEBUANO,          kTeststr_ceb_Latn },
  { CROATIAN,         kTeststr_hr_Latn },
  { CZECH,            kTeststr_cs_Latn },
  { DANISH,           kTeststr_da_Latn },
  { DUTCH,            kTeststr_nl_Latn },
  { ENGLISH,          kTeststr_en_Latn },
  { ESTONIAN,         kTeststr_et_Latn },
  { FINNISH,          kTeststr_fi_Latn },
  { FRENCH,           kTeststr_fr_Latn },
  { GALICIAN,         kTeststr_gl_Latn },
  { GANDA,            kTeststr_lg_Latn },
  { GERMAN,           kTeststr_de_Latn },
  { HAITIAN_CREOLE,   kTeststr_ht_Latn },
  { HEBREW,           kTeststr_iw_Hebr },
  { HINDI,            kTeststr_hi_Deva },
  { HMONG,            kTeststr_blu_Latn },
  { HUNGARIAN,        kTeststr_hu_Latn },
  { ICELANDIC,        kTeststr_is_Latn },
  { INDONESIAN,       kTeststr_id_Latn },
  { IRISH,            kTeststr_ga_Latn },
  { ITALIAN,          kTeststr_it_Latn },
  { JAVANESE,         kTeststr_jw_Latn },
  { KINYARWANDA,      kTeststr_rw_Latn },
  { LATVIAN,          kTeststr_lv_Latn },
  { LITHUANIAN,       kTeststr_lt_Latn },
  { MACEDONIAN,       kTeststr_mk_Cyrl },
  { MALAY,            kTeststr_ms_Latn },
  { MALTESE,          kTeststr_mt_Latn },
  { MARATHI,          kTeststr_mr_Deva },
  { NEPALI,           kTeststr_ne_Deva },
  { NORWEGIAN,        kTeststr_no_Latn },
  { PERSIAN,          kTeststr_fa_Arab },
  { POLISH,           kTeststr_pl_Latn },
  { PORTUGUESE,       kTeststr_pt_Latn },
  { ROMANIAN,         kTeststr_ro_Latn },
  { ROMANIAN,         kTeststr_ro_Cyrl },
  { RUSSIAN,          kTeststr_ru_Cyrl },
  { SCOTS_GAELIC,     kTeststr_gd_Latn },
  { SERBIAN,          kTeststr_sr_Cyrl },
  { SERBIAN,          kTeststr_sr_Latn },
  { SLOVAK,           kTeststr_sk_Latn },
  { SLOVENIAN,        kTeststr_sl_Latn },
  { SPANISH,          kTeststr_es_Latn },
  { SWAHILI,          kTeststr_sw_Latn },
  { SWEDISH,          kTeststr_sv_Latn },
  { TAGALOG,          kTeststr_tl_Latn },
  { TURKISH,          kTeststr_tr_Latn },
  { UKRAINIAN,        kTeststr_uk_Cyrl },
  { URDU,             kTeststr_ur_Arab },
  { VIETNAMESE,       kTeststr_vi_Latn },
  { WELSH,            kTeststr_cy_Latn },
  { YIDDISH,          kTeststr_yi_Hebr },

  // Added 2013.08.31 so-Latn ig-Latn ha-Latn yo-Latn zu-Latn
  // Deleted 2014.10.15 so-Latn ig-Latn ha-Latn yo-Latn zu-Latn
  //{ SOMALI,         kTeststr_so_Latn },
  //{ IGBO,           kTeststr_ig_Latn },
  //{ HAUSA,          kTeststr_ha_Latn },
  //{ YORUBA,         kTeststr_yo_Latn },
  //{ ZULU,           kTeststr_zu_Latn },

  // Added 2014.01.22 bs-Latn
  { BOSNIAN,          kTeststr_bs_Latn },

  // Added 2014.10.15
  { KAZAKH,           kTeststr_kk_Cyrl },
  { KURDISH,          kTeststr_ku_Latn },       // aka kmr
  { KYRGYZ,           kTeststr_ky_Cyrl },
  { MALAGASY,         kTeststr_mg_Latn },
  { MALAYALAM,        kTeststr_ml_Mlym },
  { BURMESE,          kTeststr_my_Mymr },
  { NYANJA,           kTeststr_ny_Latn },
  { SINHALESE,        kTeststr_si_Sinh },       // aka SINHALA
  { SESOTHO,          kTeststr_st_Latn },
  { SUNDANESE,        kTeststr_su_Latn },
  { TAJIK,            kTeststr_tg_Cyrl },
  { UZBEK,            kTeststr_uz_Latn },
  { UZBEK,            kTeststr_uz_Cyrl },

  // 2 statistically-close languages
  { INDONESIAN,       kTeststr_id_close },
  { MALAY,            kTeststr_ms_close },

  // Simple intermixed French/English text
  { FRENCH,           kTeststr_fr_en_Latn },

  // Simple English with bad UTF-8
  { UNKNOWN_LANGUAGE, kTeststr_en_Latn_bad_UTF8 },

  // Cross-check the main quadgram table build date
  // Change the expected language each time it is rebuilt
  // { WELSH,         kTeststr_version },       // 2013.07.15
  // { AZERBAIJANI,   kTeststr_version },       // 2014.01.31
  { TURKISH,          kTeststr_version },       // 2014.10.16

  { UNKNOWN_LANGUAGE, NULL },                   // Must be last
};
|
||||||
|
|
||||||
|
|
||||||
|
bool OneTest(int flags, bool get_vector,
|
||||||
|
Language lang_expected, const char* buffer, int buffer_length) {
|
||||||
|
bool is_plain_text = true;
|
||||||
|
const char* tldhint = "";
|
||||||
|
const Encoding enchint = UNKNOWN_ENCODING;
|
||||||
|
const Language langhint = UNKNOWN_LANGUAGE;
|
||||||
|
const CLDHints cldhints = {NULL, tldhint, enchint, langhint};
|
||||||
|
Language language3[3];
|
||||||
|
int percent3[3];
|
||||||
|
double normalized_score3[3];
|
||||||
|
ResultChunkVector resultchunkvector;
|
||||||
|
int text_bytes;
|
||||||
|
bool is_reliable;
|
||||||
|
int valid_prefix_bytes;
|
||||||
|
|
||||||
|
Language lang_detected = ExtDetectLanguageSummaryCheckUTF8(
|
||||||
|
buffer,
|
||||||
|
buffer_length,
|
||||||
|
is_plain_text,
|
||||||
|
&cldhints,
|
||||||
|
flags,
|
||||||
|
language3,
|
||||||
|
percent3,
|
||||||
|
normalized_score3,
|
||||||
|
get_vector ? &resultchunkvector : NULL,
|
||||||
|
&text_bytes,
|
||||||
|
&is_reliable,
|
||||||
|
&valid_prefix_bytes);
|
||||||
|
// expose DumpExtLang DumpLanguages
|
||||||
|
bool good_utf8 = (valid_prefix_bytes == buffer_length);
|
||||||
|
if (!good_utf8) {
|
||||||
|
fprintf(stderr, "*** Bad UTF-8 after %d bytes<br>\n", valid_prefix_bytes);
|
||||||
|
fprintf(stdout, "*** Bad UTF-8 after %d bytes\n", valid_prefix_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ok = (lang_detected == lang_expected);
|
||||||
|
ok &= good_utf8;
|
||||||
|
|
||||||
|
if (!ok) {
|
||||||
|
if ((flags & kCLDFlagHtml) != 0) {
|
||||||
|
fprintf(stderr, "*** Wrong result. expected %s, detected %s<br>\n",
|
||||||
|
LanguageName(lang_expected), LanguageName(lang_detected));
|
||||||
|
}
|
||||||
|
fprintf(stdout, "*** Wrong result. expected %s, detected %s\n",
|
||||||
|
LanguageName(lang_expected), LanguageName(lang_detected));
|
||||||
|
fprintf(stdout, "%s\n\n", buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (get_vector) {
|
||||||
|
DumpResultChunkVector(stderr, buffer, &resultchunkvector);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
DumpExtLang(flags, summary_lang, language3, percent3, normalized_score3,
|
||||||
|
text_bytes, is_reliable, n);
|
||||||
|
|
||||||
|
if ((flags & kCLDFlagHtml) != 0) {
|
||||||
|
DumpLanguages(summary_lang,
|
||||||
|
language3, percent3, text_bytes, is_reliable, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stdout, " SummaryLanguage %s%s at %u of %d, %s\n",
|
||||||
|
LanguageName(summary_lang),
|
||||||
|
is_reliable ? "" : "(un-reliable)",
|
||||||
|
bytes_consumed,
|
||||||
|
n,
|
||||||
|
argv[1]);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
void InitHtmlOut(int flags) {
|
||||||
|
#if 1
|
||||||
|
if ((flags & kCLDFlagHtml) != 0) {
|
||||||
|
// Begin HTML file
|
||||||
|
fprintf(stderr, "<html><meta charset=\"UTF-8\"><body>\n");
|
||||||
|
// Encourage browsers to print background colors
|
||||||
|
fprintf(stderr, "<style media=\"print\" type=\"text/css\"> "
|
||||||
|
":root { -webkit-print-color-adjust: exact; } </style>\n");
|
||||||
|
fprintf(stderr, "<span style=\"font-size: 7pt\">\n");
|
||||||
|
fprintf(stderr, "file = %s<br>\n", "cld2_unittest");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void FinishHtmlOut(int flags) {
|
||||||
|
#if 1
|
||||||
|
if ((flags & kCLDFlagHtml) != 0) {
|
||||||
|
fprintf(stderr, "\n</span></body></html>\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
int RunTests (int flags, bool get_vector) {
|
||||||
|
fprintf(stdout, "CLD2 version: %s\n", CLD2::DetectLanguageVersion());
|
||||||
|
InitHtmlOut(flags);
|
||||||
|
bool any_fail = false;
|
||||||
|
int i = 0;
|
||||||
|
while (kTestPair[i].text != NULL) {
|
||||||
|
Language lang_expected = kTestPair[i].lang;
|
||||||
|
const char* buffer = kTestPair[i].text;
|
||||||
|
int buffer_length = strlen(buffer);
|
||||||
|
bool ok = OneTest(flags, get_vector, lang_expected, buffer, buffer_length);
|
||||||
|
if (kTestPair[i].text == kTeststr_en_Latn_bad_UTF8) {
|
||||||
|
// We expect this one to fail, so flip the value of ok
|
||||||
|
ok = !ok;
|
||||||
|
}
|
||||||
|
any_fail |= (!ok);
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
if (any_fail) {
|
||||||
|
fprintf(stderr, "FAIL\n");
|
||||||
|
fprintf(stdout, "FAIL\n");
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "PASS\n");
|
||||||
|
fprintf(stdout, "PASS\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
FinishHtmlOut(flags);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // End namespace CLD2
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
// Get command-line flags
|
||||||
|
int flags = 0;
|
||||||
|
bool get_vector = false;
|
||||||
|
for (int i = 1; i < argc; ++i) {
|
||||||
|
if (strcmp(argv[i], "--html") == 0) {flags |= CLD2::kCLDFlagHtml;}
|
||||||
|
if (strcmp(argv[i], "--cr") == 0) {flags |= CLD2::kCLDFlagCr;}
|
||||||
|
if (strcmp(argv[i], "--verbose") == 0) {flags |= CLD2::kCLDFlagVerbose;}
|
||||||
|
if (strcmp(argv[i], "--quiet") == 0) {flags |= CLD2::kCLDFlagQuiet;}
|
||||||
|
if (strcmp(argv[i], "--echo") == 0) {flags |= CLD2::kCLDFlagEcho;}
|
||||||
|
if (strcmp(argv[i], "--vector") == 0) {get_vector = true;}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CLD2::RunTests(flags, get_vector);
|
||||||
|
}
|
||||||
|
|
649
internal/cld_generated_score_quad_octa_20141015_2.cc
Normal file
649
internal/cld_generated_score_quad_octa_20141015_2.cc
Normal file
@@ -0,0 +1,649 @@
|
|||||||
|
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Generated by ngram_merge on 2011-01-21 10:50:46 from:
|
||||||
|
// score_me file /export/hda3/cld/pre2010/b0_samp_prune_20100722.utf8
|
||||||
|
// cld_generated_quad.bin, built 20110121 bytes 4443812 hash 462c-16c4
|
||||||
|
// cld_generated_deltaocta.bin, built 20110121 bytes 1053284 hash c834-81f5
|
||||||
|
|
||||||
|
// 1 # dsites Added text for
|
||||||
|
// ak haw ig kha ks mfe mo nd nso ny ve
|
||||||
|
// bs-Cyrl/Latn hr-Latn sr-Cyrl/Latn sr-ME-Latn
|
||||||
|
|
||||||
|
|
||||||
|
namespace CLD2 {
|
||||||
|
|
||||||
|
// Average score per 1024 bytes
|
||||||
|
// Total number of entries in kAvgDeltaOctaScore: 614 rows with four
// per-script columns (Latn, Cyrl, Arab, Other) in each row.
static const int kAvgDeltaOctaScoreSize = 4 * 614;
|
||||||
|
extern const short kAvgDeltaOctaScore[kAvgDeltaOctaScoreSize] = {
|
||||||
|
// Latn Cyrl Arab Other script
|
||||||
|
// Updated 20140204 for CLD2 full
|
||||||
|
1163, 0, 0, 0, // 0 ENGLISH en
|
||||||
|
983, 0, 0, 0, // 1 DANISH da
|
||||||
|
1036, 0, 0, 0, // 2 DUTCH nl
|
||||||
|
1245, 0, 0, 0, // 3 FINNISH fi
|
||||||
|
873, 0, 0, 0, // 4 FRENCH fr
|
||||||
|
1146, 0, 0, 0, // 5 GERMAN de
|
||||||
|
0, 0, 0, 901, // 6 HEBREW iw
|
||||||
|
736, 0, 0, 0, // 7 ITALIAN it
|
||||||
|
0, 0, 0, 3100, // 8 Japanese ja
|
||||||
|
0, 0, 0, 3669, // 9 Korean ko
|
||||||
|
836, 0, 0, 0, // 10 NORWEGIAN no
|
||||||
|
1372, 0, 0, 0, // 11 POLISH pl
|
||||||
|
1044, 0, 0, 0, // 12 PORTUGUESE pt
|
||||||
|
0, 648, 0, 0, // 13 RUSSIAN ru
|
||||||
|
640, 0, 0, 0, // 14 SPANISH es
|
||||||
|
866, 0, 0, 0, // 15 SWEDISH sv
|
||||||
|
0, 0, 0, 1928, // 16 Chinese zh
|
||||||
|
1417, 0, 0, 0, // 17 CZECH cs
|
||||||
|
0, 0, 0, 1024, // 18 GREEK el
|
||||||
|
1212, 0, 0, 0, // 19 ICELANDIC is
|
||||||
|
1295, 0, 0, 0, // 20 LATVIAN lv
|
||||||
|
1169, 0, 0, 0, // 21 LITHUANIAN lt
|
||||||
|
898, 894, 0, 0, // 22 ROMANIAN ro
|
||||||
|
1370, 0, 0, 0, // 23 HUNGARIAN hu
|
||||||
|
1114, 0, 0, 0, // 24 ESTONIAN et
|
||||||
|
0, 0, 0, 0, // 25 Ignore xxx
|
||||||
|
0, 0, 0, 0, // 26 Unknown un
|
||||||
|
0, 684, 0, 0, // 27 BULGARIAN bg
|
||||||
|
575, 0, 0, 0, // 28 CROATIAN hr
|
||||||
|
574, 1023, 0, 0, // 29 SERBIAN sr
|
||||||
|
1341, 0, 0, 0, // 30 IRISH ga
|
||||||
|
809, 0, 0, 0, // 31 GALICIAN gl
|
||||||
|
1096, 0, 0, 0, // 32 TAGALOG tl
|
||||||
|
1184, 0, 0, 0, // 33 TURKISH tr
|
||||||
|
0, 801, 0, 0, // 34 UKRAINIAN uk
|
||||||
|
0, 0, 0, 822, // 35 HINDI hi
|
||||||
|
0, 793, 0, 0, // 36 MACEDONIAN mk
|
||||||
|
0, 0, 0, 569, // 37 BENGALI bn
|
||||||
|
1163, 0, 0, 0, // 38 INDONESIAN id
|
||||||
|
887, 0, 0, 0, // 39 LATIN la
|
||||||
|
1272, 0, 0, 0, // 40 MALAY ms
|
||||||
|
0, 0, 0, 1024, // 41 MALAYALAM ml
|
||||||
|
1593, 0, 0, 0, // 42 WELSH cy
|
||||||
|
0, 0, 0, 620, // 43 NEPALI ne
|
||||||
|
0, 0, 0, 1024, // 44 TELUGU te
|
||||||
|
1265, 0, 0, 0, // 45 ALBANIAN sq
|
||||||
|
0, 0, 0, 1024, // 46 TAMIL ta
|
||||||
|
0, 862, 0, 0, // 47 BELARUSIAN be
|
||||||
|
1031, 0, 0, 0, // 48 JAVANESE jw
|
||||||
|
701, 0, 0, 0, // 49 OCCITAN oc
|
||||||
|
0, 0, 981, 0, // 50 URDU ur
|
||||||
|
0, 0, 0, 614, // 51 BIHARI bh
|
||||||
|
0, 0, 0, 1024, // 52 GUJARATI gu
|
||||||
|
0, 0, 0, 1024, // 53 THAI th
|
||||||
|
0, 0, 872, 0, // 54 ARABIC ar
|
||||||
|
720, 0, 0, 0, // 55 CATALAN ca
|
||||||
|
844, 0, 0, 0, // 56 ESPERANTO eo
|
||||||
|
1312, 0, 0, 0, // 57 BASQUE eu
|
||||||
|
489, 0, 0, 0, // 58 INTERLINGUA ia
|
||||||
|
0, 0, 0, 1024, // 59 KANNADA kn
|
||||||
|
0, 0, 0, 1024, // 60 PUNJABI pa
|
||||||
|
1492, 0, 0, 0, // 61 SCOTS_GAELIC gd
|
||||||
|
1058, 0, 0, 0, // 62 SWAHILI sw
|
||||||
|
744, 0, 0, 0, // 63 SLOVENIAN sl
|
||||||
|
0, 0, 0, 643, // 64 MARATHI mr
|
||||||
|
1225, 0, 0, 0, // 65 MALTESE mt
|
||||||
|
1450, 0, 0, 0, // 66 VIETNAMESE vi
|
||||||
|
877, 0, 0, 0, // 67 FRISIAN fy
|
||||||
|
1396, 0, 0, 0, // 68 SLOVAK sk
|
||||||
|
0, 0, 0, 1908, // 69 ChineseT zh-Hant
|
||||||
|
1135, 0, 0, 0, // 70 FAROESE fo
|
||||||
|
887, 0, 0, 0, // 71 SUNDANESE su
|
||||||
|
1263, 966, 1054, 0, // 72 UZBEK uz
|
||||||
|
0, 0, 0, 469, // 73 AMHARIC am
|
||||||
|
1393, 0, 0, 0, // 74 AZERBAIJANI az
|
||||||
|
0, 0, 0, 1024, // 75 GEORGIAN ka
|
||||||
|
0, 0, 0, 465, // 76 TIGRINYA ti
|
||||||
|
0, 0, 890, 0, // 77 PERSIAN fa
|
||||||
|
608, 0, 0, 0, // 78 BOSNIAN bs
|
||||||
|
0, 0, 0, 1024, // 79 SINHALESE si
|
||||||
|
996, 0, 0, 0, // 80 NORWEGIAN_N nn
|
||||||
|
0, 0, 0, 0, // 81 81
|
||||||
|
0, 0, 0, 0, // 82 82
|
||||||
|
1081, 0, 0, 0, // 83 XHOSA xh
|
||||||
|
1254, 0, 0, 0, // 84 ZULU zu
|
||||||
|
1223, 0, 0, 0, // 85 GUARANI gn
|
||||||
|
961, 0, 0, 0, // 86 SESOTHO st
|
||||||
|
1226, 748, 0, 0, // 87 TURKMEN tk
|
||||||
|
0, 941, 963, 0, // 88 KYRGYZ ky
|
||||||
|
1230, 0, 0, 0, // 89 BRETON br
|
||||||
|
0, 0, 0, 0, // 90 TWI tw
|
||||||
|
0, 0, 0, 1088, // 91 YIDDISH yi
|
||||||
|
0, 0, 0, 0, // 92 92
|
||||||
|
1375, 0, 0, 0, // 93 SOMALI so
|
||||||
|
461, 1068, 1019, 0, // 94 UIGHUR ug
|
||||||
|
718, 0, 1074, 0, // 95 KURDISH ku Latn new 2014.10.15
|
||||||
|
0, 1130, 0, 1024, // 96 MONGOLIAN mn
|
||||||
|
0, 0, 0, 1024, // 97 ARMENIAN hy
|
||||||
|
0, 0, 0, 1024, // 98 LAOTHIAN lo
|
||||||
|
0, 0, 947, 0, // 99 SINDHI sd
|
||||||
|
878, 0, 0, 0, // 100 RHAETO_ROMANCE rm
|
||||||
|
1072, 0, 0, 0, // 101 AFRIKAANS af
|
||||||
|
963, 0, 0, 0, // 102 LUXEMBOURGISH lb
|
||||||
|
0, 0, 0, 1024, // 103 BURMESE my
|
||||||
|
0, 0, 0, 1024, // 104 KHMER km
|
||||||
|
0, 0, 0, 684, // 105 TIBETAN bo
|
||||||
|
0, 0, 0, 1024, // 106 DHIVEHI dv
|
||||||
|
0, 0, 0, 1024, // 107 CHEROKEE chr
|
||||||
|
0, 0, 0, 1024, // 108 SYRIAC syr
|
||||||
|
0, 0, 0, 1024, // 109 LIMBU lif
|
||||||
|
0, 0, 0, 1024, // 110 ORIYA or
|
||||||
|
0, 0, 0, 610, // 111 ASSAMESE as
|
||||||
|
740, 0, 0, 0, // 112 CORSICAN co
|
||||||
|
607, 0, 0, 0, // 113 INTERLINGUE ie
|
||||||
|
0, 955, 744, 0, // 114 KAZAKH kk
|
||||||
|
1060, 0, 0, 0, // 115 LINGALA ln
|
||||||
|
0, 0, 0, 0, // 116 116
|
||||||
|
0, 0, 888, 0, // 117 PASHTO ps
|
||||||
|
1613, 0, 0, 0, // 118 QUECHUA qu
|
||||||
|
1228, 0, 0, 0, // 119 SHONA sn
|
||||||
|
0, 1076, 0, 0, // 120 TAJIK tg
|
||||||
|
1219, 860, 0, 0, // 121 TATAR tt
|
||||||
|
1257, 0, 0, 0, // 122 TONGA to
|
||||||
|
920, 0, 0, 0, // 123 YORUBA yo
|
||||||
|
0, 0, 0, 0, // 124 124
|
||||||
|
0, 0, 0, 0, // 125 125
|
||||||
|
0, 0, 0, 0, // 126 126
|
||||||
|
0, 0, 0, 0, // 127 127
|
||||||
|
1262, 0, 0, 0, // 128 MAORI mi
|
||||||
|
1106, 0, 0, 0, // 129 WOLOF wo
|
||||||
|
0, 784, 0, 0, // 130 ABKHAZIAN ab
|
||||||
|
861, 0, 0, 0, // 131 AFAR aa
|
||||||
|
1371, 0, 0, 0, // 132 AYMARA ay
|
||||||
|
0, 870, 0, 0, // 133 BASHKIR ba
|
||||||
|
1411, 0, 0, 0, // 134 BISLAMA bi
|
||||||
|
0, 0, 0, 711, // 135 DZONGKHA dz
|
||||||
|
1286, 0, 0, 0, // 136 FIJIAN fj
|
||||||
|
1783, 0, 0, 0, // 137 GREENLANDIC kl
|
||||||
|
1180, 0, 0, 0, // 138 HAUSA ha
|
||||||
|
1051, 0, 0, 0, // 139 HAITIAN_CREOLE ht
|
||||||
|
800, 0, 0, 0, // 140 INUPIAK ik
|
||||||
|
0, 0, 0, 1024, // 141 INUKTITUT iu
|
||||||
|
0, 0, 868, 0, // 142 KASHMIRI ks
|
||||||
|
1433, 0, 0, 0, // 143 KINYARWANDA rw
|
||||||
|
1530, 0, 0, 0, // 144 MALAGASY mg
|
||||||
|
943, 0, 0, 0, // 145 NAURU na
|
||||||
|
1454, 0, 0, 0, // 146 OROMO om
|
||||||
|
1060, 0, 0, 0, // 147 RUNDI rn
|
||||||
|
971, 0, 0, 0, // 148 SAMOAN sm
|
||||||
|
1589, 0, 0, 0, // 149 SANGO sg
|
||||||
|
861, 0, 0, 461, // 150 SANSKRIT sa
|
||||||
|
944, 0, 0, 0, // 151 SISWANT ss
|
||||||
|
1240, 0, 0, 0, // 152 TSONGA ts
|
||||||
|
1005, 0, 0, 0, // 153 TSWANA tn
|
||||||
|
1148, 0, 0, 0, // 154 VOLAPUK vo
|
||||||
|
1658, 0, 0, 0, // 155 ZHUANG za
|
||||||
|
1191, 0, 0, 0, // 156 KHASI kha
|
||||||
|
1088, 0, 0, 0, // 157 SCOTS sco
|
||||||
|
1026, 0, 0, 0, // 158 GANDA lg
|
||||||
|
1374, 0, 0, 0, // 159 MANX gv
|
||||||
|
0, 0, 0, 0, // 160 MONTENEGRIN sr-ME
|
||||||
|
1265, 0, 0, 0, // 161 AKAN ak
|
||||||
|
1226, 0, 0, 0, // 162 IGBO ig
|
||||||
|
931, 0, 0, 0, // 163 MAURITIAN_CREOLE mfe
|
||||||
|
1040, 0, 0, 0, // 164 HAWAIIAN haw
|
||||||
|
1102, 0, 0, 0, // 165 CEBUANO ceb
|
||||||
|
0, 0, 0, 0, // 166 EWE ee
|
||||||
|
0, 0, 0, 0, // 167 GA gaa
|
||||||
|
1927, 0, 0, 0, // 168 HMONG hmn
|
||||||
|
0, 0, 0, 0, // 169 KRIO kri
|
||||||
|
0, 0, 0, 0, // 170 LOZI loz
|
||||||
|
0, 0, 0, 0, // 171 LUBA_LULUA lua
|
||||||
|
0, 0, 0, 0, // 172 LUO_KENYA_AND_TANZANIA luo
|
||||||
|
0, 0, 0, 0, // 173 NEWARI new
|
||||||
|
1133, 0, 0, 0, // 174 NYANJA ny
|
||||||
|
0, 0, 0, 0, // 175 OSSETIAN os
|
||||||
|
0, 0, 0, 0, // 176 PAMPANGA pam
|
||||||
|
1007, 0, 0, 0, // 177 PEDI nso
|
||||||
|
0, 0, 0, 0, // 178 RAJASTHANI raj
|
||||||
|
975, 0, 0, 0, // 179 SESELWA crs
|
||||||
|
0, 0, 0, 0, // 180 TUMBUKA tum
|
||||||
|
1496, 0, 0, 0, // 181 VENDA ve
|
||||||
|
987, 0, 0, 0, // 182 WARAY_PHILIPPINES war
|
||||||
|
0, 0, 0, 0, // 183 183
|
||||||
|
0, 0, 0, 0, // 184 184
|
||||||
|
0, 0, 0, 0, // 185 185
|
||||||
|
0, 0, 0, 0, // 186 186
|
||||||
|
0, 0, 0, 0, // 187 187
|
||||||
|
0, 0, 0, 0, // 188 188
|
||||||
|
0, 0, 0, 0, // 189 189
|
||||||
|
0, 0, 0, 0, // 190 190
|
||||||
|
0, 0, 0, 0, // 191 191
|
||||||
|
0, 0, 0, 0, // 192 192
|
||||||
|
0, 0, 0, 0, // 193 193
|
||||||
|
0, 0, 0, 0, // 194 194
|
||||||
|
0, 0, 0, 0, // 195 195
|
||||||
|
0, 0, 0, 0, // 196 196
|
||||||
|
0, 0, 0, 0, // 197 197
|
||||||
|
0, 0, 0, 0, // 198 198
|
||||||
|
0, 0, 0, 0, // 199 199
|
||||||
|
0, 0, 0, 0, // 200 200
|
||||||
|
0, 0, 0, 0, // 201 201
|
||||||
|
0, 0, 0, 0, // 202 202
|
||||||
|
0, 0, 0, 0, // 203 203
|
||||||
|
0, 0, 0, 0, // 204 204
|
||||||
|
0, 0, 0, 0, // 205 205
|
||||||
|
0, 0, 0, 0, // 206 206
|
||||||
|
0, 0, 0, 0, // 207 207
|
||||||
|
0, 0, 0, 0, // 208 208
|
||||||
|
0, 0, 0, 0, // 209 209
|
||||||
|
0, 0, 0, 0, // 210 210
|
||||||
|
0, 0, 0, 0, // 211 211
|
||||||
|
0, 0, 0, 0, // 212 212
|
||||||
|
0, 0, 0, 0, // 213 213
|
||||||
|
0, 0, 0, 0, // 214 214
|
||||||
|
0, 0, 0, 0, // 215 215
|
||||||
|
0, 0, 0, 0, // 216 216
|
||||||
|
0, 0, 0, 0, // 217 217
|
||||||
|
0, 0, 0, 0, // 218 218
|
||||||
|
0, 0, 0, 0, // 219 219
|
||||||
|
0, 0, 0, 0, // 220 220
|
||||||
|
0, 0, 0, 0, // 221 221
|
||||||
|
0, 0, 0, 0, // 222 222
|
||||||
|
0, 0, 0, 0, // 223 223
|
||||||
|
0, 0, 0, 0, // 224 224
|
||||||
|
0, 0, 0, 0, // 225 225
|
||||||
|
0, 0, 0, 0, // 226 226
|
||||||
|
0, 0, 0, 0, // 227 227
|
||||||
|
0, 0, 0, 0, // 228 228
|
||||||
|
0, 0, 0, 0, // 229 229
|
||||||
|
0, 0, 0, 0, // 230 230
|
||||||
|
0, 0, 0, 0, // 231 231
|
||||||
|
0, 0, 0, 0, // 232 232
|
||||||
|
0, 0, 0, 0, // 233 233
|
||||||
|
0, 0, 0, 0, // 234 234
|
||||||
|
0, 0, 0, 0, // 235 235
|
||||||
|
0, 0, 0, 0, // 236 236
|
||||||
|
0, 0, 0, 0, // 237 237
|
||||||
|
0, 0, 0, 0, // 238 238
|
||||||
|
0, 0, 0, 0, // 239 239
|
||||||
|
0, 0, 0, 0, // 240 240
|
||||||
|
0, 0, 0, 0, // 241 241
|
||||||
|
0, 0, 0, 0, // 242 242
|
||||||
|
0, 0, 0, 0, // 243 243
|
||||||
|
0, 0, 0, 0, // 244 244
|
||||||
|
0, 0, 0, 0, // 245 245
|
||||||
|
0, 0, 0, 0, // 246 246
|
||||||
|
0, 0, 0, 0, // 247 247
|
||||||
|
0, 0, 0, 0, // 248 248
|
||||||
|
0, 0, 0, 0, // 249 249
|
||||||
|
0, 0, 0, 0, // 250 250
|
||||||
|
0, 0, 0, 0, // 251 251
|
||||||
|
0, 0, 0, 0, // 252 252
|
||||||
|
0, 0, 0, 0, // 253 253
|
||||||
|
0, 0, 0, 0, // 254 254
|
||||||
|
0, 0, 0, 0, // 255 255
|
||||||
|
0, 0, 0, 0, // 256 256
|
||||||
|
0, 0, 0, 0, // 257 257
|
||||||
|
0, 0, 0, 0, // 258 258
|
||||||
|
0, 0, 0, 0, // 259 259
|
||||||
|
0, 0, 0, 0, // 260 260
|
||||||
|
0, 0, 0, 0, // 261 261
|
||||||
|
0, 0, 0, 0, // 262 262
|
||||||
|
0, 0, 0, 0, // 263 263
|
||||||
|
0, 0, 0, 0, // 264 264
|
||||||
|
0, 0, 0, 0, // 265 265
|
||||||
|
0, 0, 0, 0, // 266 266
|
||||||
|
0, 0, 0, 0, // 267 267
|
||||||
|
0, 0, 0, 0, // 268 268
|
||||||
|
0, 0, 0, 0, // 269 269
|
||||||
|
0, 0, 0, 0, // 270 270
|
||||||
|
0, 0, 0, 0, // 271 271
|
||||||
|
0, 0, 0, 0, // 272 272
|
||||||
|
0, 0, 0, 0, // 273 273
|
||||||
|
0, 0, 0, 0, // 274 274
|
||||||
|
0, 0, 0, 0, // 275 275
|
||||||
|
0, 0, 0, 0, // 276 276
|
||||||
|
0, 0, 0, 0, // 277 277
|
||||||
|
0, 0, 0, 0, // 278 278
|
||||||
|
0, 0, 0, 0, // 279 279
|
||||||
|
0, 0, 0, 0, // 280 280
|
||||||
|
0, 0, 0, 0, // 281 281
|
||||||
|
0, 0, 0, 0, // 282 282
|
||||||
|
0, 0, 0, 0, // 283 283
|
||||||
|
0, 0, 0, 0, // 284 284
|
||||||
|
0, 0, 0, 0, // 285 285
|
||||||
|
0, 0, 0, 0, // 286 286
|
||||||
|
0, 0, 0, 0, // 287 287
|
||||||
|
0, 0, 0, 0, // 288 288
|
||||||
|
0, 0, 0, 0, // 289 289
|
||||||
|
0, 0, 0, 0, // 290 290
|
||||||
|
0, 0, 0, 0, // 291 291
|
||||||
|
0, 0, 0, 0, // 292 292
|
||||||
|
0, 0, 0, 0, // 293 293
|
||||||
|
0, 0, 0, 0, // 294 294
|
||||||
|
0, 0, 0, 0, // 295 295
|
||||||
|
0, 0, 0, 0, // 296 296
|
||||||
|
0, 0, 0, 0, // 297 297
|
||||||
|
0, 0, 0, 0, // 298 298
|
||||||
|
0, 0, 0, 0, // 299 299
|
||||||
|
0, 0, 0, 0, // 300 300
|
||||||
|
0, 0, 0, 0, // 301 301
|
||||||
|
0, 0, 0, 0, // 302 302
|
||||||
|
0, 0, 0, 0, // 303 303
|
||||||
|
0, 0, 0, 0, // 304 304
|
||||||
|
0, 0, 0, 0, // 305 305
|
||||||
|
0, 0, 0, 0, // 306 306
|
||||||
|
0, 0, 0, 0, // 307 307
|
||||||
|
0, 0, 0, 0, // 308 308
|
||||||
|
0, 0, 0, 0, // 309 309
|
||||||
|
0, 0, 0, 0, // 310 310
|
||||||
|
0, 0, 0, 0, // 311 311
|
||||||
|
0, 0, 0, 0, // 312 312
|
||||||
|
0, 0, 0, 0, // 313 313
|
||||||
|
0, 0, 0, 0, // 314 314
|
||||||
|
0, 0, 0, 0, // 315 315
|
||||||
|
0, 0, 0, 0, // 316 316
|
||||||
|
0, 0, 0, 0, // 317 317
|
||||||
|
0, 0, 0, 0, // 318 318
|
||||||
|
0, 0, 0, 0, // 319 319
|
||||||
|
0, 0, 0, 0, // 320 320
|
||||||
|
0, 0, 0, 0, // 321 321
|
||||||
|
0, 0, 0, 0, // 322 322
|
||||||
|
0, 0, 0, 0, // 323 323
|
||||||
|
0, 0, 0, 0, // 324 324
|
||||||
|
0, 0, 0, 0, // 325 325
|
||||||
|
0, 0, 0, 0, // 326 326
|
||||||
|
0, 0, 0, 0, // 327 327
|
||||||
|
0, 0, 0, 0, // 328 328
|
||||||
|
0, 0, 0, 0, // 329 329
|
||||||
|
0, 0, 0, 0, // 330 330
|
||||||
|
0, 0, 0, 0, // 331 331
|
||||||
|
0, 0, 0, 0, // 332 332
|
||||||
|
0, 0, 0, 0, // 333 333
|
||||||
|
0, 0, 0, 0, // 334 334
|
||||||
|
0, 0, 0, 0, // 335 335
|
||||||
|
0, 0, 0, 0, // 336 336
|
||||||
|
0, 0, 0, 0, // 337 337
|
||||||
|
0, 0, 0, 0, // 338 338
|
||||||
|
0, 0, 0, 0, // 339 339
|
||||||
|
0, 0, 0, 0, // 340 340
|
||||||
|
0, 0, 0, 0, // 341 341
|
||||||
|
0, 0, 0, 0, // 342 342
|
||||||
|
0, 0, 0, 0, // 343 343
|
||||||
|
0, 0, 0, 0, // 344 344
|
||||||
|
0, 0, 0, 0, // 345 345
|
||||||
|
0, 0, 0, 0, // 346 346
|
||||||
|
0, 0, 0, 0, // 347 347
|
||||||
|
0, 0, 0, 0, // 348 348
|
||||||
|
0, 0, 0, 0, // 349 349
|
||||||
|
0, 0, 0, 0, // 350 350
|
||||||
|
0, 0, 0, 0, // 351 351
|
||||||
|
0, 0, 0, 0, // 352 352
|
||||||
|
0, 0, 0, 0, // 353 353
|
||||||
|
0, 0, 0, 0, // 354 354
|
||||||
|
0, 0, 0, 0, // 355 355
|
||||||
|
0, 0, 0, 0, // 356 356
|
||||||
|
0, 0, 0, 0, // 357 357
|
||||||
|
0, 0, 0, 0, // 358 358
|
||||||
|
0, 0, 0, 0, // 359 359
|
||||||
|
0, 0, 0, 0, // 360 360
|
||||||
|
0, 0, 0, 0, // 361 361
|
||||||
|
0, 0, 0, 0, // 362 362
|
||||||
|
0, 0, 0, 0, // 363 363
|
||||||
|
0, 0, 0, 0, // 364 364
|
||||||
|
0, 0, 0, 0, // 365 365
|
||||||
|
0, 0, 0, 0, // 366 366
|
||||||
|
0, 0, 0, 0, // 367 367
|
||||||
|
0, 0, 0, 0, // 368 368
|
||||||
|
0, 0, 0, 0, // 369 369
|
||||||
|
0, 0, 0, 0, // 370 370
|
||||||
|
0, 0, 0, 0, // 371 371
|
||||||
|
0, 0, 0, 0, // 372 372
|
||||||
|
0, 0, 0, 0, // 373 373
|
||||||
|
0, 0, 0, 0, // 374 374
|
||||||
|
0, 0, 0, 0, // 375 375
|
||||||
|
0, 0, 0, 0, // 376 376
|
||||||
|
0, 0, 0, 0, // 377 377
|
||||||
|
0, 0, 0, 0, // 378 378
|
||||||
|
0, 0, 0, 0, // 379 379
|
||||||
|
0, 0, 0, 0, // 380 380
|
||||||
|
0, 0, 0, 0, // 381 381
|
||||||
|
0, 0, 0, 0, // 382 382
|
||||||
|
0, 0, 0, 0, // 383 383
|
||||||
|
0, 0, 0, 0, // 384 384
|
||||||
|
0, 0, 0, 0, // 385 385
|
||||||
|
0, 0, 0, 0, // 386 386
|
||||||
|
0, 0, 0, 0, // 387 387
|
||||||
|
0, 0, 0, 0, // 388 388
|
||||||
|
0, 0, 0, 0, // 389 389
|
||||||
|
0, 0, 0, 0, // 390 390
|
||||||
|
0, 0, 0, 0, // 391 391
|
||||||
|
0, 0, 0, 0, // 392 392
|
||||||
|
0, 0, 0, 0, // 393 393
|
||||||
|
0, 0, 0, 0, // 394 394
|
||||||
|
0, 0, 0, 0, // 395 395
|
||||||
|
0, 0, 0, 0, // 396 396
|
||||||
|
0, 0, 0, 0, // 397 397
|
||||||
|
0, 0, 0, 0, // 398 398
|
||||||
|
0, 0, 0, 0, // 399 399
|
||||||
|
0, 0, 0, 0, // 400 400
|
||||||
|
0, 0, 0, 0, // 401 401
|
||||||
|
0, 0, 0, 0, // 402 402
|
||||||
|
0, 0, 0, 0, // 403 403
|
||||||
|
0, 0, 0, 0, // 404 404
|
||||||
|
0, 0, 0, 0, // 405 405
|
||||||
|
0, 0, 0, 0, // 406 406
|
||||||
|
0, 0, 0, 0, // 407 407
|
||||||
|
0, 0, 0, 0, // 408 408
|
||||||
|
0, 0, 0, 0, // 409 409
|
||||||
|
0, 0, 0, 0, // 410 410
|
||||||
|
0, 0, 0, 0, // 411 411
|
||||||
|
0, 0, 0, 0, // 412 412
|
||||||
|
0, 0, 0, 0, // 413 413
|
||||||
|
0, 0, 0, 0, // 414 414
|
||||||
|
0, 0, 0, 0, // 415 415
|
||||||
|
0, 0, 0, 0, // 416 416
|
||||||
|
0, 0, 0, 0, // 417 417
|
||||||
|
0, 0, 0, 0, // 418 418
|
||||||
|
0, 0, 0, 0, // 419 419
|
||||||
|
0, 0, 0, 0, // 420 420
|
||||||
|
0, 0, 0, 0, // 421 421
|
||||||
|
0, 0, 0, 0, // 422 422
|
||||||
|
0, 0, 0, 0, // 423 423
|
||||||
|
0, 0, 0, 0, // 424 424
|
||||||
|
0, 0, 0, 0, // 425 425
|
||||||
|
0, 0, 0, 0, // 426 426
|
||||||
|
0, 0, 0, 0, // 427 427
|
||||||
|
0, 0, 0, 0, // 428 428
|
||||||
|
0, 0, 0, 0, // 429 429
|
||||||
|
0, 0, 0, 0, // 430 430
|
||||||
|
0, 0, 0, 0, // 431 431
|
||||||
|
0, 0, 0, 0, // 432 432
|
||||||
|
0, 0, 0, 0, // 433 433
|
||||||
|
0, 0, 0, 0, // 434 434
|
||||||
|
0, 0, 0, 0, // 435 435
|
||||||
|
0, 0, 0, 0, // 436 436
|
||||||
|
0, 0, 0, 0, // 437 437
|
||||||
|
0, 0, 0, 0, // 438 438
|
||||||
|
0, 0, 0, 0, // 439 439
|
||||||
|
0, 0, 0, 0, // 440 440
|
||||||
|
0, 0, 0, 0, // 441 441
|
||||||
|
0, 0, 0, 0, // 442 442
|
||||||
|
0, 0, 0, 0, // 443 443
|
||||||
|
0, 0, 0, 0, // 444 444
|
||||||
|
0, 0, 0, 0, // 445 445
|
||||||
|
0, 0, 0, 0, // 446 446
|
||||||
|
0, 0, 0, 0, // 447 447
|
||||||
|
0, 0, 0, 0, // 448 448
|
||||||
|
0, 0, 0, 0, // 449 449
|
||||||
|
0, 0, 0, 0, // 450 450
|
||||||
|
0, 0, 0, 0, // 451 451
|
||||||
|
0, 0, 0, 0, // 452 452
|
||||||
|
0, 0, 0, 0, // 453 453
|
||||||
|
0, 0, 0, 0, // 454 454
|
||||||
|
0, 0, 0, 0, // 455 455
|
||||||
|
0, 0, 0, 0, // 456 456
|
||||||
|
0, 0, 0, 0, // 457 457
|
||||||
|
0, 0, 0, 0, // 458 458
|
||||||
|
0, 0, 0, 0, // 459 459
|
||||||
|
0, 0, 0, 0, // 460 460
|
||||||
|
0, 0, 0, 0, // 461 461
|
||||||
|
0, 0, 0, 0, // 462 462
|
||||||
|
0, 0, 0, 0, // 463 463
|
||||||
|
0, 0, 0, 0, // 464 464
|
||||||
|
0, 0, 0, 0, // 465 465
|
||||||
|
0, 0, 0, 0, // 466 466
|
||||||
|
0, 0, 0, 0, // 467 467
|
||||||
|
0, 0, 0, 0, // 468 468
|
||||||
|
0, 0, 0, 0, // 469 469
|
||||||
|
0, 0, 0, 0, // 470 470
|
||||||
|
0, 0, 0, 0, // 471 471
|
||||||
|
0, 0, 0, 0, // 472 472
|
||||||
|
0, 0, 0, 0, // 473 473
|
||||||
|
0, 0, 0, 0, // 474 474
|
||||||
|
0, 0, 0, 0, // 475 475
|
||||||
|
0, 0, 0, 0, // 476 476
|
||||||
|
0, 0, 0, 0, // 477 477
|
||||||
|
0, 0, 0, 0, // 478 478
|
||||||
|
0, 0, 0, 0, // 479 479
|
||||||
|
0, 0, 0, 0, // 480 480
|
||||||
|
0, 0, 0, 0, // 481 481
|
||||||
|
0, 0, 0, 0, // 482 482
|
||||||
|
0, 0, 0, 0, // 483 483
|
||||||
|
0, 0, 0, 0, // 484 484
|
||||||
|
0, 0, 0, 0, // 485 485
|
||||||
|
0, 0, 0, 0, // 486 486
|
||||||
|
0, 0, 0, 0, // 487 487
|
||||||
|
0, 0, 0, 0, // 488 488
|
||||||
|
0, 0, 0, 0, // 489 489
|
||||||
|
0, 0, 0, 0, // 490 490
|
||||||
|
0, 0, 0, 0, // 491 491
|
||||||
|
0, 0, 0, 0, // 492 492
|
||||||
|
0, 0, 0, 0, // 493 493
|
||||||
|
0, 0, 0, 0, // 494 494
|
||||||
|
0, 0, 0, 0, // 495 495
|
||||||
|
0, 0, 0, 0, // 496 496
|
||||||
|
0, 0, 0, 0, // 497 497
|
||||||
|
0, 0, 0, 0, // 498 498
|
||||||
|
0, 0, 0, 0, // 499 499
|
||||||
|
0, 0, 0, 0, // 500 500
|
||||||
|
0, 0, 0, 0, // 501 501
|
||||||
|
0, 0, 0, 0, // 502 502
|
||||||
|
0, 0, 0, 0, // 503 503
|
||||||
|
0, 0, 0, 0, // 504 504
|
||||||
|
0, 0, 0, 0, // 505 505
|
||||||
|
866, 0, 0, 0, // 506 NDEBELE nr
|
||||||
|
0, 0, 0, 0, // 507 X_BORK_BORK_BORK zzb
|
||||||
|
1657, 0, 0, 0, // 508 X_PIG_LATIN zzp
|
||||||
|
0, 0, 0, 0, // 509 X_HACKER zzh
|
||||||
|
1427, 0, 0, 0, // 510 X_KLINGON tlh
|
||||||
|
0, 0, 0, 0, // 511 X_ELMER_FUDD zze
|
||||||
|
0, 0, 0, 0, // 512 X_Common xx-Zyyy
|
||||||
|
0, 0, 0, 0, // 513 X_Latin xx-Latn
|
||||||
|
0, 0, 0, 0, // 514 X_Greek xx-Grek
|
||||||
|
0, 0, 0, 0, // 515 X_Cyrillic xx-Cyrl
|
||||||
|
0, 0, 0, 0, // 516 X_Armenian xx-Armn
|
||||||
|
0, 0, 0, 0, // 517 X_Hebrew xx-Hebr
|
||||||
|
0, 0, 0, 0, // 518 X_Arabic xx-Arab
|
||||||
|
0, 0, 0, 0, // 519 X_Syriac xx-Syrc
|
||||||
|
0, 0, 0, 0, // 520 X_Thaana xx-Thaa
|
||||||
|
0, 0, 0, 0, // 521 X_Devanagari xx-Deva
|
||||||
|
0, 0, 0, 0, // 522 X_Bengali xx-Beng
|
||||||
|
0, 0, 0, 0, // 523 X_Gurmukhi xx-Guru
|
||||||
|
0, 0, 0, 0, // 524 X_Gujarati xx-Gujr
|
||||||
|
0, 0, 0, 0, // 525 X_Oriya xx-Orya
|
||||||
|
0, 0, 0, 0, // 526 X_Tamil xx-Taml
|
||||||
|
0, 0, 0, 0, // 527 X_Telugu xx-Telu
|
||||||
|
0, 0, 0, 0, // 528 X_Kannada xx-Knda
|
||||||
|
0, 0, 0, 0, // 529 X_Malayalam xx-Mlym
|
||||||
|
0, 0, 0, 0, // 530 X_Sinhala xx-Sinh
|
||||||
|
0, 0, 0, 0, // 531 X_Thai xx-Thai
|
||||||
|
0, 0, 0, 0, // 532 X_Lao xx-Laoo
|
||||||
|
0, 0, 0, 0, // 533 X_Tibetan xx-Tibt
|
||||||
|
0, 0, 0, 0, // 534 X_Myanmar xx-Mymr
|
||||||
|
0, 0, 0, 0, // 535 X_Georgian xx-Geor
|
||||||
|
0, 0, 0, 0, // 536 X_Hangul xx-Hang
|
||||||
|
0, 0, 0, 0, // 537 X_Ethiopic xx-Ethi
|
||||||
|
0, 0, 0, 0, // 538 X_Cherokee xx-Cher
|
||||||
|
0, 0, 0, 0, // 539 X_Canadian_Aboriginal xx-Cans
|
||||||
|
0, 0, 0, 0, // 540 X_Ogham xx-Ogam
|
||||||
|
0, 0, 0, 0, // 541 X_Runic xx-Runr
|
||||||
|
0, 0, 0, 0, // 542 X_Khmer xx-Khmr
|
||||||
|
0, 0, 0, 0, // 543 X_Mongolian xx-Mong
|
||||||
|
0, 0, 0, 0, // 544 X_Hiragana xx-Hira
|
||||||
|
0, 0, 0, 0, // 545 X_Katakana xx-Kana
|
||||||
|
0, 0, 0, 0, // 546 X_Bopomofo xx-Bopo
|
||||||
|
0, 0, 0, 0, // 547 X_Han xx-Hani
|
||||||
|
0, 0, 0, 0, // 548 X_Yi xx-Yiii
|
||||||
|
0, 0, 0, 0, // 549 X_Old_Italic xx-Ital
|
||||||
|
0, 0, 0, 0, // 550 X_Gothic xx-Goth
|
||||||
|
0, 0, 0, 0, // 551 X_Deseret xx-Dsrt
|
||||||
|
0, 0, 0, 0, // 552 X_Inherited xx-Qaai
|
||||||
|
0, 0, 0, 0, // 553 X_Tagalog xx-Tglg
|
||||||
|
0, 0, 0, 0, // 554 X_Hanunoo xx-Hano
|
||||||
|
0, 0, 0, 0, // 555 X_Buhid xx-Buhd
|
||||||
|
0, 0, 0, 0, // 556 X_Tagbanwa xx-Tagb
|
||||||
|
0, 0, 0, 0, // 557 X_Limbu xx-Limb
|
||||||
|
0, 0, 0, 0, // 558 X_Tai_Le xx-Tale
|
||||||
|
0, 0, 0, 0, // 559 X_Linear_B xx-Linb
|
||||||
|
0, 0, 0, 0, // 560 X_Ugaritic xx-Ugar
|
||||||
|
0, 0, 0, 0, // 561 X_Shavian xx-Shaw
|
||||||
|
0, 0, 0, 0, // 562 X_Osmanya xx-Osma
|
||||||
|
0, 0, 0, 0, // 563 X_Cypriot xx-Cprt
|
||||||
|
0, 0, 0, 0, // 564 X_Braille xx-Brai
|
||||||
|
0, 0, 0, 0, // 565 X_Buginese xx-Bugi
|
||||||
|
0, 0, 0, 0, // 566 X_Coptic xx-Copt
|
||||||
|
0, 0, 0, 0, // 567 X_New_Tai_Lue xx-Talu
|
||||||
|
0, 0, 0, 0, // 568 X_Glagolitic xx-Glag
|
||||||
|
0, 0, 0, 0, // 569 X_Tifinagh xx-Tfng
|
||||||
|
0, 0, 0, 0, // 570 X_Syloti_Nagri xx-Sylo
|
||||||
|
0, 0, 0, 0, // 571 X_Old_Persian xx-Xpeo
|
||||||
|
0, 0, 0, 0, // 572 X_Kharoshthi xx-Khar
|
||||||
|
0, 0, 0, 0, // 573 X_Balinese xx-Bali
|
||||||
|
0, 0, 0, 0, // 574 X_Cuneiform xx-Xsux
|
||||||
|
0, 0, 0, 0, // 575 X_Phoenician xx-Phnx
|
||||||
|
0, 0, 0, 0, // 576 X_Phags_Pa xx-Phag
|
||||||
|
0, 0, 0, 0, // 577 X_Nko xx-Nkoo
|
||||||
|
0, 0, 0, 0, // 578 X_Sundanese xx-Sund
|
||||||
|
0, 0, 0, 0, // 579 X_Lepcha xx-Lepc
|
||||||
|
0, 0, 0, 0, // 580 X_Ol_Chiki xx-Olck
|
||||||
|
0, 0, 0, 0, // 581 X_Vai xx-Vaii
|
||||||
|
0, 0, 0, 0, // 582 X_Saurashtra xx-Saur
|
||||||
|
0, 0, 0, 0, // 583 X_Kayah_Li xx-Kali
|
||||||
|
0, 0, 0, 0, // 584 X_Rejang xx-Rjng
|
||||||
|
0, 0, 0, 0, // 585 X_Lycian xx-Lyci
|
||||||
|
0, 0, 0, 0, // 586 X_Carian xx-Cari
|
||||||
|
0, 0, 0, 0, // 587 X_Lydian xx-Lydi
|
||||||
|
0, 0, 0, 0, // 588 X_Cham xx-Cham
|
||||||
|
0, 0, 0, 0, // 589 X_Tai_Tham xx-Lana
|
||||||
|
0, 0, 0, 0, // 590 X_Tai_Viet xx-Tavt
|
||||||
|
0, 0, 0, 0, // 591 X_Avestan xx-Avst
|
||||||
|
0, 0, 0, 0, // 592 X_Egyptian_Hieroglyphs xx-Egyp
|
||||||
|
0, 0, 0, 0, // 593 X_Samaritan xx-Samr
|
||||||
|
0, 0, 0, 0, // 594 X_Lisu xx-Lisu
|
||||||
|
0, 0, 0, 0, // 595 X_Bamum xx-Bamu
|
||||||
|
0, 0, 0, 0, // 596 X_Javanese xx-Java
|
||||||
|
0, 0, 0, 0, // 597 X_Meetei_Mayek xx-Mtei
|
||||||
|
0, 0, 0, 0, // 598 X_Imperial_Aramaic xx-Armi
|
||||||
|
0, 0, 0, 0, // 599 X_Old_South_Arabian xx-Sarb
|
||||||
|
0, 0, 0, 0, // 600 X_Inscriptional_Parthian xx-Prti
|
||||||
|
0, 0, 0, 0, // 601 X_Inscriptional_Pahlavi xx-Phli
|
||||||
|
0, 0, 0, 0, // 602 X_Old_Turkic xx-Orkh
|
||||||
|
0, 0, 0, 0, // 603 X_Kaithi xx-Kthi
|
||||||
|
0, 0, 0, 0, // 604 X_Batak xx-Batk
|
||||||
|
0, 0, 0, 0, // 605 X_Brahmi xx-Brah
|
||||||
|
0, 0, 0, 0, // 606 X_Mandaic xx-Mand
|
||||||
|
0, 0, 0, 0, // 607 X_Chakma xx-Cakm
|
||||||
|
0, 0, 0, 0, // 608 X_Meroitic_Cursive xx-Merc
|
||||||
|
0, 0, 0, 0, // 609 X_Meroitic_Hieroglyphs xx-Mero
|
||||||
|
0, 0, 0, 0, // 610 X_Miao xx-Plrd
|
||||||
|
0, 0, 0, 0, // 611 X_Sharada xx-Shrd
|
||||||
|
0, 0, 0, 0, // 612 X_Sora_Sompeng xx-Sora
|
||||||
|
0, 0, 0, 0, // 613 X_Takri xx-Takr
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End namespace CLD2
|
||||||
|
|
72
internal/compile20141015.sh
Executable file
72
internal/compile20141015.sh
Executable file
@@ -0,0 +1,72 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# Copyright 2014 Google Inc. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# Source files passed to every g++ invocation below, in their original order.
# They are kept in one place so the four targets cannot drift apart.
COMMON_SRCS="cldutil.cc cldutil_shared.cc compact_lang_det.cc compact_lang_det_hint_code.cc \
compact_lang_det_impl.cc debug.cc fixunicodevalue.cc \
generated_entities.cc generated_language.cc generated_ulscript.cc \
getonescriptspan.cc lang_script.cc offsetmap.cc scoreonescriptspan.cc \
tote.cc utf8statetable.cc \
cld_generated_cjk_uni_prop_80.cc cld2_generated_cjk_compatible.cc \
cld_generated_cjk_delta_bi_4.cc generated_distinct_bi_0.cc"

# 20141015 table files that follow the quadgram table on every command line.
TABLE_SRCS="cld2_generated_deltaoctachrome20141015.cc \
cld2_generated_distinctoctachrome20141015.cc cld_generated_score_quad_octa_20141015_2.cc"

# Set to 1 as soon as any target fails, so the script's exit status is honest.
build_failed=0

# build OUTPUT MAIN_SRC QUAD_TABLE [EXTRA_G++_FLAGS...]
#   OUTPUT      name of the executable to produce
#   MAIN_SRC    source file listed first on the command line
#   QUAD_TABLE  quadgram table source to link (the _2 or _16 variant)
#   EXTRA...    optional flags inserted right after "-O2 -m64"
# Prints " OUTPUT compiled" only when g++ succeeds; on failure it reports the
# error on stderr and records it, then lets the remaining targets be attempted.
build() {
  build_out=$1
  build_main=$2
  build_quad=$3
  shift 3
  # COMMON_SRCS and TABLE_SRCS are intentionally unquoted: they must be split
  # into one argument per file name.
  if g++ -O2 -m64 "$@" "$build_main" \
      $COMMON_SRCS \
      "$build_quad" $TABLE_SRCS \
      -o "$build_out"; then
    echo " $build_out compiled"
  else
    echo " $build_out FAILED to compile" >&2
    build_failed=1
  fi
}

build compact_lang_det_test_chrome20141015_2 \
  compact_lang_det_test.cc cld2_generated_quadchrome20141015_2.cc

build compact_lang_det_test_chrome20141015_16 \
  compact_lang_det_test.cc cld2_generated_quadchrome20141015_16.cc

build cld2_unittest_chrome20141015_2 \
  cld2_unittest_20141015.cc cld2_generated_quadchrome20141015_2.cc

# Same unit test, built with -Davoid_utf8_string_constants.
build cld2_unittest_avoid_chrome20141015_2 \
  cld2_unittest_20141015.cc cld2_generated_quadchrome20141015_2.cc \
  -Davoid_utf8_string_constants

# Non-zero exit status if any of the four targets failed to build.
exit "$build_failed"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Reference in New Issue
Block a user