New tables, test, compile for Oct 2014 small table release

git-svn-id: https://cld2.googlecode.com/svn/trunk@176 b252ecd4-b096-bf77-eb8e-91563289f87e
This commit is contained in:
dsites@google.com
2014-10-28 20:41:13 +00:00
parent 5c84ccd348
commit 87276506cd
6 changed files with 92375 additions and 0 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,329 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: dsites@google.com (Dick Sites)
//
// Unit test compact language detector, CLD2
// Compile with -Davoid_utf8_string_constants if your compiler cannot
// handle UTF-8 string constants
//
#include <stdio.h>
#include <string.h>
#include "../public/compact_lang_det.h"
#include "../public/encodings.h"
#include "unittest_data.h"
namespace CLD2 {
// Test strings.
// Plain lower-case English sample; used as the first entry of the test table.
// The adjacent literals concatenate into one string with single spaces
// between words.
const char* kTeststr_en =
    "confiscation of goods is assigned as the penalty part "
    "most of the courts consist of members and "
    "when it is necessary to bring public cases before a jury of members "
    "two courts combine for the purpose "
    "the most important cases of all are brought jurors or";
// One row of the test table: a sample text and the language the detector is
// expected to report for it.
struct TestPair {
  Language lang;     // Expected detection result
  const char* text;  // Sample text; NULL in the final table entry
};
// Table of (expected language, sample text) pairs run by RunTests().
// The final entry has a NULL text pointer and terminates the table; RunTests()
// stops at it. Apart from kTeststr_en, the kTeststr_* samples are not defined
// in this file (presumably they come from unittest_data.h -- confirm).
static const TestPair kTestPair[] = {
// A simple case to begin
{ENGLISH, kTeststr_en},
// 20 languages recognized via Unicode script
{ARMENIAN, kTeststr_hy_Armn},
{CHEROKEE, kTeststr_chr_Cher},
{DHIVEHI, kTeststr_dv_Thaa},
{GEORGIAN, kTeststr_ka_Geor},
{GREEK, kTeststr_el_Grek},
{GUJARATI, kTeststr_gu_Gujr},
{INUKTITUT, kTeststr_iu_Cans},
{KANNADA, kTeststr_kn_Knda},
{KHMER, kTeststr_km_Khmr},
{LAOTHIAN, kTeststr_lo_Laoo},
{LIMBU, kTeststr_lif_Limb},
{MALAYALAM, kTeststr_ml_Mlym},
{ORIYA, kTeststr_or_Orya},
{PUNJABI, kTeststr_pa_Guru},
{SINHALESE, kTeststr_si_Sinh},
{SYRIAC, kTeststr_syr_Syrc},
{TAGALOG, kTeststr_tl_Tglg}, // Also in quadgram list below
{TAMIL, kTeststr_ta_Taml},
{TELUGU, kTeststr_te_Telu},
{THAI, kTeststr_th_Thai},
// 4 languages recognized via single letters
{CHINESE, kTeststr_zh_Hans},
{CHINESE_T, kTeststr_zh_Hant},
{JAPANESE, kTeststr_ja_Hani},
{KOREAN, kTeststr_ko_Hani},
// 60 languages recognized via combinations of four letters
// (Romanian and Serbian each appear twice, once per script)
{AFRIKAANS, kTeststr_af_Latn},
{ALBANIAN, kTeststr_sq_Latn},
{ARABIC, kTeststr_ar_Arab},
{AZERBAIJANI, kTeststr_az_Latn},
{BASQUE, kTeststr_eu_Latn},
{BELARUSIAN, kTeststr_be_Cyrl},
{BENGALI, kTeststr_bn_Beng}, // No Assamese in subset
{BIHARI, kTeststr_bh_Deva},
{BULGARIAN, kTeststr_bg_Cyrl},
{CATALAN, kTeststr_ca_Latn},
{CEBUANO, kTeststr_ceb_Latn},
{CROATIAN, kTeststr_hr_Latn},
{CZECH, kTeststr_cs_Latn},
{DANISH, kTeststr_da_Latn},
{DUTCH, kTeststr_nl_Latn},
{ENGLISH, kTeststr_en_Latn},
{ESTONIAN, kTeststr_et_Latn},
{FINNISH, kTeststr_fi_Latn},
{FRENCH, kTeststr_fr_Latn},
{GALICIAN, kTeststr_gl_Latn},
{GANDA, kTeststr_lg_Latn},
{GERMAN, kTeststr_de_Latn},
{HAITIAN_CREOLE, kTeststr_ht_Latn},
{HEBREW, kTeststr_iw_Hebr},
{HINDI, kTeststr_hi_Deva},
{HMONG, kTeststr_blu_Latn},
{HUNGARIAN, kTeststr_hu_Latn},
{ICELANDIC, kTeststr_is_Latn},
{INDONESIAN, kTeststr_id_Latn},
{IRISH, kTeststr_ga_Latn},
{ITALIAN, kTeststr_it_Latn},
{JAVANESE, kTeststr_jw_Latn},
{KINYARWANDA, kTeststr_rw_Latn},
{LATVIAN, kTeststr_lv_Latn},
{LITHUANIAN, kTeststr_lt_Latn},
{MACEDONIAN, kTeststr_mk_Cyrl},
{MALAY, kTeststr_ms_Latn},
{MALTESE, kTeststr_mt_Latn},
{MARATHI, kTeststr_mr_Deva},
{NEPALI, kTeststr_ne_Deva},
{NORWEGIAN, kTeststr_no_Latn},
{PERSIAN, kTeststr_fa_Arab},
{POLISH, kTeststr_pl_Latn},
{PORTUGUESE, kTeststr_pt_Latn},
{ROMANIAN, kTeststr_ro_Latn},
{ROMANIAN, kTeststr_ro_Cyrl},
{RUSSIAN, kTeststr_ru_Cyrl},
{SCOTS_GAELIC, kTeststr_gd_Latn},
{SERBIAN, kTeststr_sr_Cyrl},
{SERBIAN, kTeststr_sr_Latn},
{SLOVAK, kTeststr_sk_Latn},
{SLOVENIAN, kTeststr_sl_Latn},
{SPANISH, kTeststr_es_Latn},
{SWAHILI, kTeststr_sw_Latn},
{SWEDISH, kTeststr_sv_Latn},
{TAGALOG, kTeststr_tl_Latn},
{TURKISH, kTeststr_tr_Latn},
{UKRAINIAN, kTeststr_uk_Cyrl},
{URDU, kTeststr_ur_Arab},
{VIETNAMESE, kTeststr_vi_Latn},
{WELSH, kTeststr_cy_Latn},
{YIDDISH, kTeststr_yi_Hebr},
// Added 2013.08.31 so-Latn ig-Latn ha-Latn yo-Latn zu-Latn
// Deleted 2014.10.15 so-Latn ig-Latn ha-Latn yo-Latn zu-Latn
//{SOMALI, kTeststr_so_Latn},
//{IGBO, kTeststr_ig_Latn},
//{HAUSA, kTeststr_ha_Latn},
//{YORUBA, kTeststr_yo_Latn},
//{ZULU, kTeststr_zu_Latn},
// Added 2014.01.22 bs-Latn
{BOSNIAN, kTeststr_bs_Latn},
// Added 2014.10.15
// (the MALAYALAM and SINHALESE rows below repeat rows already present in the
// Unicode-script group above, so those two samples are tested twice)
{KAZAKH, kTeststr_kk_Cyrl},
{KURDISH, kTeststr_ku_Latn}, // aka kmr
{KYRGYZ, kTeststr_ky_Cyrl},
{MALAGASY, kTeststr_mg_Latn},
{MALAYALAM, kTeststr_ml_Mlym},
{BURMESE, kTeststr_my_Mymr},
{NYANJA, kTeststr_ny_Latn},
{SINHALESE, kTeststr_si_Sinh}, // aka SINHALA
{SESOTHO, kTeststr_st_Latn},
{SUNDANESE, kTeststr_su_Latn},
{TAJIK, kTeststr_tg_Cyrl},
{UZBEK, kTeststr_uz_Latn},
{UZBEK, kTeststr_uz_Cyrl},
// 2 statistically-close languages
{INDONESIAN, kTeststr_id_close},
{MALAY, kTeststr_ms_close},
// Simple intermixed French/English text
{FRENCH, kTeststr_fr_en_Latn},
// Simple English with bad UTF-8
// (RunTests() treats this entry specially: it expects OneTest() to fail here)
{UNKNOWN_LANGUAGE, kTeststr_en_Latn_bad_UTF8},
// Cross-check the main quadgram table build date
// Change the expected language each time it is rebuilt
// {WELSH, kTeststr_version}, // 2013.07.15
// {AZERBAIJANI, kTeststr_version}, // 2014.01.31
{TURKISH, kTeststr_version}, // 2014.10.16
{UNKNOWN_LANGUAGE, NULL}, // Must be last
};
bool OneTest(int flags, bool get_vector,
Language lang_expected, const char* buffer, int buffer_length) {
bool is_plain_text = true;
const char* tldhint = "";
const Encoding enchint = UNKNOWN_ENCODING;
const Language langhint = UNKNOWN_LANGUAGE;
const CLDHints cldhints = {NULL, tldhint, enchint, langhint};
Language language3[3];
int percent3[3];
double normalized_score3[3];
ResultChunkVector resultchunkvector;
int text_bytes;
bool is_reliable;
int valid_prefix_bytes;
Language lang_detected = ExtDetectLanguageSummaryCheckUTF8(
buffer,
buffer_length,
is_plain_text,
&cldhints,
flags,
language3,
percent3,
normalized_score3,
get_vector ? &resultchunkvector : NULL,
&text_bytes,
&is_reliable,
&valid_prefix_bytes);
// expose DumpExtLang DumpLanguages
bool good_utf8 = (valid_prefix_bytes == buffer_length);
if (!good_utf8) {
fprintf(stderr, "*** Bad UTF-8 after %d bytes<br>\n", valid_prefix_bytes);
fprintf(stdout, "*** Bad UTF-8 after %d bytes\n", valid_prefix_bytes);
}
bool ok = (lang_detected == lang_expected);
ok &= good_utf8;
if (!ok) {
if ((flags & kCLDFlagHtml) != 0) {
fprintf(stderr, "*** Wrong result. expected %s, detected %s<br>\n",
LanguageName(lang_expected), LanguageName(lang_detected));
}
fprintf(stdout, "*** Wrong result. expected %s, detected %s\n",
LanguageName(lang_expected), LanguageName(lang_detected));
fprintf(stdout, "%s\n\n", buffer);
}
if (get_vector) {
DumpResultChunkVector(stderr, buffer, &resultchunkvector);
}
#if 0
DumpExtLang(flags, summary_lang, language3, percent3, normalized_score3,
text_bytes, is_reliable, n);
if ((flags & kCLDFlagHtml) != 0) {
DumpLanguages(summary_lang,
language3, percent3, text_bytes, is_reliable, n);
}
fprintf(stdout, " SummaryLanguage %s%s at %u of %d, %s\n",
LanguageName(summary_lang),
is_reliable ? "" : "(un-reliable)",
bytes_consumed,
n,
argv[1]);
#endif
return ok;
}
// Writes the opening HTML boilerplate to stderr when flags has kCLDFlagHtml
// set; does nothing otherwise.
void InitHtmlOut(int flags) {
  const bool html = (flags & kCLDFlagHtml) != 0;
  if (!html) {
    return;
  }
  // Begin HTML file
  fputs("<html><meta charset=\"UTF-8\"><body>\n", stderr);
  // Encourage browsers to print background colors
  fputs("<style media=\"print\" type=\"text/css\"> "
        ":root { -webkit-print-color-adjust: exact; } </style>\n", stderr);
  fputs("<span style=\"font-size: 7pt\">\n", stderr);
  fprintf(stderr, "file = %s<br>\n", "cld2_unittest");
}
// Writes the closing HTML tags to stderr when flags has kCLDFlagHtml set;
// does nothing otherwise. Counterpart of InitHtmlOut().
void FinishHtmlOut(int flags) {
  const bool html = (flags & kCLDFlagHtml) != 0;
  if (!html) {
    return;
  }
  fputs("\n</span></body></html>\n", stderr);
}
int RunTests (int flags, bool get_vector) {
fprintf(stdout, "CLD2 version: %s\n", CLD2::DetectLanguageVersion());
InitHtmlOut(flags);
bool any_fail = false;
int i = 0;
while (kTestPair[i].text != NULL) {
Language lang_expected = kTestPair[i].lang;
const char* buffer = kTestPair[i].text;
int buffer_length = strlen(buffer);
bool ok = OneTest(flags, get_vector, lang_expected, buffer, buffer_length);
if (kTestPair[i].text == kTeststr_en_Latn_bad_UTF8) {
// We expect this one to fail, so flip the value of ok
ok = !ok;
}
any_fail |= (!ok);
++i;
}
if (any_fail) {
fprintf(stderr, "FAIL\n");
fprintf(stdout, "FAIL\n");
} else {
fprintf(stderr, "PASS\n");
fprintf(stdout, "PASS\n");
}
FinishHtmlOut(flags);
return 0;
}
} // End namespace CLD2
// Program entry point. Converts the recognized command-line switches into
// CLD2 flag bits (plus the --vector option), ignores every other argument,
// and returns whatever CLD2::RunTests() returns.
int main(int argc, char** argv) {
  int flags = 0;
  bool get_vector = false;

  // Get command-line flags. The switch names are mutually exclusive, so at
  // most one branch applies per argument.
  for (int i = 1; i < argc; ++i) {
    const char* arg = argv[i];
    if (strcmp(arg, "--html") == 0) {
      flags |= CLD2::kCLDFlagHtml;
    } else if (strcmp(arg, "--cr") == 0) {
      flags |= CLD2::kCLDFlagCr;
    } else if (strcmp(arg, "--verbose") == 0) {
      flags |= CLD2::kCLDFlagVerbose;
    } else if (strcmp(arg, "--quiet") == 0) {
      flags |= CLD2::kCLDFlagQuiet;
    } else if (strcmp(arg, "--echo") == 0) {
      flags |= CLD2::kCLDFlagEcho;
    } else if (strcmp(arg, "--vector") == 0) {
      get_vector = true;
    }
  }

  return CLD2::RunTests(flags, get_vector);
}

View File

@@ -0,0 +1,649 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Generated by ngram_merge on 2011-01-21 10:50:46 from:
// score_me file /export/hda3/cld/pre2010/b0_samp_prune_20100722.utf8
// cld_generated_quad.bin, built 20110121 bytes 4443812 hash 462c-16c4
// cld_generated_deltaocta.bin, built 20110121 bytes 1053284 hash c834-81f5
// 1 # dsites Added text for
// ak haw ig kha ks mfe mo nd nso ny ve
// bs-Cyrl/Latn hr-Latn sr-Cyrl/Latn sr-ME-Latn
namespace CLD2 {
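// Average score per 1024 bytes.
// Four entries per language row, one per script column, in the order given
// by the column heading inside the table (Latn, Cyrl, Arab, Other script).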
static const int kAvgDeltaOctaScoreSize = 614 * 4;
extern const short kAvgDeltaOctaScore[kAvgDeltaOctaScoreSize] = {
// Latn Cyrl Arab Other script
// Updated 20140204 for CLD2 full
1163, 0, 0, 0, // 0 ENGLISH en
983, 0, 0, 0, // 1 DANISH da
1036, 0, 0, 0, // 2 DUTCH nl
1245, 0, 0, 0, // 3 FINNISH fi
873, 0, 0, 0, // 4 FRENCH fr
1146, 0, 0, 0, // 5 GERMAN de
0, 0, 0, 901, // 6 HEBREW iw
736, 0, 0, 0, // 7 ITALIAN it
0, 0, 0, 3100, // 8 Japanese ja
0, 0, 0, 3669, // 9 Korean ko
836, 0, 0, 0, // 10 NORWEGIAN no
1372, 0, 0, 0, // 11 POLISH pl
1044, 0, 0, 0, // 12 PORTUGUESE pt
0, 648, 0, 0, // 13 RUSSIAN ru
640, 0, 0, 0, // 14 SPANISH es
866, 0, 0, 0, // 15 SWEDISH sv
0, 0, 0, 1928, // 16 Chinese zh
1417, 0, 0, 0, // 17 CZECH cs
0, 0, 0, 1024, // 18 GREEK el
1212, 0, 0, 0, // 19 ICELANDIC is
1295, 0, 0, 0, // 20 LATVIAN lv
1169, 0, 0, 0, // 21 LITHUANIAN lt
898, 894, 0, 0, // 22 ROMANIAN ro
1370, 0, 0, 0, // 23 HUNGARIAN hu
1114, 0, 0, 0, // 24 ESTONIAN et
0, 0, 0, 0, // 25 Ignore xxx
0, 0, 0, 0, // 26 Unknown un
0, 684, 0, 0, // 27 BULGARIAN bg
575, 0, 0, 0, // 28 CROATIAN hr
574, 1023, 0, 0, // 29 SERBIAN sr
1341, 0, 0, 0, // 30 IRISH ga
809, 0, 0, 0, // 31 GALICIAN gl
1096, 0, 0, 0, // 32 TAGALOG tl
1184, 0, 0, 0, // 33 TURKISH tr
0, 801, 0, 0, // 34 UKRAINIAN uk
0, 0, 0, 822, // 35 HINDI hi
0, 793, 0, 0, // 36 MACEDONIAN mk
0, 0, 0, 569, // 37 BENGALI bn
1163, 0, 0, 0, // 38 INDONESIAN id
887, 0, 0, 0, // 39 LATIN la
1272, 0, 0, 0, // 40 MALAY ms
0, 0, 0, 1024, // 41 MALAYALAM ml
1593, 0, 0, 0, // 42 WELSH cy
0, 0, 0, 620, // 43 NEPALI ne
0, 0, 0, 1024, // 44 TELUGU te
1265, 0, 0, 0, // 45 ALBANIAN sq
0, 0, 0, 1024, // 46 TAMIL ta
0, 862, 0, 0, // 47 BELARUSIAN be
1031, 0, 0, 0, // 48 JAVANESE jw
701, 0, 0, 0, // 49 OCCITAN oc
0, 0, 981, 0, // 50 URDU ur
0, 0, 0, 614, // 51 BIHARI bh
0, 0, 0, 1024, // 52 GUJARATI gu
0, 0, 0, 1024, // 53 THAI th
0, 0, 872, 0, // 54 ARABIC ar
720, 0, 0, 0, // 55 CATALAN ca
844, 0, 0, 0, // 56 ESPERANTO eo
1312, 0, 0, 0, // 57 BASQUE eu
489, 0, 0, 0, // 58 INTERLINGUA ia
0, 0, 0, 1024, // 59 KANNADA kn
0, 0, 0, 1024, // 60 PUNJABI pa
1492, 0, 0, 0, // 61 SCOTS_GAELIC gd
1058, 0, 0, 0, // 62 SWAHILI sw
744, 0, 0, 0, // 63 SLOVENIAN sl
0, 0, 0, 643, // 64 MARATHI mr
1225, 0, 0, 0, // 65 MALTESE mt
1450, 0, 0, 0, // 66 VIETNAMESE vi
877, 0, 0, 0, // 67 FRISIAN fy
1396, 0, 0, 0, // 68 SLOVAK sk
0, 0, 0, 1908, // 69 ChineseT zh-Hant
1135, 0, 0, 0, // 70 FAROESE fo
887, 0, 0, 0, // 71 SUNDANESE su
1263, 966, 1054, 0, // 72 UZBEK uz
0, 0, 0, 469, // 73 AMHARIC am
1393, 0, 0, 0, // 74 AZERBAIJANI az
0, 0, 0, 1024, // 75 GEORGIAN ka
0, 0, 0, 465, // 76 TIGRINYA ti
0, 0, 890, 0, // 77 PERSIAN fa
608, 0, 0, 0, // 78 BOSNIAN bs
0, 0, 0, 1024, // 79 SINHALESE si
996, 0, 0, 0, // 80 NORWEGIAN_N nn
0, 0, 0, 0, // 81 81
0, 0, 0, 0, // 82 82
1081, 0, 0, 0, // 83 XHOSA xh
1254, 0, 0, 0, // 84 ZULU zu
1223, 0, 0, 0, // 85 GUARANI gn
961, 0, 0, 0, // 86 SESOTHO st
1226, 748, 0, 0, // 87 TURKMEN tk
0, 941, 963, 0, // 88 KYRGYZ ky
1230, 0, 0, 0, // 89 BRETON br
0, 0, 0, 0, // 90 TWI tw
0, 0, 0, 1088, // 91 YIDDISH yi
0, 0, 0, 0, // 92 92
1375, 0, 0, 0, // 93 SOMALI so
461, 1068, 1019, 0, // 94 UIGHUR ug
718, 0, 1074, 0, // 95 KURDISH ku Latn new 2014.10.15
0, 1130, 0, 1024, // 96 MONGOLIAN mn
0, 0, 0, 1024, // 97 ARMENIAN hy
0, 0, 0, 1024, // 98 LAOTHIAN lo
0, 0, 947, 0, // 99 SINDHI sd
878, 0, 0, 0, // 100 RHAETO_ROMANCE rm
1072, 0, 0, 0, // 101 AFRIKAANS af
963, 0, 0, 0, // 102 LUXEMBOURGISH lb
0, 0, 0, 1024, // 103 BURMESE my
0, 0, 0, 1024, // 104 KHMER km
0, 0, 0, 684, // 105 TIBETAN bo
0, 0, 0, 1024, // 106 DHIVEHI dv
0, 0, 0, 1024, // 107 CHEROKEE chr
0, 0, 0, 1024, // 108 SYRIAC syr
0, 0, 0, 1024, // 109 LIMBU lif
0, 0, 0, 1024, // 110 ORIYA or
0, 0, 0, 610, // 111 ASSAMESE as
740, 0, 0, 0, // 112 CORSICAN co
607, 0, 0, 0, // 113 INTERLINGUE ie
0, 955, 744, 0, // 114 KAZAKH kk
1060, 0, 0, 0, // 115 LINGALA ln
0, 0, 0, 0, // 116 116
0, 0, 888, 0, // 117 PASHTO ps
1613, 0, 0, 0, // 118 QUECHUA qu
1228, 0, 0, 0, // 119 SHONA sn
0, 1076, 0, 0, // 120 TAJIK tg
1219, 860, 0, 0, // 121 TATAR tt
1257, 0, 0, 0, // 122 TONGA to
920, 0, 0, 0, // 123 YORUBA yo
0, 0, 0, 0, // 124 124
0, 0, 0, 0, // 125 125
0, 0, 0, 0, // 126 126
0, 0, 0, 0, // 127 127
1262, 0, 0, 0, // 128 MAORI mi
1106, 0, 0, 0, // 129 WOLOF wo
0, 784, 0, 0, // 130 ABKHAZIAN ab
861, 0, 0, 0, // 131 AFAR aa
1371, 0, 0, 0, // 132 AYMARA ay
0, 870, 0, 0, // 133 BASHKIR ba
1411, 0, 0, 0, // 134 BISLAMA bi
0, 0, 0, 711, // 135 DZONGKHA dz
1286, 0, 0, 0, // 136 FIJIAN fj
1783, 0, 0, 0, // 137 GREENLANDIC kl
1180, 0, 0, 0, // 138 HAUSA ha
1051, 0, 0, 0, // 139 HAITIAN_CREOLE ht
800, 0, 0, 0, // 140 INUPIAK ik
0, 0, 0, 1024, // 141 INUKTITUT iu
0, 0, 868, 0, // 142 KASHMIRI ks
1433, 0, 0, 0, // 143 KINYARWANDA rw
1530, 0, 0, 0, // 144 MALAGASY mg
943, 0, 0, 0, // 145 NAURU na
1454, 0, 0, 0, // 146 OROMO om
1060, 0, 0, 0, // 147 RUNDI rn
971, 0, 0, 0, // 148 SAMOAN sm
1589, 0, 0, 0, // 149 SANGO sg
861, 0, 0, 461, // 150 SANSKRIT sa
944, 0, 0, 0, // 151 SISWANT ss
1240, 0, 0, 0, // 152 TSONGA ts
1005, 0, 0, 0, // 153 TSWANA tn
1148, 0, 0, 0, // 154 VOLAPUK vo
1658, 0, 0, 0, // 155 ZHUANG za
1191, 0, 0, 0, // 156 KHASI kha
1088, 0, 0, 0, // 157 SCOTS sco
1026, 0, 0, 0, // 158 GANDA lg
1374, 0, 0, 0, // 159 MANX gv
0, 0, 0, 0, // 160 MONTENEGRIN sr-ME
1265, 0, 0, 0, // 161 AKAN ak
1226, 0, 0, 0, // 162 IGBO ig
931, 0, 0, 0, // 163 MAURITIAN_CREOLE mfe
1040, 0, 0, 0, // 164 HAWAIIAN haw
1102, 0, 0, 0, // 165 CEBUANO ceb
0, 0, 0, 0, // 166 EWE ee
0, 0, 0, 0, // 167 GA gaa
1927, 0, 0, 0, // 168 HMONG hmn
0, 0, 0, 0, // 169 KRIO kri
0, 0, 0, 0, // 170 LOZI loz
0, 0, 0, 0, // 171 LUBA_LULUA lua
0, 0, 0, 0, // 172 LUO_KENYA_AND_TANZANIA luo
0, 0, 0, 0, // 173 NEWARI new
1133, 0, 0, 0, // 174 NYANJA ny
0, 0, 0, 0, // 175 OSSETIAN os
0, 0, 0, 0, // 176 PAMPANGA pam
1007, 0, 0, 0, // 177 PEDI nso
0, 0, 0, 0, // 178 RAJASTHANI raj
975, 0, 0, 0, // 179 SESELWA crs
0, 0, 0, 0, // 180 TUMBUKA tum
1496, 0, 0, 0, // 181 VENDA ve
987, 0, 0, 0, // 182 WARAY_PHILIPPINES war
0, 0, 0, 0, // 183 183
0, 0, 0, 0, // 184 184
0, 0, 0, 0, // 185 185
0, 0, 0, 0, // 186 186
0, 0, 0, 0, // 187 187
0, 0, 0, 0, // 188 188
0, 0, 0, 0, // 189 189
0, 0, 0, 0, // 190 190
0, 0, 0, 0, // 191 191
0, 0, 0, 0, // 192 192
0, 0, 0, 0, // 193 193
0, 0, 0, 0, // 194 194
0, 0, 0, 0, // 195 195
0, 0, 0, 0, // 196 196
0, 0, 0, 0, // 197 197
0, 0, 0, 0, // 198 198
0, 0, 0, 0, // 199 199
0, 0, 0, 0, // 200 200
0, 0, 0, 0, // 201 201
0, 0, 0, 0, // 202 202
0, 0, 0, 0, // 203 203
0, 0, 0, 0, // 204 204
0, 0, 0, 0, // 205 205
0, 0, 0, 0, // 206 206
0, 0, 0, 0, // 207 207
0, 0, 0, 0, // 208 208
0, 0, 0, 0, // 209 209
0, 0, 0, 0, // 210 210
0, 0, 0, 0, // 211 211
0, 0, 0, 0, // 212 212
0, 0, 0, 0, // 213 213
0, 0, 0, 0, // 214 214
0, 0, 0, 0, // 215 215
0, 0, 0, 0, // 216 216
0, 0, 0, 0, // 217 217
0, 0, 0, 0, // 218 218
0, 0, 0, 0, // 219 219
0, 0, 0, 0, // 220 220
0, 0, 0, 0, // 221 221
0, 0, 0, 0, // 222 222
0, 0, 0, 0, // 223 223
0, 0, 0, 0, // 224 224
0, 0, 0, 0, // 225 225
0, 0, 0, 0, // 226 226
0, 0, 0, 0, // 227 227
0, 0, 0, 0, // 228 228
0, 0, 0, 0, // 229 229
0, 0, 0, 0, // 230 230
0, 0, 0, 0, // 231 231
0, 0, 0, 0, // 232 232
0, 0, 0, 0, // 233 233
0, 0, 0, 0, // 234 234
0, 0, 0, 0, // 235 235
0, 0, 0, 0, // 236 236
0, 0, 0, 0, // 237 237
0, 0, 0, 0, // 238 238
0, 0, 0, 0, // 239 239
0, 0, 0, 0, // 240 240
0, 0, 0, 0, // 241 241
0, 0, 0, 0, // 242 242
0, 0, 0, 0, // 243 243
0, 0, 0, 0, // 244 244
0, 0, 0, 0, // 245 245
0, 0, 0, 0, // 246 246
0, 0, 0, 0, // 247 247
0, 0, 0, 0, // 248 248
0, 0, 0, 0, // 249 249
0, 0, 0, 0, // 250 250
0, 0, 0, 0, // 251 251
0, 0, 0, 0, // 252 252
0, 0, 0, 0, // 253 253
0, 0, 0, 0, // 254 254
0, 0, 0, 0, // 255 255
0, 0, 0, 0, // 256 256
0, 0, 0, 0, // 257 257
0, 0, 0, 0, // 258 258
0, 0, 0, 0, // 259 259
0, 0, 0, 0, // 260 260
0, 0, 0, 0, // 261 261
0, 0, 0, 0, // 262 262
0, 0, 0, 0, // 263 263
0, 0, 0, 0, // 264 264
0, 0, 0, 0, // 265 265
0, 0, 0, 0, // 266 266
0, 0, 0, 0, // 267 267
0, 0, 0, 0, // 268 268
0, 0, 0, 0, // 269 269
0, 0, 0, 0, // 270 270
0, 0, 0, 0, // 271 271
0, 0, 0, 0, // 272 272
0, 0, 0, 0, // 273 273
0, 0, 0, 0, // 274 274
0, 0, 0, 0, // 275 275
0, 0, 0, 0, // 276 276
0, 0, 0, 0, // 277 277
0, 0, 0, 0, // 278 278
0, 0, 0, 0, // 279 279
0, 0, 0, 0, // 280 280
0, 0, 0, 0, // 281 281
0, 0, 0, 0, // 282 282
0, 0, 0, 0, // 283 283
0, 0, 0, 0, // 284 284
0, 0, 0, 0, // 285 285
0, 0, 0, 0, // 286 286
0, 0, 0, 0, // 287 287
0, 0, 0, 0, // 288 288
0, 0, 0, 0, // 289 289
0, 0, 0, 0, // 290 290
0, 0, 0, 0, // 291 291
0, 0, 0, 0, // 292 292
0, 0, 0, 0, // 293 293
0, 0, 0, 0, // 294 294
0, 0, 0, 0, // 295 295
0, 0, 0, 0, // 296 296
0, 0, 0, 0, // 297 297
0, 0, 0, 0, // 298 298
0, 0, 0, 0, // 299 299
0, 0, 0, 0, // 300 300
0, 0, 0, 0, // 301 301
0, 0, 0, 0, // 302 302
0, 0, 0, 0, // 303 303
0, 0, 0, 0, // 304 304
0, 0, 0, 0, // 305 305
0, 0, 0, 0, // 306 306
0, 0, 0, 0, // 307 307
0, 0, 0, 0, // 308 308
0, 0, 0, 0, // 309 309
0, 0, 0, 0, // 310 310
0, 0, 0, 0, // 311 311
0, 0, 0, 0, // 312 312
0, 0, 0, 0, // 313 313
0, 0, 0, 0, // 314 314
0, 0, 0, 0, // 315 315
0, 0, 0, 0, // 316 316
0, 0, 0, 0, // 317 317
0, 0, 0, 0, // 318 318
0, 0, 0, 0, // 319 319
0, 0, 0, 0, // 320 320
0, 0, 0, 0, // 321 321
0, 0, 0, 0, // 322 322
0, 0, 0, 0, // 323 323
0, 0, 0, 0, // 324 324
0, 0, 0, 0, // 325 325
0, 0, 0, 0, // 326 326
0, 0, 0, 0, // 327 327
0, 0, 0, 0, // 328 328
0, 0, 0, 0, // 329 329
0, 0, 0, 0, // 330 330
0, 0, 0, 0, // 331 331
0, 0, 0, 0, // 332 332
0, 0, 0, 0, // 333 333
0, 0, 0, 0, // 334 334
0, 0, 0, 0, // 335 335
0, 0, 0, 0, // 336 336
0, 0, 0, 0, // 337 337
0, 0, 0, 0, // 338 338
0, 0, 0, 0, // 339 339
0, 0, 0, 0, // 340 340
0, 0, 0, 0, // 341 341
0, 0, 0, 0, // 342 342
0, 0, 0, 0, // 343 343
0, 0, 0, 0, // 344 344
0, 0, 0, 0, // 345 345
0, 0, 0, 0, // 346 346
0, 0, 0, 0, // 347 347
0, 0, 0, 0, // 348 348
0, 0, 0, 0, // 349 349
0, 0, 0, 0, // 350 350
0, 0, 0, 0, // 351 351
0, 0, 0, 0, // 352 352
0, 0, 0, 0, // 353 353
0, 0, 0, 0, // 354 354
0, 0, 0, 0, // 355 355
0, 0, 0, 0, // 356 356
0, 0, 0, 0, // 357 357
0, 0, 0, 0, // 358 358
0, 0, 0, 0, // 359 359
0, 0, 0, 0, // 360 360
0, 0, 0, 0, // 361 361
0, 0, 0, 0, // 362 362
0, 0, 0, 0, // 363 363
0, 0, 0, 0, // 364 364
0, 0, 0, 0, // 365 365
0, 0, 0, 0, // 366 366
0, 0, 0, 0, // 367 367
0, 0, 0, 0, // 368 368
0, 0, 0, 0, // 369 369
0, 0, 0, 0, // 370 370
0, 0, 0, 0, // 371 371
0, 0, 0, 0, // 372 372
0, 0, 0, 0, // 373 373
0, 0, 0, 0, // 374 374
0, 0, 0, 0, // 375 375
0, 0, 0, 0, // 376 376
0, 0, 0, 0, // 377 377
0, 0, 0, 0, // 378 378
0, 0, 0, 0, // 379 379
0, 0, 0, 0, // 380 380
0, 0, 0, 0, // 381 381
0, 0, 0, 0, // 382 382
0, 0, 0, 0, // 383 383
0, 0, 0, 0, // 384 384
0, 0, 0, 0, // 385 385
0, 0, 0, 0, // 386 386
0, 0, 0, 0, // 387 387
0, 0, 0, 0, // 388 388
0, 0, 0, 0, // 389 389
0, 0, 0, 0, // 390 390
0, 0, 0, 0, // 391 391
0, 0, 0, 0, // 392 392
0, 0, 0, 0, // 393 393
0, 0, 0, 0, // 394 394
0, 0, 0, 0, // 395 395
0, 0, 0, 0, // 396 396
0, 0, 0, 0, // 397 397
0, 0, 0, 0, // 398 398
0, 0, 0, 0, // 399 399
0, 0, 0, 0, // 400 400
0, 0, 0, 0, // 401 401
0, 0, 0, 0, // 402 402
0, 0, 0, 0, // 403 403
0, 0, 0, 0, // 404 404
0, 0, 0, 0, // 405 405
0, 0, 0, 0, // 406 406
0, 0, 0, 0, // 407 407
0, 0, 0, 0, // 408 408
0, 0, 0, 0, // 409 409
0, 0, 0, 0, // 410 410
0, 0, 0, 0, // 411 411
0, 0, 0, 0, // 412 412
0, 0, 0, 0, // 413 413
0, 0, 0, 0, // 414 414
0, 0, 0, 0, // 415 415
0, 0, 0, 0, // 416 416
0, 0, 0, 0, // 417 417
0, 0, 0, 0, // 418 418
0, 0, 0, 0, // 419 419
0, 0, 0, 0, // 420 420
0, 0, 0, 0, // 421 421
0, 0, 0, 0, // 422 422
0, 0, 0, 0, // 423 423
0, 0, 0, 0, // 424 424
0, 0, 0, 0, // 425 425
0, 0, 0, 0, // 426 426
0, 0, 0, 0, // 427 427
0, 0, 0, 0, // 428 428
0, 0, 0, 0, // 429 429
0, 0, 0, 0, // 430 430
0, 0, 0, 0, // 431 431
0, 0, 0, 0, // 432 432
0, 0, 0, 0, // 433 433
0, 0, 0, 0, // 434 434
0, 0, 0, 0, // 435 435
0, 0, 0, 0, // 436 436
0, 0, 0, 0, // 437 437
0, 0, 0, 0, // 438 438
0, 0, 0, 0, // 439 439
0, 0, 0, 0, // 440 440
0, 0, 0, 0, // 441 441
0, 0, 0, 0, // 442 442
0, 0, 0, 0, // 443 443
0, 0, 0, 0, // 444 444
0, 0, 0, 0, // 445 445
0, 0, 0, 0, // 446 446
0, 0, 0, 0, // 447 447
0, 0, 0, 0, // 448 448
0, 0, 0, 0, // 449 449
0, 0, 0, 0, // 450 450
0, 0, 0, 0, // 451 451
0, 0, 0, 0, // 452 452
0, 0, 0, 0, // 453 453
0, 0, 0, 0, // 454 454
0, 0, 0, 0, // 455 455
0, 0, 0, 0, // 456 456
0, 0, 0, 0, // 457 457
0, 0, 0, 0, // 458 458
0, 0, 0, 0, // 459 459
0, 0, 0, 0, // 460 460
0, 0, 0, 0, // 461 461
0, 0, 0, 0, // 462 462
0, 0, 0, 0, // 463 463
0, 0, 0, 0, // 464 464
0, 0, 0, 0, // 465 465
0, 0, 0, 0, // 466 466
0, 0, 0, 0, // 467 467
0, 0, 0, 0, // 468 468
0, 0, 0, 0, // 469 469
0, 0, 0, 0, // 470 470
0, 0, 0, 0, // 471 471
0, 0, 0, 0, // 472 472
0, 0, 0, 0, // 473 473
0, 0, 0, 0, // 474 474
0, 0, 0, 0, // 475 475
0, 0, 0, 0, // 476 476
0, 0, 0, 0, // 477 477
0, 0, 0, 0, // 478 478
0, 0, 0, 0, // 479 479
0, 0, 0, 0, // 480 480
0, 0, 0, 0, // 481 481
0, 0, 0, 0, // 482 482
0, 0, 0, 0, // 483 483
0, 0, 0, 0, // 484 484
0, 0, 0, 0, // 485 485
0, 0, 0, 0, // 486 486
0, 0, 0, 0, // 487 487
0, 0, 0, 0, // 488 488
0, 0, 0, 0, // 489 489
0, 0, 0, 0, // 490 490
0, 0, 0, 0, // 491 491
0, 0, 0, 0, // 492 492
0, 0, 0, 0, // 493 493
0, 0, 0, 0, // 494 494
0, 0, 0, 0, // 495 495
0, 0, 0, 0, // 496 496
0, 0, 0, 0, // 497 497
0, 0, 0, 0, // 498 498
0, 0, 0, 0, // 499 499
0, 0, 0, 0, // 500 500
0, 0, 0, 0, // 501 501
0, 0, 0, 0, // 502 502
0, 0, 0, 0, // 503 503
0, 0, 0, 0, // 504 504
0, 0, 0, 0, // 505 505
866, 0, 0, 0, // 506 NDEBELE nr
0, 0, 0, 0, // 507 X_BORK_BORK_BORK zzb
1657, 0, 0, 0, // 508 X_PIG_LATIN zzp
0, 0, 0, 0, // 509 X_HACKER zzh
1427, 0, 0, 0, // 510 X_KLINGON tlh
0, 0, 0, 0, // 511 X_ELMER_FUDD zze
0, 0, 0, 0, // 512 X_Common xx-Zyyy
0, 0, 0, 0, // 513 X_Latin xx-Latn
0, 0, 0, 0, // 514 X_Greek xx-Grek
0, 0, 0, 0, // 515 X_Cyrillic xx-Cyrl
0, 0, 0, 0, // 516 X_Armenian xx-Armn
0, 0, 0, 0, // 517 X_Hebrew xx-Hebr
0, 0, 0, 0, // 518 X_Arabic xx-Arab
0, 0, 0, 0, // 519 X_Syriac xx-Syrc
0, 0, 0, 0, // 520 X_Thaana xx-Thaa
0, 0, 0, 0, // 521 X_Devanagari xx-Deva
0, 0, 0, 0, // 522 X_Bengali xx-Beng
0, 0, 0, 0, // 523 X_Gurmukhi xx-Guru
0, 0, 0, 0, // 524 X_Gujarati xx-Gujr
0, 0, 0, 0, // 525 X_Oriya xx-Orya
0, 0, 0, 0, // 526 X_Tamil xx-Taml
0, 0, 0, 0, // 527 X_Telugu xx-Telu
0, 0, 0, 0, // 528 X_Kannada xx-Knda
0, 0, 0, 0, // 529 X_Malayalam xx-Mlym
0, 0, 0, 0, // 530 X_Sinhala xx-Sinh
0, 0, 0, 0, // 531 X_Thai xx-Thai
0, 0, 0, 0, // 532 X_Lao xx-Laoo
0, 0, 0, 0, // 533 X_Tibetan xx-Tibt
0, 0, 0, 0, // 534 X_Myanmar xx-Mymr
0, 0, 0, 0, // 535 X_Georgian xx-Geor
0, 0, 0, 0, // 536 X_Hangul xx-Hang
0, 0, 0, 0, // 537 X_Ethiopic xx-Ethi
0, 0, 0, 0, // 538 X_Cherokee xx-Cher
0, 0, 0, 0, // 539 X_Canadian_Aboriginal xx-Cans
0, 0, 0, 0, // 540 X_Ogham xx-Ogam
0, 0, 0, 0, // 541 X_Runic xx-Runr
0, 0, 0, 0, // 542 X_Khmer xx-Khmr
0, 0, 0, 0, // 543 X_Mongolian xx-Mong
0, 0, 0, 0, // 544 X_Hiragana xx-Hira
0, 0, 0, 0, // 545 X_Katakana xx-Kana
0, 0, 0, 0, // 546 X_Bopomofo xx-Bopo
0, 0, 0, 0, // 547 X_Han xx-Hani
0, 0, 0, 0, // 548 X_Yi xx-Yiii
0, 0, 0, 0, // 549 X_Old_Italic xx-Ital
0, 0, 0, 0, // 550 X_Gothic xx-Goth
0, 0, 0, 0, // 551 X_Deseret xx-Dsrt
0, 0, 0, 0, // 552 X_Inherited xx-Qaai
0, 0, 0, 0, // 553 X_Tagalog xx-Tglg
0, 0, 0, 0, // 554 X_Hanunoo xx-Hano
0, 0, 0, 0, // 555 X_Buhid xx-Buhd
0, 0, 0, 0, // 556 X_Tagbanwa xx-Tagb
0, 0, 0, 0, // 557 X_Limbu xx-Limb
0, 0, 0, 0, // 558 X_Tai_Le xx-Tale
0, 0, 0, 0, // 559 X_Linear_B xx-Linb
0, 0, 0, 0, // 560 X_Ugaritic xx-Ugar
0, 0, 0, 0, // 561 X_Shavian xx-Shaw
0, 0, 0, 0, // 562 X_Osmanya xx-Osma
0, 0, 0, 0, // 563 X_Cypriot xx-Cprt
0, 0, 0, 0, // 564 X_Braille xx-Brai
0, 0, 0, 0, // 565 X_Buginese xx-Bugi
0, 0, 0, 0, // 566 X_Coptic xx-Copt
0, 0, 0, 0, // 567 X_New_Tai_Lue xx-Talu
0, 0, 0, 0, // 568 X_Glagolitic xx-Glag
0, 0, 0, 0, // 569 X_Tifinagh xx-Tfng
0, 0, 0, 0, // 570 X_Syloti_Nagri xx-Sylo
0, 0, 0, 0, // 571 X_Old_Persian xx-Xpeo
0, 0, 0, 0, // 572 X_Kharoshthi xx-Khar
0, 0, 0, 0, // 573 X_Balinese xx-Bali
0, 0, 0, 0, // 574 X_Cuneiform xx-Xsux
0, 0, 0, 0, // 575 X_Phoenician xx-Phnx
0, 0, 0, 0, // 576 X_Phags_Pa xx-Phag
0, 0, 0, 0, // 577 X_Nko xx-Nkoo
0, 0, 0, 0, // 578 X_Sundanese xx-Sund
0, 0, 0, 0, // 579 X_Lepcha xx-Lepc
0, 0, 0, 0, // 580 X_Ol_Chiki xx-Olck
0, 0, 0, 0, // 581 X_Vai xx-Vaii
0, 0, 0, 0, // 582 X_Saurashtra xx-Saur
0, 0, 0, 0, // 583 X_Kayah_Li xx-Kali
0, 0, 0, 0, // 584 X_Rejang xx-Rjng
0, 0, 0, 0, // 585 X_Lycian xx-Lyci
0, 0, 0, 0, // 586 X_Carian xx-Cari
0, 0, 0, 0, // 587 X_Lydian xx-Lydi
0, 0, 0, 0, // 588 X_Cham xx-Cham
0, 0, 0, 0, // 589 X_Tai_Tham xx-Lana
0, 0, 0, 0, // 590 X_Tai_Viet xx-Tavt
0, 0, 0, 0, // 591 X_Avestan xx-Avst
0, 0, 0, 0, // 592 X_Egyptian_Hieroglyphs xx-Egyp
0, 0, 0, 0, // 593 X_Samaritan xx-Samr
0, 0, 0, 0, // 594 X_Lisu xx-Lisu
0, 0, 0, 0, // 595 X_Bamum xx-Bamu
0, 0, 0, 0, // 596 X_Javanese xx-Java
0, 0, 0, 0, // 597 X_Meetei_Mayek xx-Mtei
0, 0, 0, 0, // 598 X_Imperial_Aramaic xx-Armi
0, 0, 0, 0, // 599 X_Old_South_Arabian xx-Sarb
0, 0, 0, 0, // 600 X_Inscriptional_Parthian xx-Prti
0, 0, 0, 0, // 601 X_Inscriptional_Pahlavi xx-Phli
0, 0, 0, 0, // 602 X_Old_Turkic xx-Orkh
0, 0, 0, 0, // 603 X_Kaithi xx-Kthi
0, 0, 0, 0, // 604 X_Batak xx-Batk
0, 0, 0, 0, // 605 X_Brahmi xx-Brah
0, 0, 0, 0, // 606 X_Mandaic xx-Mand
0, 0, 0, 0, // 607 X_Chakma xx-Cakm
0, 0, 0, 0, // 608 X_Meroitic_Cursive xx-Merc
0, 0, 0, 0, // 609 X_Meroitic_Hieroglyphs xx-Mero
0, 0, 0, 0, // 610 X_Miao xx-Plrd
0, 0, 0, 0, // 611 X_Sharada xx-Shrd
0, 0, 0, 0, // 612 X_Sora_Sompeng xx-Sora
0, 0, 0, 0, // 613 X_Takri xx-Takr
};
} // End namespace CLD2

72
internal/compile20141015.sh Executable file
View File

@@ -0,0 +1,72 @@
#!/bin/sh
#
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds the four CLD2 test binaries that use the 20141015 Chrome tables.
#
# Every binary is compiled from one test driver plus the same library sources;
# only the driver, the quadgram table (_2 or _16) and an optional -D switch
# differ. A binary is reported as "compiled" only if g++ actually succeeded,
# and the script exits non-zero if any build failed.

# Library and fixed table sources shared by every binary, in the original
# command-line order (everything that precedes the quadgram table).
COMMON_SRCS="cldutil.cc cldutil_shared.cc compact_lang_det.cc compact_lang_det_hint_code.cc \
compact_lang_det_impl.cc debug.cc fixunicodevalue.cc \
generated_entities.cc generated_language.cc generated_ulscript.cc \
getonescriptspan.cc lang_script.cc offsetmap.cc scoreonescriptspan.cc \
tote.cc utf8statetable.cc \
cld_generated_cjk_uni_prop_80.cc cld2_generated_cjk_compatible.cc \
cld_generated_cjk_delta_bi_4.cc generated_distinct_bi_0.cc"

# Table sources that follow the quadgram table on every command line.
TABLE_TAIL_SRCS="cld2_generated_deltaoctachrome20141015.cc \
cld2_generated_distinctoctachrome20141015.cc cld_generated_score_quad_octa_20141015_2.cc"

status=0

# build OUTPUT QUAD_TABLE ARGS...
#   OUTPUT      name of the executable to produce
#   QUAD_TABLE  quadgram table source file to link in
#   ARGS        leading g++ arguments: optional -D switches, then the driver
build() {
  output="$1"
  quad_table="$2"
  shift 2
  # $COMMON_SRCS and $TABLE_TAIL_SRCS are deliberately unquoted so that they
  # split into one argument per source file.
  if g++ -O2 -m64 "$@" $COMMON_SRCS "$quad_table" $TABLE_TAIL_SRCS -o "$output"; then
    echo " $output compiled"
  else
    echo " $output FAILED to compile" >&2
    status=1
  fi
}

build compact_lang_det_test_chrome20141015_2 \
  cld2_generated_quadchrome20141015_2.cc \
  compact_lang_det_test.cc

build compact_lang_det_test_chrome20141015_16 \
  cld2_generated_quadchrome20141015_16.cc \
  compact_lang_det_test.cc

build cld2_unittest_chrome20141015_2 \
  cld2_generated_quadchrome20141015_2.cc \
  cld2_unittest_20141015.cc

build cld2_unittest_avoid_chrome20141015_2 \
  cld2_generated_quadchrome20141015_2.cc \
  -Davoid_utf8_string_constants cld2_unittest_20141015.cc

exit $status