From 29e7b10843cd9630ee83fd32c0864f805bfcbb60 Mon Sep 17 00:00:00 2001 From: "dsites@google.com" Date: Fri, 31 Jan 2014 18:58:58 +0000 Subject: [PATCH] Move DetectLanguageVersion, dual-table lookup git-svn-id: https://cld2.googlecode.com/svn/trunk@101 b252ecd4-b096-bf77-eb8e-91563289f87e --- internal/compact_lang_det_impl.cc | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/internal/compact_lang_det_impl.cc b/internal/compact_lang_det_impl.cc index a2a96b9..007cc98 100644 --- a/internal/compact_lang_det_impl.cc +++ b/internal/compact_lang_det_impl.cc @@ -14,6 +14,7 @@ // // Author: dsites@google.com (Dick Sites) +// Updated 2014.01 for dual table lookup // #include @@ -45,6 +46,10 @@ using namespace std; // cld2_generated_distinctocta*.cc // cld_generated_score_quad_octa_1024_256.cc +// 2014.01 Now implementing quadgram dual lookup tables, to allow main table +// sizes that are 1/3/5 times a power of two, instead of just powers of two. +// Gives more flexibility of total footprint for CLD2. + extern const int kLanguageToPLangSize; extern const int kCloseSetSize; @@ -53,10 +58,12 @@ extern const CLD2TableSummary kCjkCompat_obj; extern const CLD2TableSummary kCjkDeltaBi_obj; extern const CLD2TableSummary kDistinctBiTable_obj; extern const CLD2TableSummary kQuad_obj; +extern const CLD2TableSummary kQuad_obj2; // Dual lookup tables extern const CLD2TableSummary kDeltaOcta_obj; extern const CLD2TableSummary kDistinctOcta_obj; extern const short kAvgDeltaOctaScore[]; +// This initializes kScoringtables.quadgram_obj etc. 
static const ScoringTables kScoringtables = { &cld_generated_CjkUni_obj, &kCjkCompat_obj, @@ -64,6 +71,7 @@ static const ScoringTables kScoringtables = { &kDistinctBiTable_obj, &kQuad_obj, + &kQuad_obj2, // Dual lookup tables &kDeltaOcta_obj, &kDistinctOcta_obj, @@ -1614,10 +1622,8 @@ Language DetectLanguageSummaryV2( } } - // Print incoming text in its entirety - // fprintf(stderr, "DetectLanguageSummaryV2:\n"); - // PrintHtmlEscapedText(stderr, buffer, buffer_length); - // fprintf(stderr, "
\n"); + // Exit now if no text + if (buffer_length == 0) {return UNKNOWN_LANGUAGE;} // Document totals DocTote doc_tote; // Reliability = 0..100 @@ -1960,4 +1966,17 @@ Language DetectLanguageSummaryV2( is_reliable); } + +// For debugging and wrappers. Not thread safe: calls share this one buffer. +static char temp_detectlanguageversion[32]; + +// Return version text string; the pointer refers to the static buffer above. +// String is "code_version - data_build_date"; the write is size-bounded. +const char* DetectLanguageVersion() { + snprintf(temp_detectlanguageversion, sizeof(temp_detectlanguageversion), + "V2.0 - %u", kQuad_obj.kCLDTableBuildDate); + return temp_detectlanguageversion; +} + + } // End namespace CLD2