From 082b04a6d8afd506e82649b9796eb7449d3e2065 Mon Sep 17 00:00:00 2001 From: "dsites@google.com" Date: Thu, 30 Jan 2014 00:31:55 +0000 Subject: [PATCH] dual-table lookup git-svn-id: https://cld2.googlecode.com/svn/trunk@97 b252ecd4-b096-bf77-eb8e-91563289f87e --- internal/cldutil.cc | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/internal/cldutil.cc b/internal/cldutil.cc index d2377ce..ecda9a5 100644 --- a/internal/cldutil.cc +++ b/internal/cldutil.cc @@ -14,6 +14,7 @@ // // Author: dsites@google.com (Dick Sites) +// Updated 2014.01 for dual table lookup // #include "cldutil.h" @@ -323,6 +324,8 @@ int GetQuadHits(const char* text, // Local copies const CLD2TableSummary* quadgram_obj = scoringcontext->scoringtables->quadgram_obj; + const CLD2TableSummary* quadgram_obj2 = + scoringcontext->scoringtables->quadgram_obj2; int next_base = hitbuffer->next_base; int next_base_limit = hitbuffer->maxscoringhits; @@ -348,16 +351,26 @@ int GetQuadHits(const char* text, // Filter out recent repeats if ((quadhash != prior_quadhash[0]) && (quadhash != prior_quadhash[1])) { // Look up this quadgram and save + uint32 indirect_flag = 0; // For dual tables + const CLD2TableSummary* hit_obj = quadgram_obj; uint32 probs = QuadHashV3Lookup4(quadgram_obj, quadhash); + if ((probs == 0) && (quadgram_obj2->kCLDTableSize != 0)) { + // Try lookup in dual table if not found in first one + // Note: we need to know later which of two indirect tables to use. + indirect_flag = 0x80000000u; + hit_obj = quadgram_obj2; + probs = QuadHashV3Lookup4(quadgram_obj2, quadhash); + } if (probs != 0) { // Round-robin two entries of actual hits prior_quadhash[next_prior_quadhash] = quadhash; next_prior_quadhash = (next_prior_quadhash + 1) & 1; // Save indirect subscript for later scoring; 1 or 2 langprobs - int indirect_subscr = probs & ~quadgram_obj->kCLDTableKeyMask; + int indirect_subscr = probs & ~hit_obj->kCLDTableKeyMask; hitbuffer->base[next_base].offset = src - text; // Offset in text - hitbuffer->base[next_base].indirect = indirect_subscr; + // Flip the high bit for table2 + hitbuffer->base[next_base].indirect = indirect_subscr | indirect_flag; ++next_base; } } @@ -395,8 +408,8 @@ int GetQuadHits(const char* text, // const tables // const char* isrc, int srclen (in sscriptbuffer) // intermediates: -// vector of octa (which need quadgram_obj indirect tbale ot decode) -// vector of distinct (which need quadgram_obj indirect tbale ot decode) +// vector of octa (which need indirect table to decode) +// vector of distinct (which need indirect table to decode) // Score up to 64KB of a single script span, doing both delta-octa and // distinct words in one pass