dual-table lookup

git-svn-id: https://cld2.googlecode.com/svn/trunk@97 b252ecd4-b096-bf77-eb8e-91563289f87e
This commit is contained in:
dsites@google.com
2014-01-30 00:31:55 +00:00
parent f228098766
commit 082b04a6d8

View File

@@ -14,6 +14,7 @@
//
// Author: dsites@google.com (Dick Sites)
// Updated 2014.01 for dual table lookup
//
#include "cldutil.h"
@@ -323,6 +324,8 @@ int GetQuadHits(const char* text,
// Local copies
const CLD2TableSummary* quadgram_obj =
scoringcontext->scoringtables->quadgram_obj;
const CLD2TableSummary* quadgram_obj2 =
scoringcontext->scoringtables->quadgram_obj2;
int next_base = hitbuffer->next_base;
int next_base_limit = hitbuffer->maxscoringhits;
@@ -348,16 +351,26 @@ int GetQuadHits(const char* text,
// Filter out recent repeats
if ((quadhash != prior_quadhash[0]) && (quadhash != prior_quadhash[1])) {
// Look up this quadgram and save <offset, indirect>
uint32 indirect_flag = 0; // For dual tables
const CLD2TableSummary* hit_obj = quadgram_obj;
uint32 probs = QuadHashV3Lookup4(quadgram_obj, quadhash);
if ((probs == 0) && (quadgram_obj2->kCLDTableSize != 0)) {
// Try lookup in dual table if not found in first one
// Note: we need to know later which of two indirect tables to use.
indirect_flag = 0x80000000u;
hit_obj = quadgram_obj2;
probs = QuadHashV3Lookup4(quadgram_obj2, quadhash);
}
if (probs != 0) {
// Round-robin two entries of actual hits
prior_quadhash[next_prior_quadhash] = quadhash;
next_prior_quadhash = (next_prior_quadhash + 1) & 1;
// Save indirect subscript for later scoring; 1 or 2 langprobs
int indirect_subscr = probs & ~quadgram_obj->kCLDTableKeyMask;
int indirect_subscr = probs & ~hit_obj->kCLDTableKeyMask;
hitbuffer->base[next_base].offset = src - text; // Offset in text
hitbuffer->base[next_base].indirect = indirect_subscr;
// Flip the high bit for table2
hitbuffer->base[next_base].indirect = indirect_subscr | indirect_flag;
++next_base;
}
}
@@ -395,8 +408,8 @@ int GetQuadHits(const char* text,
// const tables
// const char* isrc, int srclen (in sscriptbuffer)
// intermediates:
// vector of octa <offset, probs> (which need quadgram_obj indirect tbale ot decode)
// vector of distinct <offset, probs> (which need quadgram_obj indirect tbale ot decode)
// vector of octa <offset, probs> (which need indirect table to decode)
// vector of distinct <offset, probs> (which need indirect table to decode)
// Score up to 64KB of a single script span, doing both delta-octa and
// distinct words in one pass