Add Is*Language for better hints

git-svn-id: https://cld2.googlecode.com/svn/trunk@12 b252ecd4-b096-bf77-eb8e-91563289f87e
This commit is contained in:
dsites@google.com
2013-07-15 23:23:28 +00:00
parent f6f760b5f8
commit a2142c1464
2 changed files with 25 additions and 4 deletions

View File

@@ -254,10 +254,13 @@ const char* ExtLanguageDeclaredName(const Language lang) {
return LanguageDeclaredName(lang); return LanguageDeclaredName(lang);
} }
// The explicit `extern` gives this definition external linkage; a
// namespace-scope const would otherwise default to internal linkage in C++.
// NOTE(review): presumably the number of close-language sets that
// LanguageCloseSet() can report -- confirm against its callers.
extern const int kCloseSetSize = 10;
// Returns which set of statistically-close languages lang is in. 0 means none. // Returns which set of statistically-close languages lang is in. 0 means none.
int LanguageCloseSet(Language lang) { int LanguageCloseSet(Language lang) {
// Scaffolding // Scaffolding
// id ms # INDONESIAN MALAY coef=0.4698 Problematic w/o extra words // id ms # INDONESIAN MALAY coef=0.4698 Problematic w/o extra words
// bo dz # TIBETAN DZONGKHA coef=0.4571 // bo dz # TIBETAN DZONGKHA coef=0.4571
// cs sk # CZECH SLOVAK coef=0.4273 // cs sk # CZECH SLOVAK coef=0.4273
// zu xh # ZULU XHOSA coef=0.3716 // zu xh # ZULU XHOSA coef=0.3716
@@ -337,6 +340,17 @@ Language FromPerScriptNumber(ULScript ulscript, uint8 perscript_number) {
} }
} }
// Return true if language can be in the Latin script.
//
// Looks up kLanguageToPLang[lang], uses that value to index
// kPLangToLanguageLatn, and reports true only when the entry found there is
// lang itself.
// Any lang outside [0, kLanguageToPLangSize) yields false instead of indexing
// past either end of the table.
bool IsLatnLanguage(Language lang) {
  // Range-check as a plain int so that a negative value (possible when the
  // enum's underlying type is signed) is rejected along with too-large ones.
  const int lang_index = static_cast<int>(lang);
  if ((lang_index < 0) || (lang_index >= kLanguageToPLangSize)) {return false;}
  return (lang == kPLangToLanguageLatn[kLanguageToPLang[lang_index]]);
}
// Return true if language can be in a non-Latin script.
//
// Looks up kLanguageToPLang[lang], uses that value to index
// kPLangToLanguageOthr, and reports true only when the entry found there is
// lang itself.
// Any lang outside [0, kLanguageToPLangSize) yields false instead of indexing
// past either end of the table.
bool IsOthrLanguage(Language lang) {
  // Range-check as a plain int so that a negative value (possible when the
  // enum's underlying type is signed) is rejected along with too-large ones.
  const int lang_index = static_cast<int>(lang);
  if ((lang_index < 0) || (lang_index >= kLanguageToPLangSize)) {return false;}
  return (lang == kPLangToLanguageOthr[kLanguageToPLang[lang_index]]);
}
//----------------------------------------------------------------------------// //----------------------------------------------------------------------------//

View File

@@ -109,9 +109,6 @@ int LScript4(ULScript ulscript);
// //
// The Language enum includes the fake language numbers for RTypeNone above. // The Language enum includes the fake language numbers for RTypeNone above.
// //
// In an open-source environment, the Google-specific Language enum is not
// available. Language decouples the two environments while maintaining
// internal compatibility.
// If the input is out of range or otherwise unrecognized, it is treated // If the input is out of range or otherwise unrecognized, it is treated
@@ -168,6 +165,16 @@ Language DefaultLanguage(ULScript ulscript);
uint8 PerScriptNumber(ULScript ulscript, Language lang); uint8 PerScriptNumber(ULScript ulscript, Language lang);
Language FromPerScriptNumber(ULScript ulscript, uint8 perscript_number); Language FromPerScriptNumber(ULScript ulscript, uint8 perscript_number);
// While the speed-sensitive processing deals with per-script language numbers,
// there is a need for low-performance dealing with original language numbers
// and unknown scripts, mostly for processing language hints.
// These routines let one derive a script class from a bare language.
// For languages written in multiple scripts, both of these can return true.
// True when lang can be written in the Latin script.
bool IsLatnLanguage(Language lang);
// True when lang can be written in a non-Latin script.
bool IsOthrLanguage(Language lang);
//----------------------------------------------------------------------------// //----------------------------------------------------------------------------//
// Other // // Other //
//----------------------------------------------------------------------------// //----------------------------------------------------------------------------//