Add Is*Language for better hints
git-svn-id: https://cld2.googlecode.com/svn/trunk@12 b252ecd4-b096-bf77-eb8e-91563289f87e
This commit is contained in:
@@ -254,10 +254,13 @@ const char* ExtLanguageDeclaredName(const Language lang) {
|
|||||||
return LanguageDeclaredName(lang);
|
return LanguageDeclaredName(lang);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
extern const int kCloseSetSize = 10;
|
||||||
|
|
||||||
// Returns which set of statistically-close languages lang is in. 0 means none.
|
// Returns which set of statistically-close languages lang is in. 0 means none.
|
||||||
int LanguageCloseSet(Language lang) {
|
int LanguageCloseSet(Language lang) {
|
||||||
// Scaffolding
|
// Scaffolding
|
||||||
// id ms # INDONESIAN MALAY coef=0.4698 Problematic w/o extra words
|
// id ms # INDONESIAN MALAY coef=0.4698 Problematic w/o extra words
|
||||||
// bo dz # TIBETAN DZONGKHA coef=0.4571
|
// bo dz # TIBETAN DZONGKHA coef=0.4571
|
||||||
// cs sk # CZECH SLOVAK coef=0.4273
|
// cs sk # CZECH SLOVAK coef=0.4273
|
||||||
// zu xh # ZULU XHOSA coef=0.3716
|
// zu xh # ZULU XHOSA coef=0.3716
|
||||||
@@ -337,6 +340,17 @@ Language FromPerScriptNumber(ULScript ulscript, uint8 perscript_number) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return true if language can be in the Latin script
|
||||||
|
bool IsLatnLanguage(Language lang) {
|
||||||
|
if (lang >= kLanguageToPLangSize) {return false;}
|
||||||
|
return (lang == kPLangToLanguageLatn[kLanguageToPLang[lang]]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return true if language can be in a non-Latin script
|
||||||
|
bool IsOthrLanguage(Language lang) {
|
||||||
|
if (lang >= kLanguageToPLangSize) {return false;}
|
||||||
|
return (lang == kPLangToLanguageOthr[kLanguageToPLang[lang]]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------//
|
//----------------------------------------------------------------------------//
|
||||||
|
@@ -109,9 +109,6 @@ int LScript4(ULScript ulscript);
|
|||||||
//
|
//
|
||||||
// The Language enum includes the fake language numbers for RTypeNone above.
|
// The Language enum includes the fake language numbers for RTypeNone above.
|
||||||
//
|
//
|
||||||
// In an open-source environment, the Google-specific Language enum is not
|
|
||||||
// available. Language decouples the two environments while maintaining
|
|
||||||
// internal compatibility.
|
|
||||||
|
|
||||||
|
|
||||||
// If the input is out of range or otherwise unrecognized, it is treated
|
// If the input is out of range or otherwise unrecognized, it is treated
|
||||||
@@ -168,6 +165,16 @@ Language DefaultLanguage(ULScript ulscript);
|
|||||||
uint8 PerScriptNumber(ULScript ulscript, Language lang);
|
uint8 PerScriptNumber(ULScript ulscript, Language lang);
|
||||||
Language FromPerScriptNumber(ULScript ulscript, uint8 perscript_number);
|
Language FromPerScriptNumber(ULScript ulscript, uint8 perscript_number);
|
||||||
|
|
||||||
|
// While the speed-sensitive processing deals with per-script language numbers,
|
||||||
|
// there is a need for low-performance dealing with original language numbers
|
||||||
|
// and unknown scripts, mostly for processing language hints.
|
||||||
|
// These routines let one derive a script class from a bare language.
|
||||||
|
// For languages written in multiple scripts, both of these can return true.
|
||||||
|
|
||||||
|
bool IsLatnLanguage(Language lang);
|
||||||
|
bool IsOthrLanguage(Language lang);
|
||||||
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------//
|
//----------------------------------------------------------------------------//
|
||||||
// Other //
|
// Other //
|
||||||
//----------------------------------------------------------------------------//
|
//----------------------------------------------------------------------------//
|
||||||
|
Reference in New Issue
Block a user