diff --git a/internal/lang_script.cc b/internal/lang_script.cc index 8388e8d..10fc455 100644 --- a/internal/lang_script.cc +++ b/internal/lang_script.cc @@ -254,10 +254,13 @@ const char* ExtLanguageDeclaredName(const Language lang) { return LanguageDeclaredName(lang); } + +extern const int kCloseSetSize = 10; + // Returns which set of statistically-close languages lang is in. 0 means none. int LanguageCloseSet(Language lang) { // Scaffolding - // id ms # INDONESIAN MALAY coef=0.4698 Problematic w/o extra words + // id ms # INDONESIAN MALAY coef=0.4698 Problematic w/o extra words // bo dz # TIBETAN DZONGKHA coef=0.4571 // cs sk # CZECH SLOVAK coef=0.4273 // zu xh # ZULU XHOSA coef=0.3716 @@ -337,6 +340,17 @@ Language FromPerScriptNumber(ULScript ulscript, uint8 perscript_number) { } } +// Return true if lang can be in the Latin script; false if lang >= kLanguageToPLangSize +bool IsLatnLanguage(Language lang) { + if (lang >= kLanguageToPLangSize) {return false;} + return (lang == kPLangToLanguageLatn[kLanguageToPLang[lang]]); +} + +// Return true if lang can be in a non-Latin script; false if lang >= kLanguageToPLangSize +bool IsOthrLanguage(Language lang) { + if (lang >= kLanguageToPLangSize) {return false;} + return (lang == kPLangToLanguageOthr[kLanguageToPLang[lang]]); +} //----------------------------------------------------------------------------// diff --git a/internal/lang_script.h b/internal/lang_script.h index 9b0a864..ab69d2c 100644 --- a/internal/lang_script.h +++ b/internal/lang_script.h @@ -109,9 +109,6 @@ int LScript4(ULScript ulscript); // // The Language enum includes the fake language numbers for RTypeNone above. // -// In an open-source environment, the Google-specific Language enum is not -// available. Language decouples the two environments while maintaining -// internal compatibility. 
// If the input is out of range or otherwise unrecognized, it is treated @@ -168,6 +165,16 @@ Language DefaultLanguage(ULScript ulscript); uint8 PerScriptNumber(ULScript ulscript, Language lang); Language FromPerScriptNumber(ULScript ulscript, uint8 perscript_number); +// While the speed-sensitive processing deals with per-script language numbers, +// there is a need for low-performance handling of original language numbers +// and unknown scripts, mostly for processing language hints. +// These routines let one derive a script class from a bare language. +// For languages written in multiple scripts, both of these can return true. + +bool IsLatnLanguage(Language lang); +bool IsOthrLanguage(Language lang); + + //----------------------------------------------------------------------------// // Other // //----------------------------------------------------------------------------//