Fix comments, remove redundant test
git-svn-id: https://cld2.googlecode.com/svn/trunk@90 b252ecd4-b096-bf77-eb8e-91563289f87e
This commit is contained in:
@@ -489,7 +489,7 @@ int ScanToLetterOrSpecial(const char* src, int len) {
|
|||||||
|
|
||||||
// src points to non-letter, such as tag-opening '<'
|
// src points to non-letter, such as tag-opening '<'
|
||||||
// Return length from here to next possible letter
|
// Return length from here to next possible letter
|
||||||
// On eos or another < before >, return 1
|
// On another < before >, return 1
|
||||||
// advances <tag>
|
// advances <tag>
|
||||||
// | |
|
// | |
|
||||||
// advances <tag> ... </tag> for <script> <style>
|
// advances <tag> ... </tag> for <script> <style>
|
||||||
@@ -497,9 +497,9 @@ int ScanToLetterOrSpecial(const char* src, int len) {
|
|||||||
// advances <!-- ... <tag> ... -->
|
// advances <!-- ... <tag> ... -->
|
||||||
// | |
|
// | |
|
||||||
// advances <tag
|
// advances <tag
|
||||||
// || (1)
|
// | | end of string
|
||||||
// advances <tag <tag2>
|
// advances <tag <tag2>
|
||||||
// || (1)
|
// ||
|
||||||
int ScanToPossibleLetter(const char* isrc, int len, int max_exit_state) {
|
int ScanToPossibleLetter(const char* isrc, int len, int max_exit_state) {
|
||||||
const uint8* src = reinterpret_cast<const uint8*>(isrc);
|
const uint8* src = reinterpret_cast<const uint8*>(isrc);
|
||||||
const uint8* srclimit = src + len;
|
const uint8* srclimit = src + len;
|
||||||
@@ -1023,7 +1023,7 @@ bool ScriptScanner::GetOneScriptSpan(LangSpan* span) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Force Latin, Cyrillic, Armenian, Greek scripts to be lowercase
|
// Force Latin, Cyrillic, Armenian, Greek scripts to be lowercase
|
||||||
// List changes with each version of Unicode, so always lowercase
|
// List changes with each version of Unicode, so just always lowercase
|
||||||
// Unicode 6.2.0:
|
// Unicode 6.2.0:
|
||||||
// ARMENIAN COPTIC CYRILLIC DESERET GEORGIAN GLAGOLITIC GREEK LATIN
|
// ARMENIAN COPTIC CYRILLIC DESERET GEORGIAN GLAGOLITIC GREEK LATIN
|
||||||
void ScriptScanner::LowerScriptSpan(LangSpan* span) {
|
void ScriptScanner::LowerScriptSpan(LangSpan* span) {
|
||||||
@@ -1031,29 +1031,21 @@ void ScriptScanner::LowerScriptSpan(LangSpan* span) {
|
|||||||
// lowercasing an entity such as &Aacute;
|
// lowercasing an entity such as &Aacute;
|
||||||
// We only need to do this for Latn and Cyrl scripts
|
// We only need to do this for Latn and Cyrl scripts
|
||||||
map2uplow_.Clear();
|
map2uplow_.Clear();
|
||||||
if (true ||
|
// Full Unicode lowercase of the entire buffer, including
|
||||||
(span->ulscript == ULScript_Latin) ||
|
// four pad bytes off the end.
|
||||||
(span->ulscript == ULScript_Cyrillic) ||
|
// Ahhh. But the last byte 0x00 is not interchange-valid, so we do 3 pad
|
||||||
(span->ulscript == ULScript_Armenian) ||
|
// bytes and put the 0x00 in explicitly.
|
||||||
(span->ulscript == ULScript_Greek)) {
|
// Build an offset map from script_buffer_lower_ back to script_buffer_
|
||||||
// Full Unicode lowercase of the entire buffer, including
|
int consumed, filled, changed;
|
||||||
// four pad bytes off the end.
|
StringPiece istr(span->text, span->text_bytes + 3);
|
||||||
// Ahhh. But the last byte 0x00 is not interchange-valid, so we do 3 pad
|
StringPiece ostr(script_buffer_lower_, kMaxScriptLowerBuffer);
|
||||||
// bytes and put the 0x00 in explicitly.
|
|
||||||
// Build an offset map from script_buffer_lower_ back to script_buffer_
|
|
||||||
int consumed, filled, changed;
|
|
||||||
StringPiece istr(span->text, span->text_bytes + 3);
|
|
||||||
StringPiece ostr(script_buffer_lower_, kMaxScriptLowerBuffer);
|
|
||||||
|
|
||||||
UTF8GenericReplace(&utf8repl_lettermarklower_obj,
|
UTF8GenericReplace(&utf8repl_lettermarklower_obj,
|
||||||
istr, ostr, is_plain_text_,
|
istr, ostr, is_plain_text_,
|
||||||
&consumed, &filled, &changed, &map2uplow_);
|
&consumed, &filled, &changed, &map2uplow_);
|
||||||
script_buffer_lower_[filled] = '\0';
|
script_buffer_lower_[filled] = '\0';
|
||||||
span->text = script_buffer_lower_;
|
span->text = script_buffer_lower_;
|
||||||
span->text_bytes = filled - 3;
|
span->text_bytes = filled - 3;
|
||||||
} else {
|
|
||||||
map2uplow_.Copy(span->text_bytes + 3);
|
|
||||||
}
|
|
||||||
map2uplow_.Reset();
|
map2uplow_.Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user