Fix comments, remove redundant test

git-svn-id: https://cld2.googlecode.com/svn/trunk@90 b252ecd4-b096-bf77-eb8e-91563289f87e
2014-01-29 23:55:04 +00:00
parent 1479ce7874
commit 0c5e9e7cd4
1 changed files with 18 additions and 26 deletions
--- a/internal/getonescriptspan.cc
+++ b/internal/getonescriptspan.cc
@@ -489,7 +489,7 @@ int ScanToLetterOrSpecial(const char* src, int len) {
 // src points to non-letter, such as tag-opening '<'
 // Return length from here to next possible letter
-// On eos or another < before >, return 1
+// On another < before >, return 1
 // advances <tag>
 //          |    |
 // advances <tag> ... </tag>  for <script> <style>
@@ -497,9 +497,9 @@ int ScanToLetterOrSpecial(const char* src, int len) {
 // advances <!-- ... <tag> ... -->
 //          |                     |
 // advances <tag
-//          ||  (1)
+//          |    | end of string
 // advances <tag <tag2>
-//          ||  (1)
+//          ||
 int ScanToPossibleLetter(const char* isrc, int len, int max_exit_state) {
  const uint8* src = reinterpret_cast<const uint8*>(isrc);
  const uint8* srclimit = src + len;
@@ -1023,7 +1023,7 @@ bool ScriptScanner::GetOneScriptSpan(LangSpan* span) {
 }
 // Force Latin, Cyrillic, Armenian, Greek scripts to be lowercase
-// List changes with each version of Unicode, so always lowercase
+// List changes with each version of Unicode, so just always lowercase
 // Unicode 6.2.0:
 //   ARMENIAN COPTIC CYRILLIC DESERET GEORGIAN GLAGOLITIC GREEK LATIN
 void ScriptScanner::LowerScriptSpan(LangSpan* span) {
@@ -1031,29 +1031,21 @@ void ScriptScanner::LowerScriptSpan(LangSpan* span) {
  // lowercasing an entity such as &Aacute;
  // We only need to do this for Latn and Cyrl scripts
  map2uplow_.Clear();
-  if (true ||
+  // Full Unicode lowercase of the entire buffer, including
-      (span->ulscript == ULScript_Latin) ||
+  // four pad bytes off the end.
-      (span->ulscript == ULScript_Cyrillic) ||
+  // Ahhh. But the last byte 0x00 is not interchange-valid, so we do 3 pad
-      (span->ulscript == ULScript_Armenian) ||
+  // bytes and put the 0x00 in explicitly.
-      (span->ulscript == ULScript_Greek)) {
+  // Build an offset map from script_buffer_lower_ back to script_buffer_
-    // Full Unicode lowercase of the entire buffer, including
+  int consumed, filled, changed;
-    // four pad bytes off the end.
+  StringPiece istr(span->text, span->text_bytes + 3);
-    // Ahhh. But the last byte 0x00 is not interchange-valid, so we do 3 pad
+  StringPiece ostr(script_buffer_lower_, kMaxScriptLowerBuffer);
-    // bytes and put the 0x00 in explicitly.
-    // Build an offset map from script_buffer_lower_ back to script_buffer_
-    int consumed, filled, changed;
-    StringPiece istr(span->text, span->text_bytes + 3);
-    StringPiece ostr(script_buffer_lower_, kMaxScriptLowerBuffer);
-    UTF8GenericReplace(&utf8repl_lettermarklower_obj,
+  UTF8GenericReplace(&utf8repl_lettermarklower_obj,
-                              istr, ostr, is_plain_text_,
+                            istr, ostr, is_plain_text_,
-                              &consumed, &filled, &changed, &map2uplow_);
+                            &consumed, &filled, &changed, &map2uplow_);
-    script_buffer_lower_[filled] = '\0';
+  script_buffer_lower_[filled] = '\0';
-    span->text = script_buffer_lower_;
+  span->text = script_buffer_lower_;
-    span->text_bytes = filled - 3;
+  span->text_bytes = filled - 3;
-  } else {
-    map2uplow_.Copy(span->text_bytes + 3);
-  }
  map2uplow_.Reset();
 }