diff --git a/internal/compact_lang_det.cc b/internal/compact_lang_det.cc
index ccc6588..045e0b2 100644
--- a/internal/compact_lang_det.cc
+++ b/internal/compact_lang_det.cc
@@ -28,11 +28,32 @@
 namespace CLD2 {
 
 // String is "code_version - data_scrape_date"
-//static const char* kDetectLanguageVersion = "V2.0 - 20130715";
-
+// static const char* kDetectLanguageVersion = "V2.0 - 20141015";
 
 // Large-table version for all ~160 languages
-// Small-table version for all ~60 languages
+// Small-table version for all ~80 languages
+
+
+// Validates that buffer is interchange-valid UTF-8, then detects the most
+// likely language. If the input is not fully valid, this returns immediately
+// with the result value UNKNOWN_LANGUAGE and is_reliable set false.
+//
+// valid_prefix_bytes must be non-NULL: it is always set to the number of
+// leading bytes that are interchange-valid UTF-8. If this is < buffer_length,
+// the invalid input starts at byte offset *valid_prefix_bytes.
+Language DetectLanguageCheckUTF8(
+                        const char* buffer,
+                        int buffer_length,
+                        bool is_plain_text,
+                        bool* is_reliable,
+                        int* valid_prefix_bytes) {
+  *valid_prefix_bytes = SpanInterchangeValid(buffer, buffer_length);
+  if (*valid_prefix_bytes < buffer_length) {
+    *is_reliable = false;  // Reject: the detector is never run on this input
+    return UNKNOWN_LANGUAGE;
+  }
+  return DetectLanguage(buffer, buffer_length, is_plain_text, is_reliable);
+}
 
 // Scan interchange-valid UTF-8 bytes and detect most likely language
 Language DetectLanguage(
@@ -272,7 +293,70 @@ Language ExtDetectLanguageSummary(
   return lang;
 }
 
+
 // Use this one.
+//
+// Hints are collected into a struct.
+// Flags are passed in (normally zero).
+//
+// Also returns 3 internal language scores as a ratio to normal score for
+// real text in that language. Scores close to 1.0 indicate normal text,
+// while scores far away from 1.0 indicate badly-skewed text or gibberish.
+//
+// Returns a vector of chunks in different languages, so that caller may
+// spell-check, translate, or otherwise process different parts of the input
+// buffer in language-dependent ways.
+//
+// If the input is not interchange-valid UTF-8, this returns immediately
+// with UNKNOWN_LANGUAGE and is_reliable set false; language3, percent3,
+// normalized_score3, resultchunkvector, and text_bytes are not written.
+//
+// valid_prefix_bytes must be non-NULL: it is always set to the number of
+// leading interchange-valid UTF-8 bytes. If this is < buffer_length, the
+// invalid input starts at byte offset *valid_prefix_bytes.
+Language ExtDetectLanguageSummaryCheckUTF8(
+                        const char* buffer,
+                        int buffer_length,
+                        bool is_plain_text,
+                        const CLDHints* cld_hints,
+                        int flags,
+                        Language* language3,
+                        int* percent3,
+                        double* normalized_score3,
+                        ResultChunkVector* resultchunkvector,
+                        int* text_bytes,
+                        bool* is_reliable,
+                        int* valid_prefix_bytes) {
+  *valid_prefix_bytes = SpanInterchangeValid(buffer, buffer_length);
+  if (*valid_prefix_bytes < buffer_length) {
+    *is_reliable = false;  // No other output is written on this path
+    return UNKNOWN_LANGUAGE;
+  }
+
+  bool allow_extended_lang = true;
+  Language plus_one = UNKNOWN_LANGUAGE;
+
+  Language lang = DetectLanguageSummaryV2(
+                          buffer,
+                          buffer_length,
+                          is_plain_text,
+                          cld_hints,
+                          allow_extended_lang,
+                          flags,
+                          plus_one,
+                          language3,
+                          percent3,
+                          normalized_score3,
+                          resultchunkvector,
+                          text_bytes,
+                          is_reliable);
+  // Do not default to English
+  return lang;
+}
+
+// Use this one ONLY if you can prove that the input text is valid UTF-8 by
+// design because it went through a known-good conversion program.
+//
 // Hints are collected into a struct.
 // Flags are passed in (normally zero).
 //
@@ -318,5 +402,7 @@ Language ExtDetectLanguageSummary(
   return lang;
 }
 
+
+
 }       // End namespace CLD2
 
diff --git a/internal/compact_lang_det_impl.cc b/internal/compact_lang_det_impl.cc
index 3277b18..eafbdfc 100644
--- a/internal/compact_lang_det_impl.cc
+++ b/internal/compact_lang_det_impl.cc
@@ -27,6 +27,7 @@
 #include "debug.h"
 #include "integral_types.h"
 #include "lang_script.h"
+#include "utf8acceptinterchange.h"
 #include "utf8statetable.h"
 
 #ifdef CLD2_DYNAMIC_MODE
@@ -68,6 +69,16 @@ extern const CLD2TableSummary kDeltaOcta_obj;
 extern const CLD2TableSummary kDistinctOcta_obj;
 extern const short kAvgDeltaOctaScore[];
 
+// Returns the length in bytes of the leading prefix of src that is all
+// interchange-valid UTF-8, scanning at most byte_length bytes.
+int SpanInterchangeValid(const char* src, int byte_length) {
+  int bytes_consumed;  // Expected to be filled in by UTF8GenericScan below
+  const UTF8ReplaceObj* st = &utf8acceptinterchange_obj;
+  StringPiece str(src, byte_length);
+  UTF8GenericScan(st, str, &bytes_consumed);
+  return bytes_consumed;
+}
+
 #ifdef CLD2_DYNAMIC_MODE
   // CLD2_DYNAMIC_MODE is defined:
   // Data will be read from an mmap opened at runtime.
@@ -426,6 +437,9 @@ inline bool FlagTop40(int flags) {return (flags & kCLDFlagTop40) != 0;}
 inline bool FlagShort(int flags) {return (flags & kCLDFlagShort) != 0;}
 inline bool FlagHint(int flags) {return (flags & kCLDFlagHint) != 0;}
 inline bool FlagUseWords(int flags) {return (flags & kCLDFlagUseWords) != 0;}
+inline bool FlagBestEffort(int flags) {  // True iff kCLDFlagBestEffort is set
+  return (flags & kCLDFlagBestEffort) != 0;
+}
 
 
   // Defines Top40 packed languages
@@ -679,7 +693,7 @@ int CheapRepWordsInplace(char* isrc, int src_len, int* hash, int* tbl) {
 
 
 // This alternate form overwrites redundant words, thus avoiding corrupting the
-// backmap for generate a vector of original-text ranges.
+// backmap for generating a vector of original-text ranges.
 int CheapRepWordsInplaceOverwrite(char* isrc, int src_len, int* hash, int* tbl) {
   const uint8* src = reinterpret_cast<const uint8*>(isrc);
   const uint8* srclimit = src + src_len;
@@ -851,7 +865,7 @@ int CheapSqueezeInplace(char* isrc,
 }
 
 // This alternate form overwrites redundant words, thus avoiding corrupting the
-// backmap for generate a vector of original-text ranges.
+// backmap for generating a vector of original-text ranges.
 int CheapSqueezeInplaceOverwrite(char* isrc,
                                             int src_len,
                                             int ichunksize) {
@@ -1402,7 +1416,8 @@ void CalcSummaryLang(DocTote* doc_tote, int total_text_bytes,
                      const Language* language3,
                      const int* percent3,
                      Language* summary_lang, bool* is_reliable,
-                     bool FLAGS_cld2_html, bool FLAGS_cld2_quiet) {
+                     bool FLAGS_cld2_html, bool FLAGS_cld2_quiet,
+                     int flags) {
   // Vector of active languages; changes if we delete some
   int slot_count = 3;
   int active_slot[3] = {0, 1, 2};
@@ -1417,7 +1432,7 @@ void CalcSummaryLang(DocTote* doc_tote, int total_text_bytes,
   for (int i = 0; i < 3; ++i) {
     if (language3[i] == TG_UNKNOWN_LANGUAGE) {
       ignore_percent += percent3[i];
-      // Move the rest up, levaing input vectors unchanged
+      // Move the rest up, leaving input vectors unchanged
       for (int j=i+1; j < 3; ++j) {
         active_slot[j - 1] = active_slot[j];
       }
@@ -1475,7 +1490,7 @@ void CalcSummaryLang(DocTote* doc_tote, int total_text_bytes,
   }
 
   // If return percent is too small (too many languages), return UNKNOWN
-  if ((return_percent < kGoodFirstMinPercent)) {
+  if ((return_percent < kGoodFirstMinPercent) && !FlagBestEffort(flags)) {
     if (FLAGS_cld2_html && !FLAGS_cld2_quiet) {
       fprintf(stderr, "{Unreli %s %d%% percent too small} ",
               LanguageCode(*summary_lang), return_percent);
@@ -1666,15 +1681,27 @@ void ApplyHints(const char* buffer,
       }
     }
   }
-
-
-
-
-
-
 }
 
 
+// Extend first/last chunks so results span [lo..hi); interior is untouched
+void FinishResultVector(int lo, int hi, ResultChunkVector* vec) {
+  if (vec == NULL) {return;}  // No vector supplied
+  if (vec->size() == 0) {return;}  // No chunks to extend
+  ResultChunk* rc = &(*vec)[0];
+  if (rc->offset > lo) {
+    int diff = rc->offset - lo;
+    rc->offset -= diff;  // First chunk now starts at lo
+    rc->bytes += diff;  // ...and still ends where it did before
+  }
+  ResultChunk* rc2 = &(*vec)[vec->size() - 1];
+  int rc2hi = rc2->offset + rc2->bytes;  // One past the last chunk's end
+  if (rc2hi < hi) {
+    int diff = hi - rc2hi;
+    rc2->bytes += diff;  // Last chunk now ends at hi
+  }
+}
+
 
 // Results language3/percent3/text_bytes must be exactly three items
 Language DetectLanguageSummaryV2(
@@ -1968,7 +1995,9 @@ Language DetectLanguageSummaryV2(
     // This is the real, non-recursive return
 
     // Move bytes for unreliable langs to another lang or UNKNOWN
-    RemoveUnreliableLanguages(&doc_tote, FLAGS_cld2_html, FLAGS_cld2_quiet);
+    if (!FlagBestEffort(flags)) {
+      RemoveUnreliableLanguages(&doc_tote, FLAGS_cld2_html, FLAGS_cld2_quiet);
+    }
 
     // Redo the result extraction after the removal above
     doc_tote.Sort(3);
@@ -1976,13 +2005,11 @@ Language DetectLanguageSummaryV2(
                    reliable_percent3, language3, percent3, normalized_score3,
                    text_bytes, is_reliable);
 
-
-
     Language summary_lang;
     CalcSummaryLang(&doc_tote, total_text_bytes,
                     reliable_percent3, language3, percent3,
                     &summary_lang, is_reliable,
-                    FLAGS_cld2_html, FLAGS_cld2_quiet);
+                    FLAGS_cld2_html, FLAGS_cld2_quiet, flags);
 
     if (FLAGS_cld2_html && !FLAGS_cld2_quiet) {
       for (int i = 0; i < 3; ++i) {
@@ -2015,6 +2042,9 @@ Language DetectLanguageSummaryV2(
       fprintf(stderr, "<br>\n");
     }
 
+    // Extend results to fully cover the input buffer
+    FinishResultVector(0, buffer_length, resultchunkvector);
+
     return summary_lang;
   }
 
diff --git a/internal/compact_lang_det_impl.h b/internal/compact_lang_det_impl.h
index 6cadb3d..0cde880 100644
--- a/internal/compact_lang_det_impl.h
+++ b/internal/compact_lang_det_impl.h
@@ -126,6 +126,10 @@ At the end of the first pass --
     const uint32* kQuadValueTable;
   } LangDetObj;
 
+  // Returns the length in bytes of the leading prefix of src that is all
+  // interchange-valid UTF-8, scanning at most byte_length bytes.
+  int SpanInterchangeValid(const char* src, int byte_length);
+
   // For HTML documents, tags are skipped, along with <script> ... </script>
   // and <style> ... </style> sequences, and entities are expanded.
   //
diff --git a/public/compact_lang_det.h b/public/compact_lang_det.h
index 359457c..ecd75e9 100644
--- a/public/compact_lang_det.h
+++ b/public/compact_lang_det.h
@@ -42,7 +42,7 @@
 //  HAUSA (Latin, Arabic)
 //  KASHMIRI (Arabic, Devanagari)
 //  KAZAKH (Latin, Cyrillic, Arabic)
-//  KURDISH (Latin*, Arabic)
+//  KURDISH (Latin, Arabic)
 //  KYRGYZ (Cyrillic, Arabic)
 //  LIMBU (Devanagari, Limbu)
 //  MONGOLIAN (Cyrillic, Mongolian)
@@ -56,8 +56,7 @@
 //  UZBEK (Latin, Cyrillic, Arabic)
 //
 // * Due to a shortage of training text, AZERBAIJANI is not currently detected
-//   in Arabic or Cyrillic scripts, nor KURDISH in Latin script, nor TAJIK in
-//   Arabic script.
+//   in Arabic or Cyrillic scripts, nor TAJIK in Arabic script.
 //
 
 #ifndef I18N_ENCODINGS_CLD2_PUBLIC_COMPACT_LANG_DET_H_
@@ -65,10 +64,19 @@
 
 #include <stdint.h>
 #include <vector>
-#include "../internal/lang_script.h"  // For Language
+#include "../internal/integral_types.h"   // For uint8 etc.
+#include "../internal/lang_script.h"      // For Language
 
 namespace CLD2 {
 
+// NOTE: If you cannot prove that the input text is valid UTF-8 by design
+// because it went through a known-good conversion program, call one of the
+// *CheckUTF8 routines. For example, never trust raw user-supplied bytes. It is
+// especially important to do a UTF8-to-UTF8 conversion on raw bytes that claim
+// to be UTF-8, using a converter that guarantees to produce valid UTF-8,
+// turning other byte sequences into the Unicode replacement character U+FFFD
+// (deleting or turning into space or question-mark can create security holes).
+
   // Scan interchange-valid UTF-8 bytes and detect most likely language,
   // or set of languages.
   //
@@ -129,19 +137,42 @@ namespace CLD2 {
     Language language_hint;                 // ITALIAN boosts it
   } CLDHints;
 
-  static const int kMaxResultChunkBytes = 65535;
+  static const int32 kMaxResultChunkBytes = 0x7fffffff;
 
+  // Note: this was initially over-optimized to fit into 8 bytes,
+  //  causing too much work to deal with greater than 16-bit byte lengths.
   // For returning a vector of per-language pieces of the input buffer
   // Unreliable and too-short are mapped to UNKNOWN_LANGUAGE
   typedef struct {
     int offset;                 // Starting byte offset in original buffer
-    uint16 bytes;               // Number of bytes in chunk
+    int32 bytes;                // Number of bytes in chunk
     uint16 lang1;               // Top lang, as full Language. Apply
-                                // static_cast<Language>() to this short value.
+                                //  static_cast<Language>() to this short value.
+    uint16 pad;                 // Make multiple of 4 bytes
   } ResultChunk;
   typedef std::vector<ResultChunk> ResultChunkVector;
 
 
+  // These initial simple versions all cascade through the full-blown last
+  // version which it would be better for you to use directly because you will
+  // get better results passing in any available hints.
+
+  // Validates that buffer is interchange-valid UTF-8, then detects the most
+  // likely language. If the input is not fully valid, this returns
+  // immediately with UNKNOWN_LANGUAGE and is_reliable set false.
+  //
+  // valid_prefix_bytes must be non-NULL: it is always set to the number of
+  // leading interchange-valid UTF-8 bytes. If this is < buffer_length, the
+  // invalid input starts at byte offset *valid_prefix_bytes.
+  Language DetectLanguageCheckUTF8(
+                          const char* buffer,
+                          int buffer_length,
+                          bool is_plain_text,
+                          bool* is_reliable,
+                          int* valid_prefix_bytes);
+
+  // Use this one ONLY if you can prove that the input text is valid UTF-8 by
+  // design because it went through a known-good conversion program.
   // Scan interchange-valid UTF-8 bytes and detect most likely language
   Language DetectLanguage(
                           const char* buffer,
@@ -149,6 +180,8 @@ namespace CLD2 {
                           bool is_plain_text,
                           bool* is_reliable);
 
+  // Use this one ONLY if you can prove that the input text is valid UTF-8 by
+  // design because it went through a known-good conversion program.
   // Scan interchange-valid UTF-8 bytes and detect list of top 3 languages.
   // language3[0] is usually also the return value
   Language DetectLanguageSummary(
@@ -160,6 +193,8 @@ namespace CLD2 {
                           int* text_bytes,
                           bool* is_reliable);
 
+  // Use this one ONLY if you can prove that the input text is valid UTF-8 by
+  // design because it went through a known-good conversion program.
   // Same as above, with hints supplied
   // Scan interchange-valid UTF-8 bytes and detect list of top 3 languages.
   // language3[0] is usually also the return value
@@ -175,6 +210,8 @@ namespace CLD2 {
                           int* text_bytes,
                           bool* is_reliable);
 
+  // Use this one ONLY if you can prove that the input text is valid UTF-8 by
+  // design because it went through a known-good conversion program.
   // Scan interchange-valid UTF-8 bytes and detect list of top 3 extended
   // languages.
   //
@@ -191,6 +228,8 @@ namespace CLD2 {
                           int* text_bytes,
                           bool* is_reliable);
 
+  // Use this one ONLY if you can prove that the input text is valid UTF-8 by
+  // design because it went through a known-good conversion program.
   // Same as above, with hints supplied
   // Scan interchange-valid UTF-8 bytes and detect list of top 3 extended
   // languages.
@@ -211,6 +250,8 @@ namespace CLD2 {
                           int* text_bytes,
                           bool* is_reliable);
 
+  // Use this one ONLY if you can prove that the input text is valid UTF-8 by
+  // design because it went through a known-good conversion program.
   // Same as above, and also returns 3 internal language scores as a ratio to
   // normal score for real text in that language. Scores close to 1.0 indicate
   // normal text, while scores far away from 1.0 indicate badly-skewed text or
@@ -231,6 +272,42 @@ namespace CLD2 {
 
 
   // Use this one.
+  //
+  // Hints are collected into a struct.
+  // Flags are passed in (normally zero).
+  //
+  // Also returns 3 internal language scores as a ratio to normal score for
+  // real text in that language. Scores close to 1.0 indicate normal text,
+  // while scores far away from 1.0 indicate badly-skewed text or gibberish.
+  //
+  // Returns a vector of chunks in different languages, so that caller may
+  // spell-check, translate, or otherwise process different parts of the input
+  // buffer in language-dependent ways.
+  //
+  // If the input is not interchange-valid UTF-8, this returns immediately
+  // with UNKNOWN_LANGUAGE and is_reliable set false; language3, percent3,
+  // normalized_score3, resultchunkvector, and text_bytes are not written.
+  //
+  // valid_prefix_bytes must be non-NULL: it is always set to the number of
+  // leading interchange-valid UTF-8 bytes. If this is < buffer_length, the
+  // invalid input starts at byte offset *valid_prefix_bytes.
+  Language ExtDetectLanguageSummaryCheckUTF8(
+                          const char* buffer,
+                          int buffer_length,
+                          bool is_plain_text,
+                          const CLDHints* cld_hints,
+                          int flags,
+                          Language* language3,
+                          int* percent3,
+                          double* normalized_score3,
+                          ResultChunkVector* resultchunkvector,
+                          int* text_bytes,
+                          bool* is_reliable,
+                          int* valid_prefix_bytes);
+
+  // Use this one ONLY if you can prove that the input text is valid UTF-8 by
+  // design because it went through a known-good conversion program.
+  //
   // Hints are collected into a struct.
   // Flags are passed in (normally zero).
   //
@@ -268,6 +345,8 @@ namespace CLD2 {
   static const int kCLDFlagVerbose =      0x0800;  // More debug HTML => stderr
   static const int kCLDFlagQuiet =        0x1000;  // Less debug HTML => stderr
   static const int kCLDFlagEcho =         0x2000;  // Echo input => stderr
+  static const int kCLDFlagBestEffort =   0x4000;  // Give best-effort answer,
+                                                   // even on short text
 
 
 /***
@@ -290,6 +369,10 @@ Flag meanings:
    In that HTML file, suppress most of the output detail.
  kCLDFlagEcho
   Echo every input buffer to stderr.
+ kCLDFlagBestEffort
+  Give best-effort answer, instead of UNKNOWN_LANGUAGE. May be useful for
+  short text if the caller prefers an approximate answer over none.
+
 ***/
 
 // Debug output: Print the resultchunkvector to file f