From 1e435d2347cf76337386da9fad43406e49f2ea2f Mon Sep 17 00:00:00 2001
From: Pavel Tyunin <pavel51tunin@gmail.com>
Date: Mon, 28 Sep 2020 21:58:52 +0300
Subject: [PATCH] Fix wxTextInputStream incorrectly decoding multibyte fallback
 encodings

---
 src/common/txtstrm.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/common/txtstrm.cpp b/src/common/txtstrm.cpp
index c38f7c29ab..fc5e352b58 100644
--- a/src/common/txtstrm.cpp
+++ b/src/common/txtstrm.cpp
@@ -134,12 +134,13 @@ wxChar wxTextInputStream::GetChar()
                 // one extra byte, the only explanation is that we were using a
                 // wxConvAuto conversion recognizing the initial BOM and that
                 // it couldn't detect the presence or absence of BOM so far,
-                // but now finally has enough data to see that there is none.
-                // As we must have fallen back to Latin-1 in this case, return
-                // just the first byte and keep the other ones for the next
-                // time.
-                m_validBegin = 1;
-                return wbuf[0];
+                // but now finally has enough data to see that there is none, or
+                // it was trying to decode the data as a UTF-8 sequence, but has now
+                // recognized that it's not valid UTF-8 and switched to the fallback.
+                // We don't know how long the first character is, or whether it is
+                // decoded as 1 or 2 wchar_t characters, so we need to start with 1 byte again.
+                inlen = -1;
+                break;
 
 #if SIZEOF_WCHAR_T == 2
             case 2: