From 1e435d2347cf76337386da9fad43406e49f2ea2f Mon Sep 17 00:00:00 2001 From: Pavel Tyunin Date: Mon, 28 Sep 2020 21:58:52 +0300 Subject: [PATCH] Fix wxTextInputStream incorrectly decoding multibyte fallback encodings --- src/common/txtstrm.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/common/txtstrm.cpp b/src/common/txtstrm.cpp index c38f7c29ab..fc5e352b58 100644 --- a/src/common/txtstrm.cpp +++ b/src/common/txtstrm.cpp @@ -134,12 +134,13 @@ wxChar wxTextInputStream::GetChar() // one extra byte, the only explanation is that we were using a // wxConvAuto conversion recognizing the initial BOM and that // it couldn't detect the presence or absence of BOM so far, - // but now finally has enough data to see that there is none. - // As we must have fallen back to Latin-1 in this case, return - // just the first byte and keep the other ones for the next - // time. - m_validBegin = 1; - return wbuf[0]; + // but now finally has enough data to see that there is none, or + // it was trying to decode the data as a UTF-8 sequence, but now + // recognized that it's not valid UTF-8 and switched to fallback. + // We don't know how long the first character is or whether it's decoded + // as 1 or 2 wchar_t characters, so we need to start with 1 byte again. + inlen = -1; + break; #if SIZEOF_WCHAR_T == 2 case 2: