Make leading nulls a special case to avoid breaking the decoding of some short strings in the fallback encoding

This commit is contained in:
Pavel Tyunin
2020-10-07 17:02:06 +03:00
parent b536457e07
commit 1cbcf24832
2 changed files with 14 additions and 2 deletions

View File

@@ -334,9 +334,12 @@ wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
// to the fall-back conversion in this case as it would prevent us from
// decoding UTF-8 input when fed it byte by byte, as done by
// wxTextInputStream, for example
// 2 extra bytes are needed for inputs that start with 1 or 2 null bytes
// up to 2 extra bytes are needed for inputs that start with null bytes
// that look like the start of UTF-32BE BOM, but can be in UTF-8 too
if ( srcLen < 2 + m_conv->GetMaxCharLen() && wxIsUTF8Prefix(src, srcLen) )
size_t nNull = 0;
if ( srcLen != wxNO_LEN && srcLen >= 2 && !src[0] )
nNull = ( src[1]? 1 : 2 );
if ( srcLen < nNull + m_conv->GetMaxCharLen() && wxIsUTF8Prefix(src, srcLen) )
return wxCONV_FAILED;
// if the conversion failed but we didn't really detect anything and

View File

@@ -47,6 +47,7 @@ private:
CPPUNIT_TEST( UTF8NoBom );
CPPUNIT_TEST( Fallback );
CPPUNIT_TEST( FallbackMultibyte );
CPPUNIT_TEST( FallbackShort );
CPPUNIT_TEST( StreamUTF8NoBOM );
CPPUNIT_TEST( StreamUTF8 );
CPPUNIT_TEST( StreamUTF16LE );
@@ -100,6 +101,7 @@ private:
void UTF8NoBom();
void Fallback();
void FallbackMultibyte();
void FallbackShort();
// test whether two lines of text are converted properly from a stream
void TestTextStream(const char *src,
@@ -222,6 +224,13 @@ void ConvAutoTestCase::FallbackMultibyte()
#endif
}
void ConvAutoTestCase::FallbackShort()
{
TestFirstChar("\x61\x61\x61\xc4", 'a', 4,
ConvState(wxBOM_None, wxFONTENCODING_ISO8859_5, true),
wxFONTENCODING_ISO8859_5);
}
void ConvAutoTestCase::TestTextStream(const char *src,
size_t srclength,
const wxString& line1,