Fix wxTextInputStream for some inputs starting with nulls

This commit is contained in:
Pavel Tyunin
2020-10-03 18:21:18 +03:00
parent b3eff48e28
commit 45adce8561
3 changed files with 46 additions and 5 deletions

View File

@@ -313,7 +313,7 @@ wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
// to the fall-back conversion in this case as it would prevent us from
// decoding UTF-8 input when fed it byte by byte, as done by
// wxTextInputStream, for example
if ( srcLen < m_conv->GetMaxCharLen() && wxIsUTF8Prefix(src, srcLen) )
if ( srcLen < 2 + m_conv->GetMaxCharLen() && wxIsUTF8Prefix(src, srcLen) )
return wxCONV_FAILED;
// if the conversion failed but we didn't really detect anything and

View File

@@ -97,10 +97,11 @@ wxChar wxTextInputStream::GetChar()
m_validEnd = 0;
}
// We may need to decode up to 4 characters if we have input starting with
// 3 BOM-like bytes, but not actually containing a BOM, as decoding it will
// only succeed when 4 bytes are read -- and will yield 4 wide characters.
wxChar wbuf[4];
// We may need to decode up to 6 characters if we have input starting with
// 2 null bytes (as in a UTF-32BE BOM), followed by 3 bytes that look like
// the start of a UTF-8 sequence, as decoding it will only succeed once
// 6 bytes have been read -- and will then yield 6 wide characters.
wxChar wbuf[6];
for(size_t inlen = 0; inlen < sizeof(m_lastBytes); inlen++)
{
if ( inlen >= m_validEnd )

View File

@@ -324,6 +324,46 @@ TEST_CASE("wxTextInputStream::GetChar", "[text][input][stream][char]")
REQUIRE( tis.GetChar() == 0x00 );
CHECK( tis.GetInputStream().Eof() );
}
// Two null bytes that look like the start of a UTF-32BE BOM,
// followed by a 4-byte UTF-8 sequence.
// Needs wxConvAuto not to switch to the fallback with fewer than 6 bytes.
SECTION("UTF8-with-nulls")
{
    // Raw input: two leading null bytes followed by one 4-byte UTF-8 sequence.
    const wxUint8 bytes[] = { 0x00, 0x00, 0xf0, 0x90, 0x8c, 0x98 };

    wxMemoryInputStream memStream(bytes, sizeof(bytes));
    wxTextInputStream textStream(memStream);

    // Reference result: the same bytes decoded as UTF-8 in one go, with the
    // explicit length so that the embedded nulls are kept.
    const wxString decoded =
        wxString::FromUTF8(reinterpret_cast<const char*>(bytes), sizeof(bytes));
    wxCharTypeBuffer<wxChar> expected = decoded.tchar_str<wxChar>();

    // Reading character by character must reproduce the reference exactly.
    const size_t count = expected.length();
    for ( size_t i = 0; i != count; ++i )
    {
        INFO("i = " << i);
        REQUIRE( textStream.GetChar() == expected[i] );
    }

    // One more read past the end yields 0 and leaves the stream at EOF.
    REQUIRE( textStream.GetChar() == 0x00 );
    CHECK( textStream.GetInputStream().Eof() );
}
// Two null bytes that look like the start of a UTF-32BE BOM,
// then 3 bytes that look like the start of a UTF-8 sequence.
// Needs a 6-character output buffer in GetChar().
SECTION("almost-UTF8-with-nulls")
{
    // Raw input: two leading null bytes, three bytes that could begin a
    // UTF-8 sequence, and a final byte.
    const wxUint8 bytes[] = { 0x00, 0x00, 0xf0, 0x90, 0x8c, 0xe0 };

    wxMemoryInputStream memStream(bytes, sizeof(bytes));
    wxTextInputStream textStream(memStream);

    // Reference result: the same bytes converted as ISO 8859-1, with the
    // explicit length so that the embedded nulls are kept.
    const wxString decoded(reinterpret_cast<const char*>(bytes),
                           wxCSConv(wxFONTENCODING_ISO8859_1),
                           sizeof(bytes));
    wxCharTypeBuffer<wxChar> expected = decoded.tchar_str<wxChar>();

    // Reading character by character must reproduce the reference exactly.
    const size_t count = expected.length();
    for ( size_t i = 0; i != count; ++i )
    {
        INFO("i = " << i);
        REQUIRE( textStream.GetChar() == expected[i] );
    }

    // One more read past the end yields 0 and leaves the stream at EOF.
    REQUIRE( textStream.GetChar() == 0x00 );
    CHECK( textStream.GetInputStream().Eof() );
}
}
#endif // wxUSE_UNICODE