Fix wxTextInputStream for some inputs starting with nulls
This commit is contained in:
@@ -313,7 +313,7 @@ wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
|
|||||||
// to the fall-back conversion in this case as it would prevent us from
|
// to the fall-back conversion in this case as it would prevent us from
|
||||||
// decoding UTF-8 input when fed it byte by byte, as done by
|
// decoding UTF-8 input when fed it byte by byte, as done by
|
||||||
// wxTextInputStream, for example
|
// wxTextInputStream, for example
|
||||||
if ( srcLen < m_conv->GetMaxCharLen() && wxIsUTF8Prefix(src, srcLen) )
|
if ( srcLen < 2 + m_conv->GetMaxCharLen() && wxIsUTF8Prefix(src, srcLen) )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
// if the conversion failed but we didn't really detect anything and
|
// if the conversion failed but we didn't really detect anything and
|
||||||
|
@@ -97,10 +97,11 @@ wxChar wxTextInputStream::GetChar()
|
|||||||
m_validEnd = 0;
|
m_validEnd = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We may need to decode up to 4 characters if we have input starting with
|
// We may need to decode up to 6 characters if we have input starting with
|
||||||
// 3 BOM-like bytes, but not actually containing a BOM, as decoding it will
|
// 2 null bytes (like in a UTF-32BE BOM), and then 3 bytes that look like
|
||||||
// only succeed when 4 bytes are read -- and will yield 4 wide characters.
|
// the start of a UTF-8 sequence, as decoding it will only succeed when
|
||||||
wxChar wbuf[4];
|
// 6 bytes are read -- and will yield 6 wide characters.
|
||||||
|
wxChar wbuf[6];
|
||||||
for(size_t inlen = 0; inlen < sizeof(m_lastBytes); inlen++)
|
for(size_t inlen = 0; inlen < sizeof(m_lastBytes); inlen++)
|
||||||
{
|
{
|
||||||
if ( inlen >= m_validEnd )
|
if ( inlen >= m_validEnd )
|
||||||
|
@@ -324,6 +324,46 @@ TEST_CASE("wxTextInputStream::GetChar", "[text][input][stream][char]")
|
|||||||
REQUIRE( tis.GetChar() == 0x00 );
|
REQUIRE( tis.GetChar() == 0x00 );
|
||||||
CHECK( tis.GetInputStream().Eof() );
|
CHECK( tis.GetInputStream().Eof() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Two null bytes that look like the start of a UTF-32BE BOM,
|
||||||
|
// followed by a 4-byte UTF-8 sequence.
|
||||||
|
// Needs wxConvAuto to not switch to the fallback on fewer than 6 bytes.
|
||||||
|
SECTION("UTF8-with-nulls")
|
||||||
|
{
|
||||||
|
const wxUint8 buf[] = { 0x00, 0x00, 0xf0, 0x90, 0x8c, 0x98 };
|
||||||
|
wxMemoryInputStream mis(buf, sizeof(buf));
|
||||||
|
wxTextInputStream tis(mis);
|
||||||
|
|
||||||
|
wxCharTypeBuffer<wxChar> e = wxString::FromUTF8((char*)buf, sizeof(buf))
|
||||||
|
.tchar_str<wxChar>();
|
||||||
|
for ( size_t i = 0; i < e.length(); ++i )
|
||||||
|
{
|
||||||
|
INFO("i = " << i);
|
||||||
|
REQUIRE( tis.GetChar() == e[i] );
|
||||||
|
}
|
||||||
|
REQUIRE( tis.GetChar() == 0x00 );
|
||||||
|
CHECK( tis.GetInputStream().Eof() );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Two null bytes that look like the start of a UTF-32BE BOM,
|
||||||
|
// then 3 bytes that look like the start of a UTF-8 sequence.
|
||||||
|
// Needs a 6-character output buffer in GetChar().
|
||||||
|
SECTION("almost-UTF8-with-nulls")
|
||||||
|
{
|
||||||
|
const wxUint8 buf[] = { 0x00, 0x00, 0xf0, 0x90, 0x8c, 0xe0 };
|
||||||
|
wxMemoryInputStream mis(buf, sizeof(buf));
|
||||||
|
wxTextInputStream tis(mis);
|
||||||
|
|
||||||
|
wxCharTypeBuffer<wxChar> e = wxString((char*)buf, wxCSConv(wxFONTENCODING_ISO8859_1),
|
||||||
|
sizeof(buf)).tchar_str<wxChar>();
|
||||||
|
for ( size_t i = 0; i < e.length(); ++i )
|
||||||
|
{
|
||||||
|
INFO("i = " << i);
|
||||||
|
REQUIRE( tis.GetChar() == e[i] );
|
||||||
|
}
|
||||||
|
REQUIRE( tis.GetChar() == 0x00 );
|
||||||
|
CHECK( tis.GetInputStream().Eof() );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // wxUSE_UNICODE
|
#endif // wxUSE_UNICODE
|
||||||
|
Reference in New Issue
Block a user