Fix wxTextInputStream for some inputs starting with nulls
This commit is contained in:
@@ -313,7 +313,7 @@ wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
|
||||
// to the fall-back conversion in this case as it would prevent us from
|
||||
// decoding UTF-8 input when fed it byte by byte, as done by
|
||||
// wxTextInputStream, for example
|
||||
if ( srcLen < m_conv->GetMaxCharLen() && wxIsUTF8Prefix(src, srcLen) )
|
||||
if ( srcLen < 2 + m_conv->GetMaxCharLen() && wxIsUTF8Prefix(src, srcLen) )
|
||||
return wxCONV_FAILED;
|
||||
|
||||
// if the conversion failed but we didn't really detect anything and
|
||||
|
@@ -97,10 +97,11 @@ wxChar wxTextInputStream::GetChar()
|
||||
m_validEnd = 0;
|
||||
}
|
||||
|
||||
// We may need to decode up to 4 characters if we have input starting with
|
||||
// 3 BOM-like bytes, but not actually containing a BOM, as decoding it will
|
||||
// only succeed when 4 bytes are read -- and will yield 4 wide characters.
|
||||
wxChar wbuf[4];
|
||||
// We may need to decode up to 6 characters if we have input starting with
|
||||
// 2 null bytes (as in a UTF-32BE BOM), and then 3 bytes that look like
|
||||
// the start of a UTF-8 sequence, as decoding it will only succeed when
|
||||
// 6 bytes are read -- and will yield 6 wide characters.
|
||||
wxChar wbuf[6];
|
||||
for(size_t inlen = 0; inlen < sizeof(m_lastBytes); inlen++)
|
||||
{
|
||||
if ( inlen >= m_validEnd )
|
||||
|
@@ -324,6 +324,46 @@ TEST_CASE("wxTextInputStream::GetChar", "[text][input][stream][char]")
|
||||
REQUIRE( tis.GetChar() == 0x00 );
|
||||
CHECK( tis.GetInputStream().Eof() );
|
||||
}
|
||||
|
||||
// Two null bytes that look like the start of a UTF-32BE BOM,
|
||||
// followed by a 4-byte UTF-8 sequence.
|
||||
// Needs wxConvAuto not to switch to the fallback on fewer than 6 bytes.
|
||||
SECTION("UTF8-with-nulls")
|
||||
{
|
||||
const wxUint8 buf[] = { 0x00, 0x00, 0xf0, 0x90, 0x8c, 0x98 };
|
||||
wxMemoryInputStream mis(buf, sizeof(buf));
|
||||
wxTextInputStream tis(mis);
|
||||
|
||||
wxCharTypeBuffer<wxChar> e = wxString::FromUTF8((char*)buf, sizeof(buf))
|
||||
.tchar_str<wxChar>();
|
||||
for ( size_t i = 0; i < e.length(); ++i )
|
||||
{
|
||||
INFO("i = " << i);
|
||||
REQUIRE( tis.GetChar() == e[i] );
|
||||
}
|
||||
REQUIRE( tis.GetChar() == 0x00 );
|
||||
CHECK( tis.GetInputStream().Eof() );
|
||||
}
|
||||
|
||||
// Two null bytes that look like the start of a UTF-32BE BOM,
|
||||
// then 3 bytes that look like the start of a UTF-8 sequence.
|
||||
// Needs a 6-character output buffer in GetChar().
|
||||
SECTION("almost-UTF8-with-nulls")
|
||||
{
|
||||
const wxUint8 buf[] = { 0x00, 0x00, 0xf0, 0x90, 0x8c, 0xe0 };
|
||||
wxMemoryInputStream mis(buf, sizeof(buf));
|
||||
wxTextInputStream tis(mis);
|
||||
|
||||
wxCharTypeBuffer<wxChar> e = wxString((char*)buf, wxCSConv(wxFONTENCODING_ISO8859_1),
|
||||
sizeof(buf)).tchar_str<wxChar>();
|
||||
for ( size_t i = 0; i < e.length(); ++i )
|
||||
{
|
||||
INFO("i = " << i);
|
||||
REQUIRE( tis.GetChar() == e[i] );
|
||||
}
|
||||
REQUIRE( tis.GetChar() == 0x00 );
|
||||
CHECK( tis.GetInputStream().Eof() );
|
||||
}
|
||||
}
|
||||
|
||||
#endif // wxUSE_UNICODE
|
||||
|
Reference in New Issue
Block a user