Allow decoding even shorter strings in fallback encoding

When the input is fed byte by byte, a failed decoding attempt never contains a complete UTF-8 character (apart from leading nulls), so it is enough to check whether the remaining bytes form a truncated UTF-8 sequence.
This commit is contained in:
Pavel Tyunin
2020-10-07 17:31:42 +03:00
parent 1cbcf24832
commit c9dd9e96a1
2 changed files with 15 additions and 17 deletions

View File

@@ -267,24 +267,21 @@ bool wxConvAuto::InitFromInput(const char *src, size_t len)
return true; return true;
} }
// checks if the input can be the beginning of a valid UTF-8 string // checks if the input can be the beginning of a valid UTF-8 sequence
static bool wxIsUTF8Prefix(const char *src, size_t len) static bool wxCanBeUTF8SequencePrefix(const char *src, size_t len)
{ {
unsigned char l; size_t i = 0;
for ( size_t i = 0; i < len; ++i ) unsigned char l = tableUtf8Lengths[(unsigned char)src[i]];
if ( !l )
return false; // invalid leading byte
while ( --l )
{ {
l = tableUtf8Lengths[(unsigned char)src[i]]; if ( ++i == len )
if ( !l ) return true; // truncated sequence
return false; // invalid leading byte if ( (src[i] & 0xC0) != 0x80 )
while ( --l ) return false; // invalid continuation byte
{
if ( ++i == len )
return true; // truncated sequence
if ( (src[i] & 0xC0) != 0x80 )
return false; // invalid continuation byte
}
} }
return true; return false; // complete sequence
} }
size_t size_t
@@ -339,7 +336,8 @@ wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
size_t nNull = 0; size_t nNull = 0;
if ( srcLen != wxNO_LEN && srcLen >= 2 && !src[0] ) if ( srcLen != wxNO_LEN && srcLen >= 2 && !src[0] )
nNull = ( src[1]? 1 : 2 ); nNull = ( src[1]? 1 : 2 );
if ( srcLen < nNull + m_conv->GetMaxCharLen() && wxIsUTF8Prefix(src, srcLen) ) if ( srcLen < nNull + m_conv->GetMaxCharLen() &&
wxCanBeUTF8SequencePrefix(src + nNull, srcLen - nNull) )
return wxCONV_FAILED; return wxCONV_FAILED;
// if the conversion failed but we didn't really detect anything and // if the conversion failed but we didn't really detect anything and

View File

@@ -226,7 +226,7 @@ void ConvAutoTestCase::FallbackMultibyte()
void ConvAutoTestCase::FallbackShort() void ConvAutoTestCase::FallbackShort()
{ {
TestFirstChar("\x61\x61\x61\xc4", 'a', 4, TestFirstChar("\x61\xc4", 'a', 2,
ConvState(wxBOM_None, wxFONTENCODING_ISO8859_5, true), ConvState(wxBOM_None, wxFONTENCODING_ISO8859_5, true),
wxFONTENCODING_ISO8859_5); wxFONTENCODING_ISO8859_5);
} }