Allow decoding even shorter strings in fallback encoding
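Complete UTF-8 characters (except leading nulls) never appear in failed decoding attempts when the input is fed byte by byte, so only a truncated UTF-8 sequence (checked after skipping any leading nulls) needs to be treated as a possible prefix of UTF-8 input.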
This commit is contained in:
@@ -267,24 +267,21 @@ bool wxConvAuto::InitFromInput(const char *src, size_t len)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// checks if the input can be the beginning of a valid UTF-8 string
|
// checks if the input can be the beginning of a valid UTF-8 sequence
|
||||||
static bool wxIsUTF8Prefix(const char *src, size_t len)
|
static bool wxCanBeUTF8SequencePrefix(const char *src, size_t len)
|
||||||
{
|
{
|
||||||
unsigned char l;
|
size_t i = 0;
|
||||||
for ( size_t i = 0; i < len; ++i )
|
unsigned char l = tableUtf8Lengths[(unsigned char)src[i]];
|
||||||
|
if ( !l )
|
||||||
|
return false; // invalid leading byte
|
||||||
|
while ( --l )
|
||||||
{
|
{
|
||||||
l = tableUtf8Lengths[(unsigned char)src[i]];
|
if ( ++i == len )
|
||||||
if ( !l )
|
return true; // truncated sequence
|
||||||
return false; // invalid leading byte
|
if ( (src[i] & 0xC0) != 0x80 )
|
||||||
while ( --l )
|
return false; // invalid continuation byte
|
||||||
{
|
|
||||||
if ( ++i == len )
|
|
||||||
return true; // truncated sequence
|
|
||||||
if ( (src[i] & 0xC0) != 0x80 )
|
|
||||||
return false; // invalid continuation byte
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return true;
|
return false; // complete sequence
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
@@ -339,7 +336,8 @@ wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
|
|||||||
size_t nNull = 0;
|
size_t nNull = 0;
|
||||||
if ( srcLen != wxNO_LEN && srcLen >= 2 && !src[0] )
|
if ( srcLen != wxNO_LEN && srcLen >= 2 && !src[0] )
|
||||||
nNull = ( src[1]? 1 : 2 );
|
nNull = ( src[1]? 1 : 2 );
|
||||||
if ( srcLen < nNull + m_conv->GetMaxCharLen() && wxIsUTF8Prefix(src, srcLen) )
|
if ( srcLen < nNull + m_conv->GetMaxCharLen() &&
|
||||||
|
wxCanBeUTF8SequencePrefix(src + nNull, srcLen - nNull) )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
// if the conversion failed but we didn't really detect anything and
|
// if the conversion failed but we didn't really detect anything and
|
||||||
|
@@ -226,7 +226,7 @@ void ConvAutoTestCase::FallbackMultibyte()
|
|||||||
|
|
||||||
void ConvAutoTestCase::FallbackShort()
|
void ConvAutoTestCase::FallbackShort()
|
||||||
{
|
{
|
||||||
TestFirstChar("\x61\x61\x61\xc4", 'a', 4,
|
TestFirstChar("\x61\xc4", 'a', 2,
|
||||||
ConvState(wxBOM_None, wxFONTENCODING_ISO8859_5, true),
|
ConvState(wxBOM_None, wxFONTENCODING_ISO8859_5, true),
|
||||||
wxFONTENCODING_ISO8859_5);
|
wxFONTENCODING_ISO8859_5);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user