Switch to the fallback conversion earlier if the input is not a valid UTF-8 prefix
This commit is contained in:
@@ -387,6 +387,8 @@ private:
|
|||||||
int m_options;
|
int m_options;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bool wxIsUTF8Prefix(const char *src, size_t len);
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
// wxMBConvUTF16Base: for both LE and BE variants
|
// wxMBConvUTF16Base: for both LE and BE variants
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
|
@@ -313,7 +313,7 @@ wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
|
|||||||
// to the fall-back conversion in this case as it would prevent us from
|
// to the fall-back conversion in this case as it would prevent us from
|
||||||
// decoding UTF-8 input when fed it byte by byte, as done by
|
// decoding UTF-8 input when fed it byte by byte, as done by
|
||||||
// wxTextInputStream, for example
|
// wxTextInputStream, for example
|
||||||
if ( srcLen < m_conv->GetMaxCharLen() )
|
if ( srcLen < m_conv->GetMaxCharLen() && wxIsUTF8Prefix(src, srcLen) )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
// if the conversion failed but we didn't really detect anything and
|
// if the conversion failed but we didn't really detect anything and
|
||||||
|
@@ -1446,6 +1446,26 @@ size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
|
|||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// checks if the input can be the beginning of a valid UTF-8 string
|
||||||
|
bool wxIsUTF8Prefix(const char *src, size_t len)
|
||||||
|
{
|
||||||
|
unsigned char l;
|
||||||
|
for ( size_t i = 0; i < len; ++i )
|
||||||
|
{
|
||||||
|
l = tableUtf8Lengths[(unsigned char)src[i]];
|
||||||
|
if ( !l )
|
||||||
|
return false; // invalid leading byte
|
||||||
|
while ( --l )
|
||||||
|
{
|
||||||
|
if ( ++i == len )
|
||||||
|
return true; // truncated sequence
|
||||||
|
if ( (src[i] & 0xC0) != 0x80 )
|
||||||
|
return false; // invalid continuation byte
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// UTF-16
|
// UTF-16
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
@@ -288,9 +288,8 @@ void ConvAutoTestCase::StreamUTF32BE()
|
|||||||
|
|
||||||
void ConvAutoTestCase::StreamFallback()
|
void ConvAutoTestCase::StreamFallback()
|
||||||
{
|
{
|
||||||
// this only works if there are at least 3 bytes after the first non-ASCII character
|
TestTextStream("\x61\xbf\x0A\xe0",
|
||||||
TestTextStream("\x61\xbf\x0A\xe0\x7a",
|
4, wxString::FromUTF8("a\xd0\x9f"), wxString::FromUTF8("\xd1\x80"),
|
||||||
5, wxString::FromUTF8("a\xd0\x9f"), wxString::FromUTF8("\xd1\x80z"),
|
|
||||||
wxFONTENCODING_ISO8859_5);
|
wxFONTENCODING_ISO8859_5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user