Correct UTF-8 encoding of U+FFFF

An overlong (and hence invalid) 4-byte encoding was used for this character
instead of the correct 3-byte sequence 0xEF 0xBF 0xBF.

Fix this by using 3 bytes for all code points up to and including 0xFFFF,
instead of excluding 0xFFFF itself as was done before.

Closes #17920.
This commit is contained in:
Vadim Zeitlin
2018-01-28 17:50:51 +01:00
parent 26997607b6
commit 5bc208df3c
4 changed files with 17 additions and 2 deletions

View File

@@ -108,6 +108,7 @@ All:
- Update all bundled 3rd party libraries to their latest versions. - Update all bundled 3rd party libraries to their latest versions.
- Use unique prefix for all zlib symbols to avoid link conflicts. - Use unique prefix for all zlib symbols to avoid link conflicts.
- Make wxFile::ReadAll() work for unseekable files too. - Make wxFile::ReadAll() work for unseekable files too.
- Correct UTF-8 encoding of U+FFFF (axiom).
All (GUI): All (GUI):

View File

@@ -1135,7 +1135,7 @@ wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
out[0] = 0xC0 | code; out[0] = 0xC0 | code;
} }
} }
else if ( code < 0xFFFF ) else if ( code <= 0xFFFF )
{ {
len = 3; len = 3;
if ( out ) if ( out )

View File

@@ -431,7 +431,7 @@ wxScopedCharBuffer wxUString::utf8_str() const
{ {
utf8_length += 2; utf8_length += 2;
} }
else if ( code < 0xFFFF ) else if ( code <= 0xFFFF )
{ {
utf8_length += 3; utf8_length += 3;
} }

View File

@@ -474,6 +474,20 @@ void MBConvTestCase::UTF8Tests()
const wchar_t wc = 0xd800; const wchar_t wc = 0xd800;
CPPUNIT_ASSERT_EQUAL(wxCONV_FAILED, wxConvUTF8.FromWChar(NULL, 0, &wc, 1)); CPPUNIT_ASSERT_EQUAL(wxCONV_FAILED, wxConvUTF8.FromWChar(NULL, 0, &wc, 1));
#endif // SIZEOF_WCHAR_T == 2 #endif // SIZEOF_WCHAR_T == 2
SECTION("UTF-8-FFFF")
{
const wchar_t wcFFFF = 0xFFFF;
REQUIRE(wxConvUTF8.FromWChar(NULL, 0, &wcFFFF, 1) == 3);
char buf[4];
buf[3] = '\0';
REQUIRE(wxConvUTF8.FromWChar(buf, 3, &wcFFFF, 1) == 3);
CHECK(static_cast<unsigned char>(buf[0]) == 0xef);
CHECK(static_cast<unsigned char>(buf[1]) == 0xbf);
CHECK(static_cast<unsigned char>(buf[2]) == 0xbf);
}
} }
void MBConvTestCase::UTF16LETests() void MBConvTestCase::UTF16LETests()