Correct UTF-8 encoding of U+FFFF

An overlong (and hence invalid) 4-byte encoding was used for this character
instead of the correct 3-byte 0xEF 0xBF 0xBF sequence.

Fix this by using 3 bytes for code points up to and including 0xFFFF,
rather than excluding 0xFFFF itself as was done before.

Closes #17920.
This commit is contained in:
Vadim Zeitlin
2018-01-28 17:50:51 +01:00
parent 26997607b6
commit 5bc208df3c
4 changed files with 17 additions and 2 deletions

View File

@@ -474,6 +474,20 @@ void MBConvTestCase::UTF8Tests()
const wchar_t wc = 0xd800;
CPPUNIT_ASSERT_EQUAL(wxCONV_FAILED, wxConvUTF8.FromWChar(NULL, 0, &wc, 1));
#endif // SIZEOF_WCHAR_T == 2
SECTION("UTF-8-FFFF")
{
const wchar_t wcFFFF = 0xFFFF;
REQUIRE(wxConvUTF8.FromWChar(NULL, 0, &wcFFFF, 1) == 3);
char buf[4];
buf[3] = '\0';
REQUIRE(wxConvUTF8.FromWChar(buf, 3, &wcFFFF, 1) == 3);
CHECK(static_cast<unsigned char>(buf[0]) == 0xef);
CHECK(static_cast<unsigned char>(buf[1]) == 0xbf);
CHECK(static_cast<unsigned char>(buf[2]) == 0xbf);
}
}
void MBConvTestCase::UTF16LETests()