Correct UTF-8 encoding of U+FFFF

An overlong (and hence invalid) 4-byte encoding was used for this character instead of the correct 3-byte sequence 0xEF 0xBF 0xBF. Fix this by using 3 bytes for all code points up to and including 0xFFFF, rather than excluding 0xFFFF itself as was done before. Closes #17920.
2018-01-28 17:50:51 +01:00
parent 26997607b6
commit 5bc208df3c
4 changed files with 17 additions and 2 deletions
--- a/tests/mbconv/mbconvtest.cpp
+++ b/tests/mbconv/mbconvtest.cpp
@@ -474,6 +474,20 @@ void MBConvTestCase::UTF8Tests()
    const wchar_t wc = 0xd800;
    CPPUNIT_ASSERT_EQUAL(wxCONV_FAILED, wxConvUTF8.FromWChar(NULL, 0, &wc, 1));
 #endif // SIZEOF_WCHAR_T == 2
+
+    SECTION("UTF-8-FFFF")
+    {
+        const wchar_t wcFFFF = 0xFFFF;
+        REQUIRE(wxConvUTF8.FromWChar(NULL, 0, &wcFFFF, 1) == 3);
+
+        char buf[4];
+        buf[3] = '\0';
+        REQUIRE(wxConvUTF8.FromWChar(buf, 3, &wcFFFF, 1) == 3);
+
+        CHECK(static_cast<unsigned char>(buf[0]) == 0xef);
+        CHECK(static_cast<unsigned char>(buf[1]) == 0xbf);
+        CHECK(static_cast<unsigned char>(buf[2]) == 0xbf);
+    }
 }

 void MBConvTestCase::UTF16LETests()