From 0c02d70fa57ff221b1ad4cb33f714c32b59c7d32 Mon Sep 17 00:00:00 2001 From: ARATA Mizuki Date: Fri, 13 Nov 2015 19:19:28 +0100 Subject: [PATCH] Add a test checking that conversions involving surrogates work After the fixes in the previous commits conversions between wchar_t containing surrogates and UTF-{8,16,32} work correctly, so add a test ensuring that this is the case. Notice that other conversions are still broken in presence of surrogates. See #17070. --- tests/mbconv/mbconvtest.cpp | 82 +++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/tests/mbconv/mbconvtest.cpp b/tests/mbconv/mbconvtest.cpp index 69044cc057..4674a67e9a 100644 --- a/tests/mbconv/mbconvtest.cpp +++ b/tests/mbconv/mbconvtest.cpp @@ -81,6 +81,7 @@ private: CPPUNIT_TEST( FontmapTests ); CPPUNIT_TEST( BufSize ); CPPUNIT_TEST( FromWCharTests ); + CPPUNIT_TEST( NonBMPCharTests ); #ifdef HAVE_WCHAR_H CPPUNIT_TEST( UTF8_41 ); CPPUNIT_TEST( UTF8_7f ); @@ -116,6 +117,7 @@ private: void FontmapTests(); void BufSize(); void FromWCharTests(); + void NonBMPCharTests(); void IconvTests(); void Latin1Tests(); @@ -940,6 +942,86 @@ void MBConvTestCase::FromWCharTests() CPPUNIT_ASSERT_EQUAL( '!', mbuf[6]); } +void MBConvTestCase::NonBMPCharTests() +{ /* Passes the same sample text, which contains non-BMP characters, to TestDecoder() and TestEncoder() for wxConvUTF8 and for the little- and big-endian UTF-16 and UTF-32 converters. */ + // U+1F363 (UTF-16: D83C DF63, UTF-8: F0 9F 8D A3) sushi (emoji) + // U+732B (UTF-8: E7 8C AB) cat (kanji) + // U+1F408 (UTF-16: D83D DC08, UTF-8: F0 9F 90 88) cat (emoji) + // U+845B U+E0101 (UTF-16: 845B DB40 DD01, UTF-8: E8 91 9B F3 A0 84 81) (a kanji + an IVS) + const char u8[] = + "\xF0\x9F\x8D\xA3" /* U+1F363 */ + "\xE7\x8C\xAB\xF0\x9F\x90\x88" /* U+732B U+1F408 */ + "\xE8\x91\x9B\xF3\xA0\x84\x81"; /* U+845B U+E0101 */ + const wxChar16 u16[] = { + 0xD83C, 0xDF63, + 0x732B, 0xD83D, 0xDC08, + 0x845B, 0xDB40, 0xDD01, + 0}; + const wxChar32 u32[] = { + 0x1F363, + 0x732B, 0x1F408, + 0x845B, 0xE0101, + 0}; +#if SIZEOF_WCHAR_T == 2 + const wchar_t *const w = u16; + const size_t wchars = sizeof(u16)/sizeof(wxChar16) - 1; /* number of elements of u16, not counting the terminating 0 */ +#else 
+ const wchar_t *const w = u32; + const size_t wchars = sizeof(u32)/sizeof(wxChar32) - 1; /* number of elements of u32, not counting the terminating 0 */ +#endif + { + // Notice that these tests can only be done with strict UTF-8 + // converter, the use of any MAP_INVALID_UTF8_XXX options currently + // completely breaks wxTextInputStream use. + TestDecoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1); /* sizeof(u8)-1 leaves out the NUL that ends the string literal; NOTE(review): the last argument (1 here, 2 and 4 below) is presumably the terminator size in bytes, confirm against TestDecoder() */ + TestEncoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1); + } + { + char u16le[sizeof(u16)]; /* byte buffer exactly as large as u16, terminating element included */ + for (size_t i = 0; i < sizeof(u16)/2; ++i) { /* writes each element of u16 low byte first; the /2 assumes 2-byte wxChar16, TODO confirm */ + u16le[2*i] = (char)(unsigned char)(u16[i] & 0xFF); + u16le[2*i+1] = (char)(unsigned char)((u16[i] >> 8) & 0xFF); + } + wxMBConvUTF16LE conv; + TestDecoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2); /* sizeof(u16le)-2 leaves out the two bytes of the terminating 0 element */ + TestEncoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2); + } + { + char u16be[sizeof(u16)]; + for (size_t i = 0; i < sizeof(u16)/2; ++i) { /* same as above, but high byte first */ + u16be[2*i] = (char)(unsigned char)((u16[i] >> 8) & 0xFF); + u16be[2*i+1] = (char)(unsigned char)(u16[i] & 0xFF); + } + wxMBConvUTF16BE conv; + TestDecoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2); + TestEncoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2); + } + { + char u32le[sizeof(u32)]; /* byte buffer exactly as large as u32, terminating element included */ + for (size_t i = 0; i < sizeof(u32)/4; ++i) { /* writes each element of u32 as four bytes, least significant first */ + u32le[4*i] = (char)(unsigned char)(u32[i] & 0xFF); + u32le[4*i+1] = (char)(unsigned char)((u32[i] >> 8) & 0xFF); + u32le[4*i+2] = (char)(unsigned char)((u32[i] >> 16) & 0xFF); + u32le[4*i+3] = (char)(unsigned char)((u32[i] >> 24) & 0xFF); + } + wxMBConvUTF32LE conv; + TestDecoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4); /* sizeof(u32le)-4 leaves out the four bytes of the terminating 0 element */ + TestEncoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4); + } + { + char u32be[sizeof(u32)]; + for (size_t i = 0; i < sizeof(u32)/4; ++i) { /* same as above, but most significant byte first */ + u32be[4*i] = (char)(unsigned char)((u32[i] >> 24) & 0xFF); + u32be[4*i+1] = (char)(unsigned char)((u32[i] >> 16) & 0xFF); + u32be[4*i+2] = (char)(unsigned char)((u32[i] >> 8) & 0xFF); + u32be[4*i+3] = (char)(unsigned char)(u32[i] & 0xFF); + } + wxMBConvUTF32BE conv; + TestDecoder(w, wchars, u32be, sizeof(u32be)-4, conv, 4); + TestEncoder(w, wchars, u32be, 
sizeof(u32be)-4, conv, 4); + } +} + WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name ); void MBConvTestCase::IconvTests()