diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index f51660808a..c3191201c4 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -1122,13 +1122,30 @@ wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen, wxUint32 code; #ifdef WC_UTF16 - // cast is ok for WC_UTF16 - if ( decode_utf16((const wxUint16 *)wp, code) == 2 ) + // Be careful here: decode_utf16() may need to read the next wchar_t + // but we might not have any left, so pass it a temporary buffer which + // always has 2 wide characters and take care to set its second element + // to 0, which is invalid as a second half of a surrogate, to ensure + // that we return an error when trying to convert a buffer ending with + // half of a surrogate. + wxUint16 tmp[2]; + tmp[0] = wp[0]; + tmp[1] = srcLen != 0 ? wp[1] : 0; + switch ( decode_utf16(tmp, code) ) { - // skip the next char too as we decoded a surrogate - wp++; - if ( srcLen != wxNO_LEN ) - srcLen--; + case 1: + // Nothing special to do, just a character from BMP. + break; + + case 2: + // skip the next char too as we decoded a surrogate + wp++; + if ( srcLen != wxNO_LEN ) + srcLen--; + break; + + case wxCONV_FAILED: + return wxCONV_FAILED; } #else // wchar_t is UTF-32 code = *wp & 0x7fffffff; diff --git a/tests/mbconv/mbconvtest.cpp b/tests/mbconv/mbconvtest.cpp index 932ba3f416..10985fe3d5 100644 --- a/tests/mbconv/mbconvtest.cpp +++ b/tests/mbconv/mbconvtest.cpp @@ -203,6 +203,12 @@ private: void UTF8PUA_f4_80_82_a5() { UTF8PUA("\xf4\x80\x82\xa5", u1000a5); } void UTF8Octal_backslash245() { UTF8Octal("\\245", L"\\245"); } + // Test that converting strings with incomplete surrogates in them fails + // (surrogates are only used in UTF-16, i.e. when wchar_t is 16 bits). 
+#if SIZEOF_WCHAR_T == 2 + void UTF8_fail_broken_surrogates(); +#endif // SIZEOF_WCHAR_T == 2 + // implementation for the utf-8 tests (see comments below) void UTF8(const char *charSequence, const wchar_t *wideSequence); void UTF8PUA(const char *charSequence, const wchar_t *wideSequence); @@ -461,6 +467,12 @@ void MBConvTestCase::UTF8Tests() wxConvUTF8, 1 ); + +#if SIZEOF_WCHAR_T == 2 + // Can't use \ud800 as it's an invalid Unicode character. + const wchar_t wc = 0xd800; + CPPUNIT_ASSERT_EQUAL(wxCONV_FAILED, wxConvUTF8.FromWChar(NULL, 0, &wc, 1)); +#endif // SIZEOF_WCHAR_T == 2 } void MBConvTestCase::UTF16LETests()