From e570e8b6ac652a4dabd9af67c263157519c0f2c1 Mon Sep 17 00:00:00 2001 From: ARATA Mizuki Date: Fri, 13 Nov 2015 19:17:37 +0100 Subject: [PATCH] Fix conversion from wchar_t string with surrogates to UTF-8 Correctly account for the second half of the surrogate pair in the wxMBConvUTF8::FromWChar() implementation; this makes the conversion actually work for strings containing surrogates on platforms using UTF-16 encoding for wchar_t (such as MSW). See #17070. --- src/common/strconv.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 3102294183..76c6df67a7 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -1419,7 +1419,12 @@ size_t wxMBConvUTF8::FromWChar(char *buf, size_t n, #ifdef WC_UTF16 // cast is ok for WC_UTF16 size_t pa = decode_utf16((const wxUint16 *)psz, cc); + + // we could have consumed two input code units if we decoded a + // surrogate, so adjust the input pointer and, if necessary, the length psz += (pa == wxCONV_FAILED) ? 1 : pa; + if ( pa == 2 && !isNulTerminated ) + srcLen--; #else cc = (*psz++) & 0x7fffffff; #endif