Fix conversion from wchar_t string with surrogates to UTF-8
Correctly account for the second half of a surrogate pair in the wxMBConvUTF8::FromWChar() implementation; this makes the conversion actually work for strings containing surrogates on platforms that use UTF-16 encoding for wchar_t (such as MSW). See #17070.
This commit is contained in:
committed by
Vadim Zeitlin
parent
37dd89a0da
commit
e570e8b6ac
@@ -1419,7 +1419,12 @@ size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
|
|||||||
#ifdef WC_UTF16
|
#ifdef WC_UTF16
|
||||||
// cast is ok for WC_UTF16
|
// cast is ok for WC_UTF16
|
||||||
size_t pa = decode_utf16((const wxUint16 *)psz, cc);
|
size_t pa = decode_utf16((const wxUint16 *)psz, cc);
|
||||||
|
|
||||||
|
// we could have consumed two input code units if we decoded a
|
||||||
|
// surrogate, so adjust the input pointer and, if necessary, the length
|
||||||
psz += (pa == wxCONV_FAILED) ? 1 : pa;
|
psz += (pa == wxCONV_FAILED) ? 1 : pa;
|
||||||
|
if ( pa == 2 && !isNulTerminated )
|
||||||
|
srcLen--;
|
||||||
#else
|
#else
|
||||||
cc = (*psz++) & 0x7fffffff;
|
cc = (*psz++) & 0x7fffffff;
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user