diff --git a/docs/changes.txt b/docs/changes.txt index a7653a1844..e95fd326ab 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -211,6 +211,7 @@ wxMSW: - Notify shell about the changes done by wxMimeTypesManager (Maarten Bent). - Fix wxPrintf() and friends when using MinGW with ANSI stdio option. - Fix strike-through support in wxFont with GDI+ (David Vanderson). +- Fix UTF-32 conversion for non-BMP characters (ARATA Mizuki). wxOSX/Cocoa: diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 0390dc3795..867d663f99 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -1887,18 +1887,6 @@ wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen, if ( srcLen == wxNO_LEN ) srcLen = wxWcslen(src) + 1; - if ( !dst ) - { - // optimization: return maximal space which could be needed for this - // string instead of the exact amount which could be less if there are - // any surrogates in the input - // - // we consider that surrogates are rare enough to make it worthwhile to - // avoid running the loop below at the cost of slightly extra memory - // consumption - return srcLen * BYTES_PER_CHAR; - } - wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst); size_t outLen = 0; for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; ) @@ -1909,10 +1897,13 @@ wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen, outLen += BYTES_PER_CHAR; - if ( outLen > dstLen ) - return wxCONV_FAILED; + if ( outBuff ) + { + if ( outLen > dstLen ) + return wxCONV_FAILED; - *outBuff++ = ch; + *outBuff++ = ch; + } } return outLen; @@ -1965,18 +1956,6 @@ wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen, if ( srcLen == wxNO_LEN ) srcLen = wxWcslen(src) + 1; - if ( !dst ) - { - // optimization: return maximal space which could be needed for this - // string instead of the exact amount which could be less if there are - // any surrogates in the input - // - // we consider that surrogates are rare enough to make it worthwhile to - // avoid running the loop below 
at the cost of slightly extra memory - // consumption - return srcLen*BYTES_PER_CHAR; - } - wxUint32 *outBuff = reinterpret_cast<wxUint32 *>(dst); size_t outLen = 0; for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; ) @@ -1987,10 +1966,13 @@ wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen, outLen += BYTES_PER_CHAR; - if ( outLen > dstLen ) - return wxCONV_FAILED; + if ( outBuff ) + { + if ( outLen > dstLen ) + return wxCONV_FAILED; - *outBuff++ = wxUINT32_SWAP_ALWAYS(ch); + *outBuff++ = wxUINT32_SWAP_ALWAYS(ch); + } } return outLen; diff --git a/tests/strings/unicode.cpp b/tests/strings/unicode.cpp index 565bd7b22b..20751f23fe 100644 --- a/tests/strings/unicode.cpp +++ b/tests/strings/unicode.cpp @@ -403,6 +403,19 @@ void UnicodeTestCase::ConversionUTF16() wxMBConvUTF16BE().cMB2WC("\xd8\x03\xdc\x01\0" /* OLD TURKIC LETTER YENISEI A */, wxNO_LEN, &len); CPPUNIT_ASSERT_EQUAL( 1, len ); #endif // UTF-32 internal representation + +#if SIZEOF_WCHAR_T == 2 + // Verify that the length of UTF-32 string is correct even when converting + // to it from a longer UTF-16 string with surrogates. + + // Construct CAT FACE U+1F431 without using \U which is not supported by + // ancient compilers and without using \u with surrogates which is + // (correctly) flagged as an error by the newer ones. + wchar_t ws[2]; + ws[0] = 0xd83d; + ws[1] = 0xdc31; + CPPUNIT_ASSERT_EQUAL( 4, wxMBConvUTF32BE().FromWChar(NULL, 0, ws, 2) ); +#endif // UTF-16 internal representation } void UnicodeTestCase::ConversionUTF32()