Fix wrong resulting string length in UTF-16 to wchar_t conversion.

Don't optimize the returned length for the surrogate-less case: this does save
a pass over the string, but at the price of returning a wrong result, which is
not worth it. Just compute the exact required length instead.

Closes #16298.

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@76622 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2014-05-29 23:48:40 +00:00
parent a309157a66
commit 3410aa372f
2 changed files with 21 additions and 22 deletions

View File

@@ -1638,14 +1638,6 @@ wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
return wxCONV_FAILED; return wxCONV_FAILED;
const size_t inLen = srcLen / BYTES_PER_CHAR; const size_t inLen = srcLen / BYTES_PER_CHAR;
if ( !dst )
{
// optimization: return maximal space which could be needed for this
// string even if the real size could be smaller if the buffer contains
// any surrogates
return inLen;
}
size_t outLen = 0; size_t outLen = 0;
const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src); const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; ) for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
@@ -1654,11 +1646,16 @@ wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
if ( !inBuff ) if ( !inBuff )
return wxCONV_FAILED; return wxCONV_FAILED;
if ( ++outLen > dstLen ) outLen++;
if ( dst )
{
if ( outLen > dstLen )
return wxCONV_FAILED; return wxCONV_FAILED;
*dst++ = ch; *dst++ = ch;
} }
}
return outLen; return outLen;
@@ -1711,14 +1708,6 @@ wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
return wxCONV_FAILED; return wxCONV_FAILED;
const size_t inLen = srcLen / BYTES_PER_CHAR; const size_t inLen = srcLen / BYTES_PER_CHAR;
if ( !dst )
{
// optimization: return maximal space which could be needed for this
// string even if the real size could be smaller if the buffer contains
// any surrogates
return inLen;
}
size_t outLen = 0; size_t outLen = 0;
const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src); const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; ) for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
@@ -1737,11 +1726,16 @@ wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
if ( numChars == 2 ) if ( numChars == 2 )
inBuff++; inBuff++;
if ( ++outLen > dstLen ) outLen++;
if ( dst )
{
if ( outLen > dstLen )
return wxCONV_FAILED; return wxCONV_FAILED;
*dst++ = ch; *dst++ = ch;
} }
}
return outLen; return outLen;

View File

@@ -390,6 +390,11 @@ void UnicodeTestCase::ConversionUTF16()
size_t len; size_t len;
conv.cMB2WC("\x01\0\0B\0C" /* A macron BC */, 6, &len); conv.cMB2WC("\x01\0\0B\0C" /* A macron BC */, 6, &len);
CPPUNIT_ASSERT_EQUAL( 3, len ); CPPUNIT_ASSERT_EQUAL( 3, len );
// Another one: verify that the length of the resulting string is computed
// correctly when there is a surrogate in the input.
wxMBConvUTF16BE().cMB2WC("\xd8\x03\xdc\x01" /* OLD TURKIC LETTER YENISEI A */, wxNO_LEN, &len);
CPPUNIT_ASSERT_EQUAL( 1, len );
} }
void UnicodeTestCase::ConversionUTF32() void UnicodeTestCase::ConversionUTF32()