Merge wchar_t-surrogates branch
Fix several problems with surrogates in UTF-16-encoded wchar_t strings: correct bugs in UTF-8 and UTF-32 conversion code and add surrogates support to wxText{Input,Output}Stream classes. Closes #17070.
This commit is contained in:
@@ -87,7 +87,16 @@ protected:
|
|||||||
|
|
||||||
#if wxUSE_UNICODE
|
#if wxUSE_UNICODE
|
||||||
wxMBConv *m_conv;
|
wxMBConv *m_conv;
|
||||||
#endif
|
|
||||||
|
// The second half of a surrogate character when using UTF-16 for wchar_t:
|
||||||
|
// we can't return it immediately from GetChar() when we read a Unicode
|
||||||
|
// code point outside of the BMP, but we can't keep it in m_lastBytes
|
||||||
|
// either because it can't be separately decoded, so we have a separate 1
|
||||||
|
// wchar_t buffer just for this case.
|
||||||
|
#if SIZEOF_WCHAR_T == 2
|
||||||
|
wchar_t m_lastWChar;
|
||||||
|
#endif // SIZEOF_WCHAR_T == 2
|
||||||
|
#endif // wxUSE_UNICODE
|
||||||
|
|
||||||
bool EatEOL(const wxChar &c);
|
bool EatEOL(const wxChar &c);
|
||||||
void UngetLast(); // should be used instead of wxInputStream::Ungetch() because of Unicode issues
|
void UngetLast(); // should be used instead of wxInputStream::Ungetch() because of Unicode issues
|
||||||
@@ -165,7 +174,13 @@ protected:
|
|||||||
|
|
||||||
#if wxUSE_UNICODE
|
#if wxUSE_UNICODE
|
||||||
wxMBConv *m_conv;
|
wxMBConv *m_conv;
|
||||||
#endif
|
|
||||||
|
#if SIZEOF_WCHAR_T == 2
|
||||||
|
// The first half of a surrogate character if one was passed to PutChar()
|
||||||
|
// and couldn't be output when it was called the last time.
|
||||||
|
wchar_t m_lastWChar;
|
||||||
|
#endif // SIZEOF_WCHAR_T == 2
|
||||||
|
#endif // wxUSE_UNICODE
|
||||||
|
|
||||||
wxDECLARE_NO_COPY_CLASS(wxTextOutputStream);
|
wxDECLARE_NO_COPY_CLASS(wxTextOutputStream);
|
||||||
};
|
};
|
||||||
|
@@ -121,7 +121,7 @@ public:
|
|||||||
including the terminating @c NUL character(s).
|
including the terminating @c NUL character(s).
|
||||||
|
|
||||||
@return
|
@return
|
||||||
The number of character written (or which would have been written
|
The number of characters written (or which would have been written
|
||||||
if it were non-@NULL) to @a dst or @c wxCONV_FAILED on error.
|
if it were non-@NULL) to @a dst or @c wxCONV_FAILED on error.
|
||||||
*/
|
*/
|
||||||
virtual size_t ToWChar(wchar_t* dst, size_t dstLen, const char* src,
|
virtual size_t ToWChar(wchar_t* dst, size_t dstLen, const char* src,
|
||||||
@@ -148,8 +148,13 @@ public:
|
|||||||
including the terminating @c NUL character.
|
including the terminating @c NUL character.
|
||||||
|
|
||||||
@return
|
@return
|
||||||
The number of character written (or which would have been written
|
If @a dst is non-@NULL, the number of characters actually written to
|
||||||
if it were non-@NULL) to @a dst or @c wxCONV_FAILED on error.
|
it. If @a dst is @NULL, the returned value is at least equal to the
|
||||||
|
number of characters that would have been written out if it were
|
||||||
|
non-@NULL, but can be larger than it under the platforms using
|
||||||
|
UTF-16 as @c wchar_t encoding (this allows a useful optimization in
|
||||||
|
the implementation of this function for UTF-32). In any case,
|
||||||
|
@c wxCONV_FAILED is returned on conversion error.
|
||||||
*/
|
*/
|
||||||
virtual size_t FromWChar(char* dst, size_t dstLen, const wchar_t* src,
|
virtual size_t FromWChar(char* dst, size_t dstLen, const wchar_t* src,
|
||||||
size_t srcLen = wxNO_LEN) const;
|
size_t srcLen = wxNO_LEN) const;
|
||||||
|
@@ -488,7 +488,12 @@ wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
|
|||||||
// the input is not
|
// the input is not
|
||||||
wxCharBuffer buf(dstLen + nulLen - 1);
|
wxCharBuffer buf(dstLen + nulLen - 1);
|
||||||
memset(buf.data() + dstLen, 0, nulLen);
|
memset(buf.data() + dstLen, 0, nulLen);
|
||||||
if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
|
|
||||||
|
// Notice that return value of the call to FromWChar() here may be
|
||||||
|
// different from the one above as it could have overestimated the
|
||||||
|
// space needed, while what we get here is the exact length.
|
||||||
|
dstLen = FromWChar(buf.data(), dstLen, inBuff, inLen);
|
||||||
|
if ( dstLen != wxCONV_FAILED )
|
||||||
{
|
{
|
||||||
if ( outLen )
|
if ( outLen )
|
||||||
{
|
{
|
||||||
@@ -1122,13 +1127,30 @@ wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
|
|||||||
|
|
||||||
wxUint32 code;
|
wxUint32 code;
|
||||||
#ifdef WC_UTF16
|
#ifdef WC_UTF16
|
||||||
// cast is ok for WC_UTF16
|
// Be careful here: decode_utf16() may need to read the next wchar_t
|
||||||
if ( decode_utf16((const wxUint16 *)wp, code) == 2 )
|
// but we might not have any left, so pass it a temporary buffer which
|
||||||
|
// always has 2 wide characters and take care to set its second element
|
||||||
|
// to 0, which is invalid as a second half of a surrogate, to ensure
|
||||||
|
// that we return an error when trying to convert a buffer ending with
|
||||||
|
// half of a surrogate.
|
||||||
|
wxUint16 tmp[2];
|
||||||
|
tmp[0] = wp[0];
|
||||||
|
tmp[1] = srcLen != 0 ? wp[1] : 0;
|
||||||
|
switch ( decode_utf16(tmp, code) )
|
||||||
{
|
{
|
||||||
// skip the next char too as we decoded a surrogate
|
case 1:
|
||||||
wp++;
|
// Nothing special to do, just a character from BMP.
|
||||||
if ( srcLen != wxNO_LEN )
|
break;
|
||||||
srcLen--;
|
|
||||||
|
case 2:
|
||||||
|
// skip the next char too as we decoded a surrogate
|
||||||
|
wp++;
|
||||||
|
if ( srcLen != wxNO_LEN )
|
||||||
|
srcLen--;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case wxCONV_FAILED:
|
||||||
|
return wxCONV_FAILED;
|
||||||
}
|
}
|
||||||
#else // wchar_t is UTF-32
|
#else // wchar_t is UTF-32
|
||||||
code = *wp & 0x7fffffff;
|
code = *wp & 0x7fffffff;
|
||||||
@@ -1397,7 +1419,12 @@ size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
|
|||||||
#ifdef WC_UTF16
|
#ifdef WC_UTF16
|
||||||
// cast is ok for WC_UTF16
|
// cast is ok for WC_UTF16
|
||||||
size_t pa = decode_utf16((const wxUint16 *)psz, cc);
|
size_t pa = decode_utf16((const wxUint16 *)psz, cc);
|
||||||
|
|
||||||
|
// we could have consumed two input code units if we decoded a
|
||||||
|
// surrogate, so adjust the input pointer and, if necessary, the length
|
||||||
psz += (pa == wxCONV_FAILED) ? 1 : pa;
|
psz += (pa == wxCONV_FAILED) ? 1 : pa;
|
||||||
|
if ( pa == 2 && !isNulTerminated )
|
||||||
|
srcLen--;
|
||||||
#else
|
#else
|
||||||
cc = (*psz++) & 0x7fffffff;
|
cc = (*psz++) & 0x7fffffff;
|
||||||
#endif
|
#endif
|
||||||
|
@@ -36,6 +36,10 @@ wxTextInputStream::wxTextInputStream(wxInputStream &s,
|
|||||||
: m_input(s), m_separators(sep), m_conv(conv.Clone())
|
: m_input(s), m_separators(sep), m_conv(conv.Clone())
|
||||||
{
|
{
|
||||||
memset((void*)m_lastBytes, 0, 10);
|
memset((void*)m_lastBytes, 0, 10);
|
||||||
|
|
||||||
|
#if SIZEOF_WCHAR_T == 2
|
||||||
|
m_lastWChar = 0;
|
||||||
|
#endif // SIZEOF_WCHAR_T == 2
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep)
|
wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep)
|
||||||
@@ -64,6 +68,17 @@ void wxTextInputStream::UngetLast()
|
|||||||
wxChar wxTextInputStream::NextChar()
|
wxChar wxTextInputStream::NextChar()
|
||||||
{
|
{
|
||||||
#if wxUSE_UNICODE
|
#if wxUSE_UNICODE
|
||||||
|
#if SIZEOF_WCHAR_T == 2
|
||||||
|
// Return the already read character remaining from the last call to this
|
||||||
|
// function, if any.
|
||||||
|
if ( m_lastWChar )
|
||||||
|
{
|
||||||
|
const wxChar wc = m_lastWChar;
|
||||||
|
m_lastWChar = 0;
|
||||||
|
return wc;
|
||||||
|
}
|
||||||
|
#endif // SIZEOF_WCHAR_T == 2
|
||||||
|
|
||||||
wxChar wbuf[2];
|
wxChar wbuf[2];
|
||||||
memset((void*)m_lastBytes, 0, 10);
|
memset((void*)m_lastBytes, 0, 10);
|
||||||
for(size_t inlen = 0; inlen < 9; inlen++)
|
for(size_t inlen = 0; inlen < 9; inlen++)
|
||||||
@@ -91,10 +106,23 @@ wxChar wxTextInputStream::NextChar()
|
|||||||
// if we couldn't decode a single character during the last
|
// if we couldn't decode a single character during the last
|
||||||
// loop iteration we shouldn't be able to decode 2 or more of
|
// loop iteration we shouldn't be able to decode 2 or more of
|
||||||
// them with an extra single byte, something fishy is going on
|
// them with an extra single byte, something fishy is going on
|
||||||
|
// (except if we use UTF-16, see below)
|
||||||
wxFAIL_MSG("unexpected decoding result");
|
wxFAIL_MSG("unexpected decoding result");
|
||||||
wxFALLTHROUGH;// fall through nevertheless and return at least something
|
return wxEOT;
|
||||||
|
|
||||||
|
#if SIZEOF_WCHAR_T == 2
|
||||||
|
case 2:
|
||||||
|
// When wchar_t uses UTF-16, we could have decoded a single
|
||||||
|
// Unicode code point as 2 wchar_t characters and there is
|
||||||
|
// nothing else to do here but to return the first one now and
|
||||||
|
// remember the second one for the next call, as there is no
|
||||||
|
// way to fit both of them into a single wxChar in this case.
|
||||||
|
m_lastWChar = wbuf[1];
|
||||||
|
#endif // SIZEOF_WCHAR_T == 2
|
||||||
|
wxFALLTHROUGH;
|
||||||
|
|
||||||
case 1:
|
case 1:
|
||||||
|
|
||||||
// we finally decoded a character
|
// we finally decoded a character
|
||||||
return wbuf[0];
|
return wbuf[0];
|
||||||
}
|
}
|
||||||
@@ -374,6 +402,10 @@ wxTextOutputStream::wxTextOutputStream(wxOutputStream& s, wxEOL mode)
|
|||||||
m_mode = wxEOL_UNIX;
|
m_mode = wxEOL_UNIX;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if wxUSE_UNICODE && SIZEOF_WCHAR_T == 2
|
||||||
|
m_lastWChar = 0;
|
||||||
|
#endif // SIZEOF_WCHAR_T == 2
|
||||||
}
|
}
|
||||||
|
|
||||||
wxTextOutputStream::~wxTextOutputStream()
|
wxTextOutputStream::~wxTextOutputStream()
|
||||||
@@ -480,7 +512,66 @@ void wxTextOutputStream::WriteString(const wxString& string)
|
|||||||
wxTextOutputStream& wxTextOutputStream::PutChar(wxChar c)
|
wxTextOutputStream& wxTextOutputStream::PutChar(wxChar c)
|
||||||
{
|
{
|
||||||
#if wxUSE_UNICODE
|
#if wxUSE_UNICODE
|
||||||
|
#if SIZEOF_WCHAR_T == 2
|
||||||
|
wxCharBuffer buffer;
|
||||||
|
size_t len;
|
||||||
|
if ( m_lastWChar )
|
||||||
|
{
|
||||||
|
wxChar buf[2];
|
||||||
|
buf[0] = m_lastWChar;
|
||||||
|
buf[1] = c;
|
||||||
|
buffer = m_conv->cWC2MB(buf, WXSIZEOF(buf), &len);
|
||||||
|
m_lastWChar = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
buffer = m_conv->cWC2MB(&c, 1, &len);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( !len )
|
||||||
|
{
|
||||||
|
// Conversion failed, possibly because we have the first half of a
|
||||||
|
// surrogate character, so just store it and write it out when the
|
||||||
|
// second half is written to the stream too later.
|
||||||
|
//
|
||||||
|
// Notice that if we already had had a valid m_lastWChar, it is simply
|
||||||
|
// discarded here which is very bad, but there is no way to signal an
|
||||||
|
// error from here and this is not worse than the old code behaviour.
|
||||||
|
m_lastWChar = c;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for ( size_t n = 0; n < len; n++ )
|
||||||
|
{
|
||||||
|
const char c = buffer[n];
|
||||||
|
if ( c == '\n' )
|
||||||
|
{
|
||||||
|
switch ( m_mode )
|
||||||
|
{
|
||||||
|
case wxEOL_DOS:
|
||||||
|
m_output.Write("\r\n", 2);
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case wxEOL_MAC:
|
||||||
|
m_output.Write("\r", 1);
|
||||||
|
continue;
|
||||||
|
|
||||||
|
default:
|
||||||
|
wxFAIL_MSG( wxT("unknown EOL mode in wxTextOutputStream") );
|
||||||
|
wxFALLTHROUGH;
|
||||||
|
|
||||||
|
case wxEOL_UNIX:
|
||||||
|
// don't treat '\n' specially
|
||||||
|
;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m_output.Write(&c, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else // SIZEOF_WCHAR_T == 4
|
||||||
WriteString( wxString(&c, *m_conv, 1) );
|
WriteString( wxString(&c, *m_conv, 1) );
|
||||||
|
#endif // SIZEOF_WCHAR_T == 2 or 4
|
||||||
#else
|
#else
|
||||||
WriteString( wxString(&c, wxConvLocal, 1) );
|
WriteString( wxString(&c, wxConvLocal, 1) );
|
||||||
#endif
|
#endif
|
||||||
|
@@ -81,6 +81,7 @@ private:
|
|||||||
CPPUNIT_TEST( FontmapTests );
|
CPPUNIT_TEST( FontmapTests );
|
||||||
CPPUNIT_TEST( BufSize );
|
CPPUNIT_TEST( BufSize );
|
||||||
CPPUNIT_TEST( FromWCharTests );
|
CPPUNIT_TEST( FromWCharTests );
|
||||||
|
CPPUNIT_TEST( NonBMPCharTests );
|
||||||
#ifdef HAVE_WCHAR_H
|
#ifdef HAVE_WCHAR_H
|
||||||
CPPUNIT_TEST( UTF8_41 );
|
CPPUNIT_TEST( UTF8_41 );
|
||||||
CPPUNIT_TEST( UTF8_7f );
|
CPPUNIT_TEST( UTF8_7f );
|
||||||
@@ -116,6 +117,7 @@ private:
|
|||||||
void FontmapTests();
|
void FontmapTests();
|
||||||
void BufSize();
|
void BufSize();
|
||||||
void FromWCharTests();
|
void FromWCharTests();
|
||||||
|
void NonBMPCharTests();
|
||||||
void IconvTests();
|
void IconvTests();
|
||||||
void Latin1Tests();
|
void Latin1Tests();
|
||||||
|
|
||||||
@@ -203,6 +205,12 @@ private:
|
|||||||
void UTF8PUA_f4_80_82_a5() { UTF8PUA("\xf4\x80\x82\xa5", u1000a5); }
|
void UTF8PUA_f4_80_82_a5() { UTF8PUA("\xf4\x80\x82\xa5", u1000a5); }
|
||||||
void UTF8Octal_backslash245() { UTF8Octal("\\245", L"\\245"); }
|
void UTF8Octal_backslash245() { UTF8Octal("\\245", L"\\245"); }
|
||||||
|
|
||||||
|
// Test that converting strings with incomplete surrogates in them fails
|
||||||
|
// (surrogates are only used in UTF-16, i.e. when wchar_t is 16 bits).
|
||||||
|
#if SIZEOF_WCHAR_T == 2
|
||||||
|
void UTF8_fail_broken_surrogates();
|
||||||
|
#endif // SIZEOF_WCHAR_T == 2
|
||||||
|
|
||||||
// implementation for the utf-8 tests (see comments below)
|
// implementation for the utf-8 tests (see comments below)
|
||||||
void UTF8(const char *charSequence, const wchar_t *wideSequence);
|
void UTF8(const char *charSequence, const wchar_t *wideSequence);
|
||||||
void UTF8PUA(const char *charSequence, const wchar_t *wideSequence);
|
void UTF8PUA(const char *charSequence, const wchar_t *wideSequence);
|
||||||
@@ -461,6 +469,12 @@ void MBConvTestCase::UTF8Tests()
|
|||||||
wxConvUTF8,
|
wxConvUTF8,
|
||||||
1
|
1
|
||||||
);
|
);
|
||||||
|
|
||||||
|
#if SIZEOF_WCHAR_T == 2
|
||||||
|
// Can't use \ud800 as it's an invalid Unicode character.
|
||||||
|
const wchar_t wc = 0xd800;
|
||||||
|
CPPUNIT_ASSERT_EQUAL(wxCONV_FAILED, wxConvUTF8.FromWChar(NULL, 0, &wc, 1));
|
||||||
|
#endif // SIZEOF_WCHAR_T == 2
|
||||||
}
|
}
|
||||||
|
|
||||||
void MBConvTestCase::UTF16LETests()
|
void MBConvTestCase::UTF16LETests()
|
||||||
@@ -928,6 +942,86 @@ void MBConvTestCase::FromWCharTests()
|
|||||||
CPPUNIT_ASSERT_EQUAL( '!', mbuf[6]);
|
CPPUNIT_ASSERT_EQUAL( '!', mbuf[6]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MBConvTestCase::NonBMPCharTests()
|
||||||
|
{
|
||||||
|
// U+1F363 (UTF-16: D83C DF63, UTF-8: F0 9F 8D A3) sushi (emoji)
|
||||||
|
// U+732B (UTF-8: E7 8C AB) cat (kanji)
|
||||||
|
// U+1F408 (UTF-16: D83D DC08, UTF-8: F0 9F 90 88) cat (emoji)
|
||||||
|
// U+845B U+E0101 (UTF-16: 845B DB40 DD01, UTF-8: E8 91 9B F3 A0 84 81) (a kanji + an IVS)
|
||||||
|
const char u8[] =
|
||||||
|
"\xF0\x9F\x8D\xA3" /* U+1F363 */
|
||||||
|
"\xE7\x8C\xAB\xF0\x9F\x90\x88" /* U+732B U+1F408 */
|
||||||
|
"\xE8\x91\x9B\xF3\xA0\x84\x81"; /* U+845B U+E0101 */
|
||||||
|
const wxChar16 u16[] = {
|
||||||
|
0xD83C, 0xDF63,
|
||||||
|
0x732B, 0xD83D, 0xDC08,
|
||||||
|
0x845B, 0xDB40, 0xDD01,
|
||||||
|
0};
|
||||||
|
const wxChar32 u32[] = {
|
||||||
|
0x1F363,
|
||||||
|
0x732B, 0x1F408,
|
||||||
|
0x845B, 0xE0101,
|
||||||
|
0};
|
||||||
|
#if SIZEOF_WCHAR_T == 2
|
||||||
|
const wchar_t *const w = u16;
|
||||||
|
const size_t wchars = sizeof(u16)/sizeof(wxChar16) - 1;
|
||||||
|
#else
|
||||||
|
const wchar_t *const w = u32;
|
||||||
|
const size_t wchars = sizeof(u32)/sizeof(wxChar32) - 1;
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
// Notice that these tests can only be done with strict UTF-8
|
||||||
|
// converter, the use of any MAP_INVALID_UTF8_XXX options currently
|
||||||
|
// completely breaks wxTextInputStream use.
|
||||||
|
TestDecoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
|
||||||
|
TestEncoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
char u16le[sizeof(u16)];
|
||||||
|
for (size_t i = 0; i < sizeof(u16)/2; ++i) {
|
||||||
|
u16le[2*i] = (char)(unsigned char)(u16[i] & 0xFF);
|
||||||
|
u16le[2*i+1] = (char)(unsigned char)((u16[i] >> 8) & 0xFF);
|
||||||
|
}
|
||||||
|
wxMBConvUTF16LE conv;
|
||||||
|
TestDecoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
|
||||||
|
TestEncoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
char u16be[sizeof(u16)];
|
||||||
|
for (size_t i = 0; i < sizeof(u16)/2; ++i) {
|
||||||
|
u16be[2*i] = (char)(unsigned char)((u16[i] >> 8) & 0xFF);
|
||||||
|
u16be[2*i+1] = (char)(unsigned char)(u16[i] & 0xFF);
|
||||||
|
}
|
||||||
|
wxMBConvUTF16BE conv;
|
||||||
|
TestDecoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
|
||||||
|
TestEncoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
char u32le[sizeof(u32)];
|
||||||
|
for (size_t i = 0; i < sizeof(u32)/4; ++i) {
|
||||||
|
u32le[4*i] = (char)(unsigned char)(u32[i] & 0xFF);
|
||||||
|
u32le[4*i+1] = (char)(unsigned char)((u32[i] >> 8) & 0xFF);
|
||||||
|
u32le[4*i+2] = (char)(unsigned char)((u32[i] >> 16) & 0xFF);
|
||||||
|
u32le[4*i+3] = (char)(unsigned char)((u32[i] >> 24) & 0xFF);
|
||||||
|
}
|
||||||
|
wxMBConvUTF32LE conv;
|
||||||
|
TestDecoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
|
||||||
|
TestEncoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
char u32be[sizeof(u32)];
|
||||||
|
for (size_t i = 0; i < sizeof(u32)/4; ++i) {
|
||||||
|
u32be[4*i] = (char)(unsigned char)((u32[i] >> 24) & 0xFF);
|
||||||
|
u32be[4*i+1] = (char)(unsigned char)((u32[i] >> 16) & 0xFF);
|
||||||
|
u32be[4*i+2] = (char)(unsigned char)((u32[i] >> 8) & 0xFF);
|
||||||
|
u32be[4*i+3] = (char)(unsigned char)(u32[i] & 0xFF);
|
||||||
|
}
|
||||||
|
wxMBConvUTF32BE conv;
|
||||||
|
TestDecoder(w, wchars, u32be, sizeof(u32be)-4, conv, 4);
|
||||||
|
TestEncoder(w, wchars, u32be, sizeof(u32be)-4, conv, 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name );
|
WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name );
|
||||||
|
|
||||||
void MBConvTestCase::IconvTests()
|
void MBConvTestCase::IconvTests()
|
||||||
@@ -1084,15 +1178,16 @@ void MBConvTestCase::TestEncoder(
|
|||||||
memcpy( inputCopy.data(), wideBuffer, (wideChars*sizeof(wchar_t)) );
|
memcpy( inputCopy.data(), wideBuffer, (wideChars*sizeof(wchar_t)) );
|
||||||
inputCopy.data()[wideChars] = 0;
|
inputCopy.data()[wideChars] = 0;
|
||||||
|
|
||||||
// calculate the output size
|
// calculate the output size: notice that it can be greater than the real
|
||||||
|
// size as the converter is allowed to estimate the maximal size needed
|
||||||
|
// instead of computing it precisely
|
||||||
size_t outputWritten = converter.WC2MB
|
size_t outputWritten = converter.WC2MB
|
||||||
(
|
(
|
||||||
0,
|
0,
|
||||||
(const wchar_t*)inputCopy.data(),
|
(const wchar_t*)inputCopy.data(),
|
||||||
0
|
0
|
||||||
);
|
);
|
||||||
// make sure the correct output length was calculated
|
CPPUNIT_ASSERT( outputWritten >= multiBytes );
|
||||||
CPPUNIT_ASSERT_EQUAL( multiBytes, outputWritten );
|
|
||||||
|
|
||||||
// convert the string
|
// convert the string
|
||||||
size_t guardBytes = 8; // to make sure we're not overrunning the output buffer
|
size_t guardBytes = 8; // to make sure we're not overrunning the output buffer
|
||||||
|
Reference in New Issue
Block a user