Merge fixes for handling Unicode characters outside of BMP

Closes https://github.com/wxWidgets/wxWidgets/pull/467
This commit is contained in:
Vadim Zeitlin
2017-06-21 18:53:40 +02:00
10 changed files with 483 additions and 119 deletions

View File

@@ -175,6 +175,16 @@
#define wxUSE_UTF8_LOCALE_ONLY 0 #define wxUSE_UTF8_LOCALE_ONLY 0
#endif #endif
/*
    wxUSE_UNICODE_UTF16 is set to 1 when wxString uses wchar_t internally
    (wxUSE_UNICODE_WCHAR) and wchar_t is only 2 bytes wide, and to 0 in all
    the other cases. The size of wchar_t must be already known at this point.
 */
#if !defined(SIZEOF_WCHAR_T)
    #error "SIZEOF_WCHAR_T must be defined before including this file in wx/defs.h"
#endif

#if SIZEOF_WCHAR_T == 2 && wxUSE_UNICODE_WCHAR
    #define wxUSE_UNICODE_UTF16 1
#else
    #define wxUSE_UNICODE_UTF16 0
#endif
/* define char type used by wxString internal representation: */ /* define char type used by wxString internal representation: */
#if wxUSE_UNICODE_WCHAR #if wxUSE_UNICODE_WCHAR
typedef wchar_t wxStringCharType; typedef wchar_t wxStringCharType;

View File

@@ -672,47 +672,6 @@ typedef short int WXTYPE;
/* breaks C++ code) */ /* breaks C++ code) */
#include <stddef.h> #include <stddef.h>
#ifdef __cplusplus
// everybody gets the assert and other debug macros
#include "wx/debug.h"
// destroy the object ptr points to, if any, and reset ptr itself to NULL so
// that it can't be used or deleted again by mistake
template <typename T>
inline void wxDELETE(T*& ptr)
{
    // taking sizeof() of an incomplete type is an error, so this typedef
    // prevents this function from being used with forward-declared classes
    typedef char TypeIsCompleteCheck[sizeof(T)] WX_ATTRIBUTE_UNUSED;

    if ( ptr == NULL )
        return;

    delete ptr;
    ptr = NULL;
}
// array counterpart of wxDELETE(): free the array ptr points to, if any,
// using delete [] and reset ptr itself to NULL
template <typename T>
inline void wxDELETEA(T*& ptr)
{
    // taking sizeof() of an incomplete type is an error, so this typedef
    // prevents this function from being used with forward-declared classes
    typedef char TypeIsCompleteCheck[sizeof(T)] WX_ATTRIBUTE_UNUSED;

    if ( ptr == NULL )
        return;

    delete [] ptr;
    ptr = NULL;
}
// exchange the values of two objects using a temporary copy: this is a
// minimal replacement of std::swap() meant to be used with primitive types
template <typename T>
inline void wxSwap(T& first, T& second)
{
    T saved(first);

    first = second;
    second = saved;
}
#endif /*__cplusplus*/
/* size of statically declared array */ /* size of statically declared array */
#define WXSIZEOF(array) (sizeof(array)/sizeof(array[0])) #define WXSIZEOF(array) (sizeof(array)/sizeof(array[0]))
@@ -1227,6 +1186,45 @@ typedef wxUint32 wxDword;
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
// everybody gets the assert and other debug macros
#include "wx/debug.h"
// destroy the object ptr points to, if any, and reset ptr itself to NULL so
// that it can't be used or deleted again by mistake
template <typename T>
inline void wxDELETE(T*& ptr)
{
    // taking sizeof() of an incomplete type is an error, so this typedef
    // prevents this function from being used with forward-declared classes
    typedef char TypeIsCompleteCheck[sizeof(T)] WX_ATTRIBUTE_UNUSED;

    if ( ptr == NULL )
        return;

    delete ptr;
    ptr = NULL;
}
// array counterpart of wxDELETE(): free the array ptr points to, if any,
// using delete [] and reset ptr itself to NULL
template <typename T>
inline void wxDELETEA(T*& ptr)
{
    // taking sizeof() of an incomplete type is an error, so this typedef
    // prevents this function from being used with forward-declared classes
    typedef char TypeIsCompleteCheck[sizeof(T)] WX_ATTRIBUTE_UNUSED;

    if ( ptr == NULL )
        return;

    delete [] ptr;
    ptr = NULL;
}
// exchange the values of two objects using a temporary copy: this is a
// minimal replacement of std::swap() meant to be used with primitive types
template <typename T>
inline void wxSwap(T& first, T& second)
{
    T saved(first);

    first = second;
    second = saved;
}
/* And also define a couple of simple functions to cast pointer to/from it. */ /* And also define a couple of simple functions to cast pointer to/from it. */
inline wxUIntPtr wxPtrToUInt(const void *p) inline wxUIntPtr wxPtrToUInt(const void *p)
{ {

View File

@@ -898,9 +898,6 @@ public:
wxStringIteratorNode m_node; wxStringIteratorNode m_node;
}; };
// return the offset of the position the given iterator points to from the
// beginning of the internal string representation (m_impl)
size_t IterToImplPos(wxString::iterator i) const
{
    const wxStringImpl::const_iterator implIter(i.impl());

    return implIter - m_impl.begin();
}
iterator GetIterForNthChar(size_t n) iterator GetIterForNthChar(size_t n)
{ return iterator(this, m_impl.begin() + PosToImpl(n)); } { return iterator(this, m_impl.begin() + PosToImpl(n)); }
const_iterator GetIterForNthChar(size_t n) const const_iterator GetIterForNthChar(size_t n) const
@@ -975,6 +972,9 @@ public:
const_iterator GetIterForNthChar(size_t n) const { return begin() + n; } const_iterator GetIterForNthChar(size_t n) const { return begin() + n; }
#endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8 #endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8
// return the offset of the position the given iterator points to from the
// beginning of the internal string representation (m_impl)
size_t IterToImplPos(wxString::iterator i) const
{
    const wxStringImpl::const_iterator implIter(i.impl());

    return implIter - m_impl.begin();
}
#undef WX_STR_ITERATOR_TAG #undef WX_STR_ITERATOR_TAG
#undef WX_STR_ITERATOR_IMPL #undef WX_STR_ITERATOR_IMPL
@@ -1820,12 +1820,11 @@ public:
{ {
wxSTRING_INVALIDATE_CACHE(); wxSTRING_INVALIDATE_CACHE();
#if wxUSE_UNICODE_UTF8 if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
if ( !ch.IsAscii() )
m_impl = wxStringOperations::EncodeChar(ch);
else
#endif // wxUSE_UNICODE_UTF8
m_impl = (wxStringCharType)ch; m_impl = (wxStringCharType)ch;
else
m_impl = wxStringOperations::EncodeChar(ch);
return *this; return *this;
} }
@@ -2410,20 +2409,18 @@ public:
// append n copies of ch // append n copies of ch
wxString& append(size_t n, wxUniChar ch) wxString& append(size_t n, wxUniChar ch)
{ {
#if wxUSE_UNICODE_UTF8 if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
if ( !ch.IsAscii() )
{
wxSTRING_INVALIDATE_CACHED_LENGTH();
m_impl.append(wxStringOperations::EncodeNChars(n, ch));
}
else // ASCII
#endif
{ {
wxSTRING_UPDATE_CACHED_LENGTH(n); wxSTRING_UPDATE_CACHED_LENGTH(n);
m_impl.append(n, (wxStringCharType)ch); m_impl.append(n, (wxStringCharType)ch);
} }
else
{
wxSTRING_INVALIDATE_CACHED_LENGTH();
m_impl.append(wxStringOperations::EncodeNChars(n, ch));
}
return *this; return *this;
} }
@@ -2556,12 +2553,10 @@ public:
{ {
wxSTRING_SET_CACHED_LENGTH(n); wxSTRING_SET_CACHED_LENGTH(n);
#if wxUSE_UNICODE_UTF8 if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
if ( !ch.IsAscii() )
m_impl.assign(wxStringOperations::EncodeNChars(n, ch));
else
#endif
m_impl.assign(n, (wxStringCharType)ch); m_impl.assign(n, (wxStringCharType)ch);
else
m_impl.assign(wxStringOperations::EncodeNChars(n, ch));
return *this; return *this;
} }
@@ -2671,12 +2666,11 @@ public:
{ {
wxSTRING_UPDATE_CACHED_LENGTH(n); wxSTRING_UPDATE_CACHED_LENGTH(n);
#if wxUSE_UNICODE_UTF8 if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
if ( !ch.IsAscii() )
m_impl.insert(PosToImpl(nPos), wxStringOperations::EncodeNChars(n, ch));
else
#endif
m_impl.insert(PosToImpl(nPos), n, (wxStringCharType)ch); m_impl.insert(PosToImpl(nPos), n, (wxStringCharType)ch);
else
m_impl.insert(PosToImpl(nPos), wxStringOperations::EncodeNChars(n, ch));
return *this; return *this;
} }
@@ -2684,16 +2678,14 @@ public:
{ {
wxSTRING_UPDATE_CACHED_LENGTH(1); wxSTRING_UPDATE_CACHED_LENGTH(1);
#if wxUSE_UNICODE_UTF8 if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
if ( !ch.IsAscii() ) return iterator(this, m_impl.insert(it.impl(), (wxStringCharType)ch));
else
{ {
size_t pos = IterToImplPos(it); size_t pos = IterToImplPos(it);
m_impl.insert(pos, wxStringOperations::EncodeChar(ch)); m_impl.insert(pos, wxStringOperations::EncodeChar(ch));
return iterator(this, m_impl.begin() + pos); return iterator(this, m_impl.begin() + pos);
} }
else
#endif
return iterator(this, m_impl.insert(it.impl(), (wxStringCharType)ch));
} }
void insert(iterator it, const_iterator first, const_iterator last) void insert(iterator it, const_iterator first, const_iterator last)
@@ -2716,12 +2708,10 @@ public:
{ {
wxSTRING_UPDATE_CACHED_LENGTH(n); wxSTRING_UPDATE_CACHED_LENGTH(n);
#if wxUSE_UNICODE_UTF8 if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
if ( !ch.IsAscii() )
m_impl.insert(IterToImplPos(it), wxStringOperations::EncodeNChars(n, ch));
else
#endif
m_impl.insert(it.impl(), n, (wxStringCharType)ch); m_impl.insert(it.impl(), n, (wxStringCharType)ch);
else
m_impl.insert(IterToImplPos(it), wxStringOperations::EncodeNChars(n, ch));
} }
// delete characters from nStart to nStart + nLen // delete characters from nStart to nStart + nLen
@@ -2800,12 +2790,11 @@ public:
size_t from, len; size_t from, len;
PosLenToImpl(nStart, nLen, &from, &len); PosLenToImpl(nStart, nLen, &from, &len);
#if wxUSE_UNICODE_UTF8
if ( !ch.IsAscii() ) if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
m_impl.replace(from, len, wxStringOperations::EncodeNChars(nCount, ch));
else
#endif
m_impl.replace(from, len, nCount, (wxStringCharType)ch); m_impl.replace(from, len, nCount, (wxStringCharType)ch);
else
m_impl.replace(from, len, wxStringOperations::EncodeNChars(nCount, ch));
return *this; return *this;
} }
@@ -2921,13 +2910,11 @@ public:
{ {
wxSTRING_INVALIDATE_CACHE(); wxSTRING_INVALIDATE_CACHE();
#if wxUSE_UNICODE_UTF8 if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
if ( !ch.IsAscii() ) m_impl.replace(first.impl(), last.impl(), n, (wxStringCharType)ch);
else
m_impl.replace(first.impl(), last.impl(), m_impl.replace(first.impl(), last.impl(),
wxStringOperations::EncodeNChars(n, ch)); wxStringOperations::EncodeNChars(n, ch));
else
#endif
m_impl.replace(first.impl(), last.impl(), n, (wxStringCharType)ch);
return *this; return *this;
} }
@@ -2988,15 +2975,12 @@ public:
// find the first occurrence of character ch after nStart // find the first occurrence of character ch after nStart
size_t find(wxUniChar ch, size_t nStart = 0) const size_t find(wxUniChar ch, size_t nStart = 0) const
{ {
#if wxUSE_UNICODE_UTF8 if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
if ( !ch.IsAscii() )
return PosFromImpl(m_impl.find(wxStringOperations::EncodeChar(ch),
PosToImpl(nStart)));
else
#endif
return PosFromImpl(m_impl.find((wxStringCharType)ch, return PosFromImpl(m_impl.find((wxStringCharType)ch,
PosToImpl(nStart))); PosToImpl(nStart)));
else
return PosFromImpl(m_impl.find(wxStringOperations::EncodeChar(ch),
PosToImpl(nStart)));
} }
size_t find(wxUniCharRef ch, size_t nStart = 0) const size_t find(wxUniCharRef ch, size_t nStart = 0) const
{ return find(wxUniChar(ch), nStart); } { return find(wxUniChar(ch), nStart); }
@@ -3033,13 +3017,11 @@ public:
// as find, but from the end // as find, but from the end
size_t rfind(wxUniChar ch, size_t nStart = npos) const size_t rfind(wxUniChar ch, size_t nStart = npos) const
{ {
#if wxUSE_UNICODE_UTF8 if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
if ( !ch.IsAscii() ) return PosFromImpl(m_impl.rfind((wxStringCharType)ch,
return PosFromImpl(m_impl.rfind(wxStringOperations::EncodeChar(ch),
PosToImpl(nStart))); PosToImpl(nStart)));
else else
#endif return PosFromImpl(m_impl.rfind(wxStringOperations::EncodeChar(ch),
return PosFromImpl(m_impl.rfind((wxStringCharType)ch,
PosToImpl(nStart))); PosToImpl(nStart)));
} }
size_t rfind(wxUniCharRef ch, size_t nStart = npos) const size_t rfind(wxUniCharRef ch, size_t nStart = npos) const
@@ -3301,12 +3283,11 @@ public:
{ {
wxSTRING_UPDATE_CACHED_LENGTH(1); wxSTRING_UPDATE_CACHED_LENGTH(1);
#if wxUSE_UNICODE_UTF8 if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
if ( !ch.IsAscii() )
m_impl += wxStringOperations::EncodeChar(ch);
else
#endif
m_impl += (wxStringCharType)ch; m_impl += (wxStringCharType)ch;
else
m_impl += wxStringOperations::EncodeChar(ch);
return *this; return *this;
} }
wxString& operator+=(wxUniCharRef ch) { return *this += wxUniChar(ch); } wxString& operator+=(wxUniCharRef ch) { return *this += wxUniChar(ch); }

View File

@@ -44,9 +44,36 @@ struct WXDLLIMPEXP_BASE wxStringOperationsWchar
static ptrdiff_t DiffIters(const Iterator& i1, const Iterator& i2) static ptrdiff_t DiffIters(const Iterator& i1, const Iterator& i2)
{ return i1 - i2; } { return i1 - i2; }
#if wxUSE_UNICODE_UTF16
// encodes the characters as UTF-16:
// small fixed size buffer holding the UTF-16 encoding of a single character:
// there is room for up to 2 code units followed by the terminating NUL
struct Utf16CharBuffer
{
    wchar_t data[3];

    // value-initialize the array, i.e. fill all of it with zeroes
    Utf16CharBuffer() : data() {}

    // allow using the buffer wherever a wide C string is expected
    operator const wchar_t*() const { return &data[0]; }
};
static Utf16CharBuffer EncodeChar(const wxUniChar& ch);
static wxWCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);
// every character except for the supplementary ones is treated as taking
// a single code unit here
static bool IsSingleCodeUnitCharacter(const wxUniChar& ch)
{
    return !ch.IsSupplementary();
}
#else
// encodes the character to a form used to represent it in internal // encodes the character to a form used to represent it in internal
// representation (returns a string in UTF8 version) // representation
static wxChar EncodeChar(const wxUniChar& ch) { return (wxChar)ch; } struct SingleCharBuffer
{
SingleCharBuffer() : data() {}
wxChar data[2];
operator const wxChar*() const { return data; }
};
static SingleCharBuffer EncodeChar(const wxUniChar& ch)
{
    // the buffer is zero-filled by its constructor, so setting its first
    // element is enough to get a NUL-terminated string of a single character
    SingleCharBuffer encoded;
    encoded.data[0] = (wxChar)ch;

    return encoded;
}
static wxWxCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);
// in this configuration the answer is the same for all characters, so the
// argument is not even looked at
static bool IsSingleCodeUnitCharacter(const wxUniChar&)
{
    return true;
}
#endif
static wxUniChar DecodeChar(const wxStringImpl::const_iterator& i) static wxUniChar DecodeChar(const wxStringImpl::const_iterator& i)
{ return *i; } { return *i; }
@@ -134,6 +161,9 @@ struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
return dist; return dist;
} }
// only the characters for which wxUniChar::IsAscii() returns true are
// treated as taking a single code unit here
static bool IsSingleCodeUnitCharacter(const wxUniChar& ch)
{
    return ch.IsAscii();
}
// encodes the character as UTF-8: // encodes the character as UTF-8:
typedef wxUniChar::Utf8CharBuffer Utf8CharBuffer; typedef wxUniChar::Utf8CharBuffer Utf8CharBuffer;
static Utf8CharBuffer EncodeChar(const wxUniChar& ch) static Utf8CharBuffer EncodeChar(const wxUniChar& ch)

View File

@@ -83,6 +83,38 @@ public:
return true; return true;
} }
// Returns true if the given code point is a BMP character, i.e. if it
// doesn't exceed 0xFFFF:
static bool IsBMP(wxUint32 value)
{
    return value <= 0xFFFF;
}
// Returns true if the given code point is a supplementary character, i.e.
// if it lies in the range from 0x10000 to 0x10FFFF inclusive:
static bool IsSupplementary(wxUint32 value)
{
    return value >= 0x10000 && value <= 0x10FFFF;
}
// Returns the high (first) surrogate code unit of the UTF-16 encoding of
// the given supplementary character.
//
// The value must satisfy IsSupplementary(), this is checked by an assert.
static wxUint16 HighSurrogate(wxUint32 value)
{
    wxASSERT_MSG(IsSupplementary(value), "wxUniChar::HighSurrogate() must be called on a supplementary character");

    // The expression is computed as wxUint32, so narrow it explicitly
    // to avoid implicit truncation warnings: for a supplementary character
    // the shifted offset fits in 10 bits and so the result always fits in
    // 16 bits.
    return static_cast<wxUint16>(0xD800 | ((value - 0x10000) >> 10));
}
// Returns the low (second) surrogate code unit of the UTF-16 encoding of
// the given supplementary character.
//
// The value must satisfy IsSupplementary(), this is checked by an assert.
static wxUint16 LowSurrogate(wxUint32 value)
{
    wxASSERT_MSG(IsSupplementary(value), "wxUniChar::LowSurrogate() must be called on a supplementary character");

    // The expression is computed as wxUint32, so narrow it explicitly
    // to avoid implicit truncation warnings: only the 10 lowest bits of the
    // offset are kept, so the result always fits in 16 bits.
    return static_cast<wxUint16>(0xDC00 | ((value - 0x10000) & 0x03FF));
}
// Non-static versions of the functions above, all of them simply apply the
// corresponding static function to the value of this character.

// Returns true if this character is a BMP character:
bool IsBMP() const
{
    return IsBMP(m_value);
}

// Returns true if this character is a supplementary character:
bool IsSupplementary() const
{
    return IsSupplementary(m_value);
}

// Returns the high surrogate code unit for this supplementary character
wxUint16 HighSurrogate() const
{
    return HighSurrogate(m_value);
}

// Returns the low surrogate code unit for this supplementary character
wxUint16 LowSurrogate() const
{
    return LowSurrogate(m_value);
}
// Conversions to char and wchar_t types: all of those are needed to be // Conversions to char and wchar_t types: all of those are needed to be
// able to pass wxUniChars to verious standard narrow and wide character // able to pass wxUniChars to verious standard narrow and wide character
// functions // functions
@@ -216,6 +248,11 @@ public:
bool IsAscii() const { return UniChar().IsAscii(); } bool IsAscii() const { return UniChar().IsAscii(); }
bool GetAsChar(char *c) const { return UniChar().GetAsChar(c); } bool GetAsChar(char *c) const { return UniChar().GetAsChar(c); }
// All these functions just forward to the functions with the same names of
// the wxUniChar returned by UniChar().
bool IsBMP() const
{
    return UniChar().IsBMP();
}

bool IsSupplementary() const
{
    return UniChar().IsSupplementary();
}

wxUint16 HighSurrogate() const
{
    return UniChar().HighSurrogate();
}

wxUint16 LowSurrogate() const
{
    return UniChar().LowSurrogate();
}
// Assignment operators: // Assignment operators:
#if wxUSE_UNICODE_UTF8 #if wxUSE_UNICODE_UTF8
wxUniCharRef& operator=(const wxUniChar& c); wxUniCharRef& operator=(const wxUniChar& c);

View File

@@ -83,6 +83,82 @@ public:
*/ */
bool GetAsChar(char *c) const; bool GetAsChar(char *c) const;
/**
Returns true if the character is a BMP character (i.e.\ if its value is less than 0x10000).
@since 3.1.1
*/
bool IsBMP() const;
/**
Returns true if the character is a BMP character (i.e.\ if its value is less than 0x10000).
@param value
The Unicode code point of the character.
@since 3.1.1
*/
static bool IsBMP(wxUint32 value);
/**
Returns true if the character is a supplementary character (i.e.\ between 0x10000 and 0x10FFFF).
@since 3.1.1
*/
bool IsSupplementary() const;
/**
Returns true if the character is a supplementary character (i.e.\ between 0x10000 and 0x10FFFF).
@param value
The Unicode code point of the character.
@since 3.1.1
*/
static bool IsSupplementary(wxUint32 value);
/**
Returns the high surrogate code unit for the supplementary character.
@pre IsSupplementary() const
@since 3.1.1
*/
wxUint16 HighSurrogate() const;
/**
Returns the high surrogate code unit for the supplementary character.
@param value
The Unicode code point of the character.
@pre IsSupplementary(wxUint32 value)
@since 3.1.1
*/
static wxUint16 HighSurrogate(wxUint32 value);
/**
Returns the low surrogate code unit for the supplementary character.
@pre IsSupplementary() const
@since 3.1.1
*/
wxUint16 LowSurrogate() const;
/**
Returns the low surrogate code unit for the supplementary character.
@param value
The Unicode code point of the character.
@pre IsSupplementary(wxUint32 value)
@since 3.1.1
*/
static wxUint16 LowSurrogate(wxUint32 value);
//@{ //@{
/** /**
Conversions to char and wchar_t types: all of those are needed to be Conversions to char and wchar_t types: all of those are needed to be

View File

@@ -80,27 +80,27 @@ static bool NotAllNULs(const char *p, size_t n)
static size_t encode_utf16(wxUint32 input, wxUint16 *output) static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{ {
if (input <= 0xffff) if (wxUniChar::IsBMP(input))
{ {
if (output) if (output)
*output = (wxUint16) input; *output = (wxUint16) input;
return 1; return 1;
} }
else if (input >= 0x110000) else if (wxUniChar::IsSupplementary(input))
{
return wxCONV_FAILED;
}
else
{ {
if (output) if (output)
{ {
*output++ = (wxUint16) ((input >> 10) + 0xd7c0); *output++ = wxUniChar::HighSurrogate(input);
*output = (wxUint16) ((input & 0x3ff) + 0xdc00); *output = wxUniChar::LowSurrogate(input);
} }
return 2; return 2;
} }
else
{
return wxCONV_FAILED;
}
} }
static size_t decode_utf16(const wxUint16* input, wxUint32& output) static size_t decode_utf16(const wxUint16* input, wxUint32& output)

View File

@@ -27,6 +27,68 @@
// implementation // implementation
// =========================================================================== // ===========================================================================
#if wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
#if wxUSE_UNICODE_UTF16
// Encode a single character in UTF-16, i.e. as a surrogate pair if it is a
// supplementary character and as a single code unit otherwise.
wxStringOperationsWchar::Utf16CharBuffer wxStringOperationsWchar::EncodeChar(const wxUniChar& ch)
{
    // The buffer is zero-filled on construction, so the result is always
    // NUL-terminated, whether one or two of its elements are set below.
    Utf16CharBuffer encoded;

    if ( !ch.IsSupplementary() )
    {
        // Assume ch is a BMP character
        encoded.data[0] = (wchar_t)ch;
        return encoded;
    }

    encoded.data[0] = (wchar_t)ch.HighSurrogate();
    encoded.data[1] = (wchar_t)ch.LowSurrogate();
    return encoded;
}
// Return the buffer containing n copies of the UTF-16 encoding of the given
// character: each copy takes 2 code units for a supplementary character and
// a single one otherwise.
wxWCharBuffer wxStringOperationsWchar::EncodeNChars(size_t n, const wxUniChar& ch)
{
    if ( !ch.IsSupplementary() )
    {
        // Assume ch is a BMP character
        wxWCharBuffer single(n);
        wmemset(single.data(), (wchar_t)ch, n);
        return single;
    }

    wxWCharBuffer pairs(n * 2);

    const wchar_t high = (wchar_t)ch.HighSurrogate();
    const wchar_t low = (wchar_t)ch.LowSurrogate();

    // Write the surrogate pair n times in a row.
    wchar_t *out = pairs.data();
    for ( size_t count = 0; count != n; ++count )
    {
        *out++ = high;
        *out++ = low;
    }

    return pairs;
}
#else
// Return the buffer filled with n copies of the given character, which always
// takes a single element of the buffer in this configuration.
wxWxCharBuffer wxStringOperationsWchar::EncodeNChars(size_t n, const wxUniChar& ch)
{
    wxWxCharBuffer filled(n);

#if !wxUSE_UNICODE_WCHAR
    // ANSI
    memset(filled.data(), (unsigned char)ch, n);
#else // wxUSE_UNICODE_WCHAR
    wmemset(filled.data(), (wchar_t)ch, n);
#endif

    return filled;
}
#endif // wxUSE_UNICODE_UTF16
#endif // wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
#if wxUSE_UNICODE_UTF8 #if wxUSE_UNICODE_UTF8
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------

View File

@@ -502,7 +502,7 @@ wxScopedU16CharBuffer wxUString::utf16_str() const
// TODO: error range checks // TODO: error range checks
if (code < 0x10000) if (wxUniChar::IsBMP(code))
utf16_length++; utf16_length++;
else else
utf16_length += 2; utf16_length += 2;
@@ -520,15 +520,15 @@ wxScopedU16CharBuffer wxUString::utf16_str() const
// TODO: error range checks // TODO: error range checks
if (code < 0x10000) if (wxUniChar::IsBMP(code))
{ {
out[0] = code; out[0] = code;
out++; out++;
} }
else else
{ {
out[0] = (code - 0x10000) / 0x400 + 0xd800; out[0] = wxUniChar::HighSurrogate(code);
out[1] = (code - 0x10000) % 0x400 + 0xdc00; out[1] = wxUniChar::LowSurrogate(code);
out += 2; out += 2;
} }
} }

View File

@@ -63,6 +63,7 @@ private:
CPPUNIT_TEST( IndexedAccess ); CPPUNIT_TEST( IndexedAccess );
CPPUNIT_TEST( BeforeAndAfter ); CPPUNIT_TEST( BeforeAndAfter );
CPPUNIT_TEST( ScopedBuffers ); CPPUNIT_TEST( ScopedBuffers );
CPPUNIT_TEST( SupplementaryUniChar );
CPPUNIT_TEST_SUITE_END(); CPPUNIT_TEST_SUITE_END();
void String(); void String();
@@ -98,6 +99,7 @@ private:
void IndexedAccess(); void IndexedAccess();
void BeforeAndAfter(); void BeforeAndAfter();
void ScopedBuffers(); void ScopedBuffers();
void SupplementaryUniChar();
wxDECLARE_NO_COPY_CLASS(StringTestCase); wxDECLARE_NO_COPY_CLASS(StringTestCase);
}; };
@@ -1142,3 +1144,171 @@ void StringTestCase::ScopedBuffers()
buf5.extend(len); buf5.extend(len);
CPPUNIT_ASSERT_EQUAL('\0', buf5.data()[len]); CPPUNIT_ASSERT_EQUAL('\0', buf5.data()[len]);
} }
// Check that wxString functions taking wxUniChar work with the characters
// outside of the BMP: such a character is expected to take 2 elements of the
// string (the surrogate pair) in UTF-16 build and a single one otherwise.
void StringTestCase::SupplementaryUniChar()
{
#if wxUSE_UNICODE
    // Test wxString(wxUniChar ch, size_t nRepeat = 1),
    // which is implemented upon assign(size_t n, wxUniChar ch).
    {
        wxString s(wxUniChar(0x12345));
#if wxUSE_UNICODE_UTF16
        CPPUNIT_ASSERT_EQUAL(2, s.length());
        CPPUNIT_ASSERT_EQUAL(0xD808, s[0].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDF45, s[1].GetValue());
#else
        CPPUNIT_ASSERT_EQUAL(1, s.length());
        CPPUNIT_ASSERT_EQUAL(0x12345, s[0].GetValue());
#endif
    }

    // Test operator=(wxUniChar ch).
    {
        wxString s;
        s = wxUniChar(0x23456);
#if wxUSE_UNICODE_UTF16
        CPPUNIT_ASSERT_EQUAL(2, s.length());
        CPPUNIT_ASSERT_EQUAL(0xD84D, s[0].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDC56, s[1].GetValue());
#else
        CPPUNIT_ASSERT_EQUAL(1, s.length());
        CPPUNIT_ASSERT_EQUAL(0x23456, s[0].GetValue());
#endif
    }

    // Test operator+=(wxUniChar ch).
    {
        wxString s = "A";
        s += wxUniChar(0x34567);
#if wxUSE_UNICODE_UTF16
        CPPUNIT_ASSERT_EQUAL(3, s.length());
        CPPUNIT_ASSERT_EQUAL(0xD891, s[1].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDD67, s[2].GetValue());
#else
        CPPUNIT_ASSERT_EQUAL(2, s.length());
        CPPUNIT_ASSERT_EQUAL(0x34567, s[1].GetValue());
#endif
    }

    // Test operator<<(wxUniChar ch),
    // which is implemented upon append(size_t n, wxUniChar ch).
    {
        wxString s = "A";
        s << wxUniChar(0x45678);
#if wxUSE_UNICODE_UTF16
        CPPUNIT_ASSERT_EQUAL(3, s.length());
        CPPUNIT_ASSERT_EQUAL(0xD8D5, s[1].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDE78, s[2].GetValue());
#else
        CPPUNIT_ASSERT_EQUAL(2, s.length());
        CPPUNIT_ASSERT_EQUAL(0x45678, s[1].GetValue());
#endif
    }

    // Test insert(size_t nPos, size_t n, wxUniChar ch).
    {
        wxString s = L"\x3042\x208\x3059";
        s.insert(1, 2, wxUniChar(0x12345));
#if wxUSE_UNICODE_UTF16
        CPPUNIT_ASSERT_EQUAL(7, s.length());
        CPPUNIT_ASSERT_EQUAL(0xD808, s[1].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDF45, s[2].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xD808, s[3].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDF45, s[4].GetValue());
#else
        CPPUNIT_ASSERT_EQUAL(5, s.length());
        CPPUNIT_ASSERT_EQUAL(0x12345, s[1].GetValue());
        CPPUNIT_ASSERT_EQUAL(0x12345, s[2].GetValue());
#endif
    }

    // Test insert(iterator it, wxUniChar ch).
    {
        wxString s = L"\x3042\x208\x3059";
        s.insert(s.begin() + 1, wxUniChar(0x23456));
#if wxUSE_UNICODE_UTF16
        CPPUNIT_ASSERT_EQUAL(5, s.length());
        CPPUNIT_ASSERT_EQUAL(0xD84D, s[1].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDC56, s[2].GetValue());
#else
        CPPUNIT_ASSERT_EQUAL(4, s.length());
        CPPUNIT_ASSERT_EQUAL(0x23456, s[1].GetValue());
#endif
    }

    // Test insert(iterator it, size_type n, wxUniChar ch).
    //
    // Both inserted copies are checked, as for the other functions taking
    // the repeat count, to detect the bugs affecting only the second one.
    {
        wxString s = L"\x3042\x208\x3059";
        s.insert(s.begin() + 1, 2, wxUniChar(0x34567));
#if wxUSE_UNICODE_UTF16
        CPPUNIT_ASSERT_EQUAL(7, s.length());
        CPPUNIT_ASSERT_EQUAL(0xD891, s[1].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDD67, s[2].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xD891, s[3].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDD67, s[4].GetValue());
#else
        CPPUNIT_ASSERT_EQUAL(5, s.length());
        CPPUNIT_ASSERT_EQUAL(0x34567, s[1].GetValue());
        CPPUNIT_ASSERT_EQUAL(0x34567, s[2].GetValue());
#endif
    }

    // Test replace(size_t nStart, size_t nLen, size_t nCount, wxUniChar ch).
    {
        wxString s = L"\x3042\x208\x3059";
        s.replace(1, 2, 2, wxUniChar(0x45678));
#if wxUSE_UNICODE_UTF16
        CPPUNIT_ASSERT_EQUAL(5, s.length());
        CPPUNIT_ASSERT_EQUAL(0xD8D5, s[1].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDE78, s[2].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xD8D5, s[3].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDE78, s[4].GetValue());
#else
        CPPUNIT_ASSERT_EQUAL(3, s.length());
        CPPUNIT_ASSERT_EQUAL(0x45678, s[1].GetValue());
        CPPUNIT_ASSERT_EQUAL(0x45678, s[2].GetValue());
#endif
    }

    // Test replace(iterator first, iterator last, size_type n, wxUniChar ch).
    {
        wxString s = L"\x3042\x208\x3059";
        s.replace(s.begin() + 1, s.end(), 2, wxUniChar(0x34567));
#if wxUSE_UNICODE_UTF16
        CPPUNIT_ASSERT_EQUAL(5, s.length());
        CPPUNIT_ASSERT_EQUAL(0xD891, s[1].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDD67, s[2].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xD891, s[3].GetValue());
        CPPUNIT_ASSERT_EQUAL(0xDD67, s[4].GetValue());
#else
        CPPUNIT_ASSERT_EQUAL(3, s.length());
        CPPUNIT_ASSERT_EQUAL(0x34567, s[1].GetValue());
        CPPUNIT_ASSERT_EQUAL(0x34567, s[2].GetValue());
#endif
    }

    // Test find(wxUniChar ch, size_t nStart = 0)
    // and rfind(wxUniChar ch, size_t nStart = npos).
    {
        wxString s = L"\x308\x2063";
        s << wxUniChar(0x12345);
        s << "x";
        s += wxUniChar(0x12345);
        s += "y";
#if wxUSE_UNICODE_UTF16
        CPPUNIT_ASSERT_EQUAL(8, s.length());
        CPPUNIT_ASSERT_EQUAL(2, s.find(wxUniChar(0x12345)));
        CPPUNIT_ASSERT_EQUAL(5, s.find(wxUniChar(0x12345), 3));
        CPPUNIT_ASSERT_EQUAL(5, s.rfind(wxUniChar(0x12345)));
        CPPUNIT_ASSERT_EQUAL(2, s.rfind(wxUniChar(0x12345), 4));
#else
        CPPUNIT_ASSERT_EQUAL(6, s.length());
        CPPUNIT_ASSERT_EQUAL(2, s.find(wxUniChar(0x12345)));
        CPPUNIT_ASSERT_EQUAL(4, s.find(wxUniChar(0x12345), 3));
        CPPUNIT_ASSERT_EQUAL(4, s.rfind(wxUniChar(0x12345)));
        CPPUNIT_ASSERT_EQUAL(2, s.rfind(wxUniChar(0x12345), 3));
#endif
    }

    /* Not tested here:
         find_first_of, find_last_of, find_first_not_of, find_last_not_of
    */
#endif
}