Merge fixes for handling Unicode characters outside of BMP

Closes https://github.com/wxWidgets/wxWidgets/pull/467
2017-06-21 18:53:40 +02:00
parent f008a9d2d5 8311715bdf
commit bb306b7ed0
10 changed files with 483 additions and 119 deletions
--- a/include/wx/chartype.h
+++ b/include/wx/chartype.h
@@ -175,6 +175,16 @@
    #define wxUSE_UTF8_LOCALE_ONLY 0
 #endif

+#ifndef SIZEOF_WCHAR_T
+    #error "SIZEOF_WCHAR_T must be defined before including this file in wx/defs.h"
+#endif
+
+#if wxUSE_UNICODE_WCHAR && SIZEOF_WCHAR_T == 2
+    #define wxUSE_UNICODE_UTF16 1
+#else
+    #define wxUSE_UNICODE_UTF16 0
+#endif
+
 /* define char type used by wxString internal representation: */
 #if wxUSE_UNICODE_WCHAR
    typedef wchar_t wxStringCharType;
--- a/include/wx/defs.h
+++ b/include/wx/defs.h
@@ -672,47 +672,6 @@ typedef short int WXTYPE;
 /*  breaks C++ code) */
 #include <stddef.h>

-#ifdef __cplusplus
-
-// everybody gets the assert and other debug macros
-#include "wx/debug.h"
-
-    // delete pointer if it is not NULL and NULL it afterwards
-    template <typename T>
-    inline void wxDELETE(T*& ptr)
-    {
-        typedef char TypeIsCompleteCheck[sizeof(T)] WX_ATTRIBUTE_UNUSED;
-
-        if ( ptr != NULL )
-        {
-            delete ptr;
-            ptr = NULL;
-        }
-    }
-
-    // delete an array and NULL it (see comments above)
-    template <typename T>
-    inline void wxDELETEA(T*& ptr)
-    {
-        typedef char TypeIsCompleteCheck[sizeof(T)] WX_ATTRIBUTE_UNUSED;
-
-        if ( ptr != NULL )
-        {
-            delete [] ptr;
-            ptr = NULL;
-        }
-    }
-
-    // trivial implementation of std::swap() for primitive types
-    template <typename T>
-    inline void wxSwap(T& first, T& second)
-    {
-        T tmp(first);
-        first = second;
-        second = tmp;
-    }
-#endif /*__cplusplus*/
-
 /*  size of statically declared array */
 #define WXSIZEOF(array)   (sizeof(array)/sizeof(array[0]))

@@ -1227,6 +1186,45 @@ typedef wxUint32 wxDword;
 #endif

 #ifdef __cplusplus
+
+// everybody gets the assert and other debug macros
+#include "wx/debug.h"
+
+    // delete pointer if it is not NULL and NULL it afterwards
+    template <typename T>
+    inline void wxDELETE(T*& ptr)
+    {
+        typedef char TypeIsCompleteCheck[sizeof(T)] WX_ATTRIBUTE_UNUSED;
+
+        if ( ptr != NULL )
+        {
+            delete ptr;
+            ptr = NULL;
+        }
+    }
+
+    // delete an array and NULL it (see comments above)
+    template <typename T>
+    inline void wxDELETEA(T*& ptr)
+    {
+        typedef char TypeIsCompleteCheck[sizeof(T)] WX_ATTRIBUTE_UNUSED;
+
+        if ( ptr != NULL )
+        {
+            delete [] ptr;
+            ptr = NULL;
+        }
+    }
+
+    // trivial implementation of std::swap() for primitive types
+    template <typename T>
+    inline void wxSwap(T& first, T& second)
+    {
+        T tmp(first);
+        first = second;
+        second = tmp;
+    }
+
 /* And also define a couple of simple functions to cast pointer to/from it. */
 inline wxUIntPtr wxPtrToUInt(const void *p)
 {
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -898,9 +898,6 @@ public:
      wxStringIteratorNode m_node;
  };

-  size_t IterToImplPos(wxString::iterator i) const
-    { return wxStringImpl::const_iterator(i.impl()) - m_impl.begin(); }
-
  iterator GetIterForNthChar(size_t n)
    { return iterator(this, m_impl.begin() + PosToImpl(n)); }
  const_iterator GetIterForNthChar(size_t n) const
@@ -975,6 +972,9 @@ public:
  const_iterator GetIterForNthChar(size_t n) const { return begin() + n; }
 #endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8

+  size_t IterToImplPos(wxString::iterator i) const
+    { return wxStringImpl::const_iterator(i.impl()) - m_impl.begin(); }
+
  #undef WX_STR_ITERATOR_TAG
  #undef WX_STR_ITERATOR_IMPL

@@ -1820,12 +1820,11 @@ public:
  {
    wxSTRING_INVALIDATE_CACHE();

-#if wxUSE_UNICODE_UTF8
-    if ( !ch.IsAscii() )
-        m_impl = wxStringOperations::EncodeChar(ch);
-    else
-#endif // wxUSE_UNICODE_UTF8
+    if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
        m_impl = (wxStringCharType)ch;
+    else
+        m_impl = wxStringOperations::EncodeChar(ch);
+
    return *this;
  }

@@ -2410,20 +2409,18 @@ public:
    // append n copies of ch
  wxString& append(size_t n, wxUniChar ch)
  {
-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
-      {
-          wxSTRING_INVALIDATE_CACHED_LENGTH();
-
-          m_impl.append(wxStringOperations::EncodeNChars(n, ch));
-      }
-      else // ASCII
-#endif
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
      {
          wxSTRING_UPDATE_CACHED_LENGTH(n);

          m_impl.append(n, (wxStringCharType)ch);
      }
+      else
+      {
+          wxSTRING_INVALIDATE_CACHED_LENGTH();
+
+          m_impl.append(wxStringOperations::EncodeNChars(n, ch));
+      }

      return *this;
  }
@@ -2556,12 +2553,10 @@ public:
  {
      wxSTRING_SET_CACHED_LENGTH(n);

-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
-          m_impl.assign(wxStringOperations::EncodeNChars(n, ch));
-      else
-#endif
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
          m_impl.assign(n, (wxStringCharType)ch);
+      else
+          m_impl.assign(wxStringOperations::EncodeNChars(n, ch));

      return *this;
  }
@@ -2671,12 +2666,11 @@ public:
  {
      wxSTRING_UPDATE_CACHED_LENGTH(n);

-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
-          m_impl.insert(PosToImpl(nPos), wxStringOperations::EncodeNChars(n, ch));
-      else
-#endif
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
          m_impl.insert(PosToImpl(nPos), n, (wxStringCharType)ch);
+      else
+          m_impl.insert(PosToImpl(nPos), wxStringOperations::EncodeNChars(n, ch));
+
      return *this;
  }

@@ -2684,16 +2678,14 @@ public:
  {
      wxSTRING_UPDATE_CACHED_LENGTH(1);

-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
+          return iterator(this, m_impl.insert(it.impl(), (wxStringCharType)ch));
+      else
      {
          size_t pos = IterToImplPos(it);
          m_impl.insert(pos, wxStringOperations::EncodeChar(ch));
          return iterator(this, m_impl.begin() + pos);
      }
-      else
-#endif
-          return iterator(this, m_impl.insert(it.impl(), (wxStringCharType)ch));
  }

  void insert(iterator it, const_iterator first, const_iterator last)
@@ -2716,12 +2708,10 @@ public:
  {
      wxSTRING_UPDATE_CACHED_LENGTH(n);

-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
-          m_impl.insert(IterToImplPos(it), wxStringOperations::EncodeNChars(n, ch));
-      else
-#endif
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
          m_impl.insert(it.impl(), n, (wxStringCharType)ch);
+      else
+          m_impl.insert(IterToImplPos(it), wxStringOperations::EncodeNChars(n, ch));
  }

    // delete characters from nStart to nStart + nLen
@@ -2800,12 +2790,11 @@ public:

      size_t from, len;
      PosLenToImpl(nStart, nLen, &from, &len);
-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
-          m_impl.replace(from, len, wxStringOperations::EncodeNChars(nCount, ch));
-      else
-#endif
+
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
          m_impl.replace(from, len, nCount, (wxStringCharType)ch);
+      else
+          m_impl.replace(from, len, wxStringOperations::EncodeNChars(nCount, ch));

      return *this;
  }
@@ -2921,13 +2910,11 @@ public:
  {
      wxSTRING_INVALIDATE_CACHE();

-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
+          m_impl.replace(first.impl(), last.impl(), n, (wxStringCharType)ch);
+      else
          m_impl.replace(first.impl(), last.impl(),
                  wxStringOperations::EncodeNChars(n, ch));
-      else
-#endif
-          m_impl.replace(first.impl(), last.impl(), n, (wxStringCharType)ch);

      return *this;
  }
@@ -2988,15 +2975,12 @@ public:
    // find the first occurrence of character ch after nStart
  size_t find(wxUniChar ch, size_t nStart = 0) const
  {
-#if wxUSE_UNICODE_UTF8
-    if ( !ch.IsAscii() )
-        return PosFromImpl(m_impl.find(wxStringOperations::EncodeChar(ch),
-                                       PosToImpl(nStart)));
-    else
-#endif
+    if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
        return PosFromImpl(m_impl.find((wxStringCharType)ch,
                                       PosToImpl(nStart)));
-
+    else
+        return PosFromImpl(m_impl.find(wxStringOperations::EncodeChar(ch),
+                                       PosToImpl(nStart)));
  }
  size_t find(wxUniCharRef ch, size_t nStart = 0) const
    {  return find(wxUniChar(ch), nStart); }
@@ -3033,13 +3017,11 @@ public:
    // as find, but from the end
  size_t rfind(wxUniChar ch, size_t nStart = npos) const
  {
-#if wxUSE_UNICODE_UTF8
-    if ( !ch.IsAscii() )
-        return PosFromImpl(m_impl.rfind(wxStringOperations::EncodeChar(ch),
+    if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
+        return PosFromImpl(m_impl.rfind((wxStringCharType)ch,
                                        PosToImpl(nStart)));
    else
-#endif
-        return PosFromImpl(m_impl.rfind((wxStringCharType)ch,
+        return PosFromImpl(m_impl.rfind(wxStringOperations::EncodeChar(ch),
                                        PosToImpl(nStart)));
  }
  size_t rfind(wxUniCharRef ch, size_t nStart = npos) const
@@ -3301,12 +3283,11 @@ public:
  {
      wxSTRING_UPDATE_CACHED_LENGTH(1);

-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
-          m_impl += wxStringOperations::EncodeChar(ch);
-      else
-#endif
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
          m_impl += (wxStringCharType)ch;
+      else
+          m_impl += wxStringOperations::EncodeChar(ch);
+
      return *this;
  }
  wxString& operator+=(wxUniCharRef ch) { return *this += wxUniChar(ch); }
--- a/include/wx/stringops.h
+++ b/include/wx/stringops.h
@@ -44,9 +44,36 @@ struct WXDLLIMPEXP_BASE wxStringOperationsWchar
    static ptrdiff_t DiffIters(const Iterator& i1, const Iterator& i2)
        { return i1 - i2; }

+#if wxUSE_UNICODE_UTF16
+    // encodes the characters as UTF-16:
+    struct Utf16CharBuffer
+    {
+        Utf16CharBuffer() : data() {}
+        wchar_t data[3];
+        operator const wchar_t*() const { return data; }
+    };
+    static Utf16CharBuffer EncodeChar(const wxUniChar& ch);
+    static wxWCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);
+    static bool IsSingleCodeUnitCharacter(const wxUniChar& ch)
+        { return !ch.IsSupplementary(); }
+#else
    // encodes the character to a form used to represent it in internal
-    // representation (returns a string in UTF8 version)
-    static wxChar EncodeChar(const wxUniChar& ch) { return (wxChar)ch; }
+    // representation
+    struct SingleCharBuffer
+    {
+        SingleCharBuffer() : data() {}
+        wxChar data[2];
+        operator const wxChar*() const { return data; }
+    };
+    static SingleCharBuffer EncodeChar(const wxUniChar& ch)
+    {
+        SingleCharBuffer buf;
+        buf.data[0] = (wxChar)ch;
+        return buf;
+    }
+    static wxWxCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);
+    static bool IsSingleCodeUnitCharacter(const wxUniChar&) { return true; }
+#endif

    static wxUniChar DecodeChar(const wxStringImpl::const_iterator& i)
        { return *i; }
@@ -134,6 +161,9 @@ struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
        return dist;
    }

+    static bool IsSingleCodeUnitCharacter(const wxUniChar& ch)
+        { return ch.IsAscii(); }
+
    // encodes the character as UTF-8:
    typedef wxUniChar::Utf8CharBuffer Utf8CharBuffer;
    static Utf8CharBuffer EncodeChar(const wxUniChar& ch)
--- a/include/wx/unichar.h
+++ b/include/wx/unichar.h
@@ -83,6 +83,38 @@ public:
        return true;
    }

+    // Returns true if the character is a BMP character:
+    static bool IsBMP(wxUint32 value) { return value < 0x10000; }
+
+    // Returns true if the character is a supplementary character:
+    static bool IsSupplementary(wxUint32 value) { return 0x10000 <= value && value < 0x110000; }
+
+    // Returns the high (leading) surrogate, in 0xD800..0xDBFF, of the UTF-16 pair encoding a supplementary character
+    static wxUint16 HighSurrogate(wxUint32 value)
+    {
+        wxASSERT_MSG(IsSupplementary(value), "wxUniChar::HighSurrogate() must be called on a supplementary character");
+        return static_cast<wxUint16>(0xD800 | ((value - 0x10000) >> 10));
+    }
+
+    // Returns the low (trailing) surrogate, in 0xDC00..0xDFFF, of the UTF-16 pair encoding a supplementary character
+    static wxUint16 LowSurrogate(wxUint32 value)
+    {
+        wxASSERT_MSG(IsSupplementary(value), "wxUniChar::LowSurrogate() must be called on a supplementary character");
+        return static_cast<wxUint16>(0xDC00 | ((value - 0x10000) & 0x03FF));
+    }
+
+    // Returns true if the character is a BMP character:
+    bool IsBMP() const { return IsBMP(m_value); }
+
+    // Returns true if the character is a supplementary character:
+    bool IsSupplementary() const { return IsSupplementary(m_value); }
+
+    // Returns the high surrogate code unit for the supplementary character
+    wxUint16 HighSurrogate() const { return HighSurrogate(m_value); }
+
+    // Returns the low surrogate code unit for the supplementary character
+    wxUint16 LowSurrogate() const { return LowSurrogate(m_value); }
+
    // Conversions to char and wchar_t types: all of those are needed to be
    // able to pass wxUniChars to verious standard narrow and wide character
    // functions
@@ -216,6 +248,11 @@ public:
    bool IsAscii() const { return UniChar().IsAscii(); }
    bool GetAsChar(char *c) const { return UniChar().GetAsChar(c); }

+    bool IsBMP() const { return UniChar().IsBMP(); }
+    bool IsSupplementary() const { return UniChar().IsSupplementary(); }
+    wxUint16 HighSurrogate() const { return UniChar().HighSurrogate(); }
+    wxUint16 LowSurrogate() const { return UniChar().LowSurrogate(); }
+
    // Assignment operators:
 #if wxUSE_UNICODE_UTF8
    wxUniCharRef& operator=(const wxUniChar& c);
--- a/interface/wx/unichar.h
+++ b/interface/wx/unichar.h
@@ -83,6 +83,82 @@ public:
     */
    bool GetAsChar(char *c) const;

+    /**
+        Returns true if the character is a BMP character (i.e.\ if its value is less than 0x10000).
+
+        @since 3.1.1
+    */
+    bool IsBMP() const;
+
+    /**
+        Returns true if @a value is the code point of a BMP character (i.e.\ if it is less than 0x10000).
+
+        @param value
+            The Unicode code point of the character.
+
+        @since 3.1.1
+    */
+    static bool IsBMP(wxUint32 value);
+
+    /**
+        Returns true if the character is a supplementary character (i.e.\ if its value is between 0x10000 and 0x10FFFF inclusive).
+
+        @since 3.1.1
+    */
+    bool IsSupplementary() const;
+
+    /**
+        Returns true if @a value is the code point of a supplementary character (i.e.\ if it is between 0x10000 and 0x10FFFF inclusive).
+
+        @param value
+            The Unicode code point of the character.
+
+        @since 3.1.1
+    */
+    static bool IsSupplementary(wxUint32 value);
+
+    /**
+        Returns the high surrogate code unit for the supplementary character.
+
+        @pre IsSupplementary() const
+
+        @since 3.1.1
+    */
+    wxUint16 HighSurrogate() const;
+
+    /**
+        Returns the high surrogate code unit for the supplementary character.
+
+        @param value
+            The Unicode code point of the character.
+
+        @pre IsSupplementary(wxUint32 value)
+
+        @since 3.1.1
+    */
+    static wxUint16 HighSurrogate(wxUint32 value);
+
+    /**
+        Returns the low surrogate code unit for the supplementary character.
+
+        @pre IsSupplementary() const
+
+        @since 3.1.1
+    */
+    wxUint16 LowSurrogate() const;
+
+    /**
+        Returns the low surrogate code unit for the supplementary character.
+
+        @param value
+            The Unicode code point of the character.
+
+        @pre IsSupplementary(wxUint32 value)
+
+        @since 3.1.1
+    */
+    static wxUint16 LowSurrogate(wxUint32 value);
+
    //@{
    /**
        Conversions to char and wchar_t types: all of those are needed to be
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -80,27 +80,27 @@ static bool NotAllNULs(const char *p, size_t n)

 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 {
-    if (input <= 0xffff)
+    if (wxUniChar::IsBMP(input))
    {
        if (output)
            *output = (wxUint16) input;

        return 1;
    }
-    else if (input >= 0x110000)
-    {
-        return wxCONV_FAILED;
-    }
-    else
+    else if (wxUniChar::IsSupplementary(input))
    {
        if (output)
        {
-            *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
-            *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
+            *output++ = wxUniChar::HighSurrogate(input);
+            *output = wxUniChar::LowSurrogate(input);
        }

        return 2;
    }
+    else
+    {
+        return wxCONV_FAILED;
+    }
 }

 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
--- a/src/common/stringops.cpp
+++ b/src/common/stringops.cpp
@@ -27,6 +27,68 @@
 // implementation
 // ===========================================================================

+#if wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
+
+#if wxUSE_UNICODE_UTF16
+
+wxStringOperationsWchar::Utf16CharBuffer wxStringOperationsWchar::EncodeChar(const wxUniChar& ch)
+{
+    Utf16CharBuffer buf;
+    if ( ch.IsSupplementary() )
+    {
+        buf.data[0] = (wchar_t)ch.HighSurrogate();
+        buf.data[1] = (wchar_t)ch.LowSurrogate();
+    }
+    else
+    {
+        // Not supplementary: assumed to be a BMP character, stored as a single code unit
+        buf.data[0] = (wchar_t)ch;
+    }
+    return buf;
+}
+
+wxWCharBuffer wxStringOperationsWchar::EncodeNChars(size_t n, const wxUniChar& ch)
+{
+    if ( ch.IsSupplementary() )
+    {
+        wxWCharBuffer buf(n * 2);
+        wchar_t s[2] = {
+            (wchar_t)ch.HighSurrogate(),
+            (wchar_t)ch.LowSurrogate(),
+        };
+        wchar_t *ptr = buf.data();
+        for (size_t i = 0; i < n; i++, ptr += 2)
+        {
+            wmemcpy(ptr, s, 2);
+        }
+        return buf;
+    }
+    else
+    {
+        // Not supplementary: assumed to be a BMP character, so fill with n copies of one code unit
+        wxWCharBuffer buf(n);
+        wmemset(buf.data(), (wchar_t)ch, n);
+        return buf;
+    }
+}
+
+#else
+
+wxWxCharBuffer wxStringOperationsWchar::EncodeNChars(size_t n, const wxUniChar& ch)
+{
+    wxWxCharBuffer buf(n);
+#if wxUSE_UNICODE_WCHAR
+    wmemset(buf.data(), (wchar_t)ch, n);
+#else // ANSI
+    memset(buf.data(), (unsigned char)ch, n);
+#endif
+    return buf;
+}
+
+#endif // wxUSE_UNICODE_UTF16
+
+#endif // wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
+
 #if wxUSE_UNICODE_UTF8

 // ---------------------------------------------------------------------------
--- a/src/common/ustring.cpp
+++ b/src/common/ustring.cpp
@@ -502,7 +502,7 @@ wxScopedU16CharBuffer wxUString::utf16_str() const

        // TODO: error range checks

-        if (code < 0x10000)
+        if (wxUniChar::IsBMP(code))
           utf16_length++;
        else
           utf16_length += 2;
@@ -520,15 +520,15 @@ wxScopedU16CharBuffer wxUString::utf16_str() const

        // TODO: error range checks

-        if (code < 0x10000)
+        if (wxUniChar::IsBMP(code))
        {
           out[0] = code;
           out++;
        }
        else
        {
-           out[0] = (code - 0x10000) / 0x400 + 0xd800;
-           out[1] = (code - 0x10000) % 0x400 + 0xdc00;
+           out[0] = wxUniChar::HighSurrogate(code);
+           out[1] = wxUniChar::LowSurrogate(code);
           out += 2;
        }
    }
--- a/tests/strings/strings.cpp
+++ b/tests/strings/strings.cpp
@@ -63,6 +63,7 @@ private:
        CPPUNIT_TEST( IndexedAccess );
        CPPUNIT_TEST( BeforeAndAfter );
        CPPUNIT_TEST( ScopedBuffers );
+        CPPUNIT_TEST( SupplementaryUniChar );
    CPPUNIT_TEST_SUITE_END();

    void String();
@@ -98,6 +99,7 @@ private:
    void IndexedAccess();
    void BeforeAndAfter();
    void ScopedBuffers();
+    void SupplementaryUniChar();

    wxDECLARE_NO_COPY_CLASS(StringTestCase);
 };
@@ -1142,3 +1144,171 @@ void StringTestCase::ScopedBuffers()
    buf5.extend(len);
    CPPUNIT_ASSERT_EQUAL('\0', buf5.data()[len]);
 }
+
+void StringTestCase::SupplementaryUniChar()
+{
+#if wxUSE_UNICODE
+    // Test wxString(wxUniChar ch, size_t nRepeat = 1),
+    // which is implemented upon assign(size_t n, wxUniChar ch).
+    {
+        wxString s(wxUniChar(0x12345));
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(2, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD808, s[0].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDF45, s[1].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(1, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x12345, s[0].GetValue());
+#endif
+    }
+
+    // Test operator=(wxUniChar ch).
+    {
+        wxString s;
+        s = wxUniChar(0x23456);
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(2, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD84D, s[0].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDC56, s[1].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(1, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x23456, s[0].GetValue());
+#endif
+    }
+
+    // Test operator+=(wxUniChar ch).
+    {
+        wxString s = "A";
+        s += wxUniChar(0x34567);
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(3, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD891, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDD67, s[2].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(2, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x34567, s[1].GetValue());
+#endif
+    }
+
+    // Test operator<<(wxUniChar ch),
+    // which is implemented upon append(size_t n, wxUniChar ch).
+    {
+        wxString s = "A";
+        s << wxUniChar(0x45678);
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(3, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD8D5, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDE78, s[2].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(2, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x45678, s[1].GetValue());
+#endif
+    }
+
+    // Test insert(size_t nPos, size_t n, wxUniChar ch).
+    {
+        wxString s = L"\x3042\x208\x3059";
+        s.insert(1, 2, wxUniChar(0x12345));
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(7, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD808, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDF45, s[2].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xD808, s[3].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDF45, s[4].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(5, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x12345, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0x12345, s[2].GetValue());
+#endif
+    }
+
+    // Test insert(iterator it, wxUniChar ch).
+    {
+        wxString s = L"\x3042\x208\x3059";
+        s.insert(s.begin() + 1, wxUniChar(0x23456));
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(5, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD84D, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDC56, s[2].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(4, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x23456, s[1].GetValue());
+#endif
+    }
+
+    // Test insert(iterator it, size_type n, wxUniChar ch).
+    {
+        wxString s = L"\x3042\x208\x3059";
+        s.insert(s.begin() + 1, 2, wxUniChar(0x34567));
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(7, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD891, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDD67, s[2].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(5, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x34567, s[1].GetValue());
+#endif
+    }
+
+    // Test replace(size_t nStart, size_t nLen, size_t nCount, wxUniChar ch).
+    {
+        wxString s = L"\x3042\x208\x3059";
+        s.replace(1, 2, 2, wxUniChar(0x45678));
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(5, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD8D5, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDE78, s[2].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xD8D5, s[3].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDE78, s[4].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(3, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x45678, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0x45678, s[2].GetValue());
+#endif
+    }
+
+    // Test replace(iterator first, iterator last, size_type n, wxUniChar ch).
+    {
+        wxString s = L"\x3042\x208\x3059";
+        s.replace(s.begin() + 1, s.end(), 2, wxUniChar(0x34567));
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(5, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD891, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDD67, s[2].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xD891, s[3].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDD67, s[4].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(3, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x34567, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0x34567, s[2].GetValue());
+#endif
+    }
+
+    // Test find(wxUniChar ch, size_t nStart = 0)
+    // and rfind(wxUniChar ch, size_t nStart = npos).
+    {
+        wxString s = L"\x308\x2063";
+        s << wxUniChar(0x12345);
+        s << "x";
+        s += wxUniChar(0x12345);
+        s += "y";
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(8, s.length());
+        CPPUNIT_ASSERT_EQUAL(2, s.find(wxUniChar(0x12345)));
+        CPPUNIT_ASSERT_EQUAL(5, s.find(wxUniChar(0x12345), 3));
+        CPPUNIT_ASSERT_EQUAL(5, s.rfind(wxUniChar(0x12345)));
+        CPPUNIT_ASSERT_EQUAL(2, s.rfind(wxUniChar(0x12345), 4));
+#else
+        CPPUNIT_ASSERT_EQUAL(6, s.length());
+        CPPUNIT_ASSERT_EQUAL(2, s.find(wxUniChar(0x12345)));
+        CPPUNIT_ASSERT_EQUAL(4, s.find(wxUniChar(0x12345), 3));
+        CPPUNIT_ASSERT_EQUAL(4, s.rfind(wxUniChar(0x12345)));
+        CPPUNIT_ASSERT_EQUAL(2, s.rfind(wxUniChar(0x12345), 3));
+#endif
+    }
+
+    /* Not tested here:
+         find_first_of, find_last_of, find_first_not_of, find_last_not_of
+    */
+#endif
+}