From 61f0e0ce221325f5b9125fd766bb26c9825537f3 Mon Sep 17 00:00:00 2001
From: ARATA Mizuki <minorinoki@gmail.com>
Date: Mon, 24 Apr 2017 15:56:35 +0900
Subject: [PATCH 1/8] Add wxUSE_UNICODE_UTF16 macro to indicate whether the
 internal representation of wxString is UTF-16

The value of this macro is equal to (wxUSE_UNICODE_WCHAR && SIZEOF_WCHAR_T == 2).
---
 include/wx/chartype.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/wx/chartype.h b/include/wx/chartype.h
index e8ccb35f05..0e81a3344d 100644
--- a/include/wx/chartype.h
+++ b/include/wx/chartype.h
@@ -175,6 +175,12 @@
     #define wxUSE_UTF8_LOCALE_ONLY 0
 #endif
 
+#if wxUSE_UNICODE_WCHAR && SIZEOF_WCHAR_T == 2
+    #define wxUSE_UNICODE_UTF16 1
+#else
+    #define wxUSE_UNICODE_UTF16 0
+#endif
+
 /* define char type used by wxString internal representation: */
 #if wxUSE_UNICODE_WCHAR
     typedef wchar_t wxStringCharType;

From 90c990cf8366a5b4d6cf3dcb73eeeefcc71f4884 Mon Sep 17 00:00:00 2001
From: ARATA Mizuki <minorinoki@gmail.com>
Date: Mon, 1 May 2017 01:47:28 +0900
Subject: [PATCH 2/8] Add some functions to handle supplementary characters

The added functions are:

- wxUniChar::IsBMP()
- wxUniChar::IsSupplementary()
- wxUniChar::HighSurrogate()
- wxUniChar::LowSurrogate()
---
 include/wx/unichar.h   | 37 ++++++++++++++++++++
 interface/wx/unichar.h | 76 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+)

diff --git a/include/wx/unichar.h b/include/wx/unichar.h
index a966d5909e..bbd75466bf 100644
--- a/include/wx/unichar.h
+++ b/include/wx/unichar.h
@@ -83,6 +83,38 @@ public:
         return true;
     }
 
+    // Returns true if the character is a BMP character:
+    static bool IsBMP(wxUint32 value) { return value < 0x10000; }
+
+    // Returns true if the character is a supplementary character:
+    static bool IsSupplementary(wxUint32 value) { return 0x10000 <= value && value < 0x110000; }
+
+    // Returns the high surrogate code unit for the supplementary character
+    static wxUint16 HighSurrogate(wxUint32 value)
+    {
+        wxASSERT_MSG(IsSupplementary(value), "wxUniChar::HighSurrogate() must be called on a supplementary character");
+        return 0xD800 | ((value - 0x10000) >> 10);
+    }
+
+    // Returns the low surrogate code unit for the supplementary character
+    static wxUint16 LowSurrogate(wxUint32 value)
+    {
+        wxASSERT_MSG(IsSupplementary(value), "wxUniChar::LowSurrogate() must be called on a supplementary character");
+        return 0xDC00 | ((value - 0x10000) & 0x03FF);
+    }
+
+    // Returns true if the character is a BMP character:
+    bool IsBMP() const { return IsBMP(m_value); }
+
+    // Returns true if the character is a supplementary character:
+    bool IsSupplementary() const { return IsSupplementary(m_value); }
+
+    // Returns the high surrogate code unit for the supplementary character
+    wxUint16 HighSurrogate() const { return HighSurrogate(m_value); }
+
+    // Returns the low surrogate code unit for the supplementary character
+    wxUint16 LowSurrogate() const { return LowSurrogate(m_value); }
+
     // Conversions to char and wchar_t types: all of those are needed to be
     // able to pass wxUniChars to verious standard narrow and wide character
     // functions
@@ -216,6 +248,11 @@ public:
     bool IsAscii() const { return UniChar().IsAscii(); }
     bool GetAsChar(char *c) const { return UniChar().GetAsChar(c); }
 
+    bool IsBMP() const { return UniChar().IsBMP(); }
+    bool IsSupplementary() const { return UniChar().IsSupplementary(); }
+    wxUint16 HighSurrogate() const { return UniChar().HighSurrogate(); }
+    wxUint16 LowSurrogate() const { return UniChar().LowSurrogate(); }
+
     // Assignment operators:
 #if wxUSE_UNICODE_UTF8
     wxUniCharRef& operator=(const wxUniChar& c);
diff --git a/interface/wx/unichar.h b/interface/wx/unichar.h
index 75012879f2..0536c50dd4 100644
--- a/interface/wx/unichar.h
+++ b/interface/wx/unichar.h
@@ -83,6 +83,82 @@ public:
      */
     bool GetAsChar(char *c) const;
 
+    /**
+        Returns true if the character is a BMP character (i.e.\ if its value is less than 0x10000).
+
+        @since 3.1.1
+    */
+    bool IsBMP() const;
+
+    /**
+        Returns true if the character is a BMP character (i.e.\ if its value is less than 0x10000).
+
+        @param value
+            The Unicode code point of the character.
+
+        @since 3.1.1
+    */
+    static bool IsBMP(wxUint32 value);
+
+    /**
+        Returns true if the character is a supplementary character (i.e.\ between 0x10000 and 0x10FFFF).
+
+        @since 3.1.1
+    */
+    bool IsSupplementary() const;
+
+    /**
+        Returns true if the character is a supplementary character (i.e.\ between 0x10000 and 0x10FFFF).
+
+        @param value
+            The Unicode code point of the character.
+
+        @since 3.1.1
+    */
+    static bool IsSupplementary(wxUint32 value);
+
+    /**
+        Returns the high surrogate code unit for the supplementary character.
+
+        @pre IsSupplementary() const
+
+        @since 3.1.1
+    */
+    wxUint16 HighSurrogate() const;
+
+    /**
+        Returns the high surrogate code unit for the supplementary character.
+
+        @param value
+            The Unicode code point of the character.
+
+        @pre IsSupplementary(wxUint32 value)
+
+        @since 3.1.1
+    */
+    static wxUint16 HighSurrogate(wxUint32 value);
+
+    /**
+        Returns the low surrogate code unit for the supplementary character.
+
+        @pre IsSupplementary() const
+
+        @since 3.1.1
+    */
+    wxUint16 LowSurrogate() const;
+
+    /**
+        Returns the low surrogate code unit for the supplementary character.
+
+        @param value
+            The Unicode code point of the character.
+
+        @pre IsSupplementary(wxUint32 value)
+
+        @since 3.1.1
+    */
+    static wxUint16 LowSurrogate(wxUint32 value);
+
     //@{
     /**
         Conversions to char and wchar_t types: all of those are needed to be

From 8a29c5c09fbcf6fa89be9a1fa807861909b7fdb5 Mon Sep 17 00:00:00 2001
From: ARATA Mizuki <minorinoki@gmail.com>
Date: Mon, 1 May 2017 14:44:45 +0900
Subject: [PATCH 3/8] Use the added wxUniChar functions in the existing code

---
 src/common/strconv.cpp | 16 ++++++++--------
 src/common/ustring.cpp |  8 ++++----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp
index 1e40478ee8..83c08ec547 100644
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -80,27 +80,27 @@ static bool NotAllNULs(const char *p, size_t n)
 
 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
 {
-    if (input <= 0xffff)
+    if (wxUniChar::IsBMP(input))
     {
         if (output)
             *output = (wxUint16) input;
 
         return 1;
     }
-    else if (input >= 0x110000)
-    {
-        return wxCONV_FAILED;
-    }
-    else
+    else if (wxUniChar::IsSupplementary(input))
     {
         if (output)
         {
-            *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
-            *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
+            *output++ = wxUniChar::HighSurrogate(input);
+            *output = wxUniChar::LowSurrogate(input);
         }
 
         return 2;
     }
+    else
+    {
+        return wxCONV_FAILED;
+    }
 }
 
 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
diff --git a/src/common/ustring.cpp b/src/common/ustring.cpp
index ab70ce5ae2..87d5158234 100644
--- a/src/common/ustring.cpp
+++ b/src/common/ustring.cpp
@@ -502,7 +502,7 @@ wxScopedU16CharBuffer wxUString::utf16_str() const
 
         // TODO: error range checks
 
-        if (code < 0x10000)
+        if (wxUniChar::IsBMP(code))
            utf16_length++;
         else
            utf16_length += 2;
@@ -520,15 +520,15 @@ wxScopedU16CharBuffer wxUString::utf16_str() const
 
         // TODO: error range checks
 
-        if (code < 0x10000)
+        if (wxUniChar::IsBMP(code))
         {
            out[0] = code;
            out++;
         }
         else
         {
-           out[0] = (code - 0x10000) / 0x400 + 0xd800;
-           out[1] = (code - 0x10000) % 0x400 + 0xdc00;
+           out[0] = wxUniChar::HighSurrogate(code);
+           out[1] = wxUniChar::LowSurrogate(code);
            out += 2;
         }
     }

From ad4785707247ddb73c824c96ff2b645789d61bcc Mon Sep 17 00:00:00 2001
From: ARATA Mizuki <minorinoki@gmail.com>
Date: Fri, 21 Apr 2017 04:23:52 +0900
Subject: [PATCH 4/8] Add a test case for constructing wxString with
 supplementary wxUniChar values

See #11827
---
 tests/strings/strings.cpp | 170 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 170 insertions(+)

diff --git a/tests/strings/strings.cpp b/tests/strings/strings.cpp
index 3562510b51..1cf282b0eb 100644
--- a/tests/strings/strings.cpp
+++ b/tests/strings/strings.cpp
@@ -63,6 +63,7 @@ private:
         CPPUNIT_TEST( IndexedAccess );
         CPPUNIT_TEST( BeforeAndAfter );
         CPPUNIT_TEST( ScopedBuffers );
+        CPPUNIT_TEST( SupplementaryUniChar );
     CPPUNIT_TEST_SUITE_END();
 
     void String();
@@ -98,6 +99,7 @@ private:
     void IndexedAccess();
     void BeforeAndAfter();
     void ScopedBuffers();
+    void SupplementaryUniChar();
 
     wxDECLARE_NO_COPY_CLASS(StringTestCase);
 };
@@ -1142,3 +1144,171 @@ void StringTestCase::ScopedBuffers()
     buf5.extend(len);
     CPPUNIT_ASSERT_EQUAL('\0', buf5.data()[len]);
 }
+
+void StringTestCase::SupplementaryUniChar()
+{
+#if wxUSE_UNICODE
+    // Test wxString(wxUniChar ch, size_t nRepeat = 1),
+    // which is implemented upon assign(size_t n, wxUniChar ch).
+    {
+        wxString s(wxUniChar(0x12345));
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(2, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD808, s[0].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDF45, s[1].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(1, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x12345, s[0].GetValue());
+#endif
+    }
+
+    // Test operator=(wxUniChar ch).
+    {
+        wxString s;
+        s = wxUniChar(0x23456);
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(2, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD84D, s[0].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDC56, s[1].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(1, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x23456, s[0].GetValue());
+#endif
+    }
+
+    // Test operator+=(wxUniChar ch).
+    {
+        wxString s = "A";
+        s += wxUniChar(0x34567);
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(3, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD891, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDD67, s[2].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(2, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x34567, s[1].GetValue());
+#endif
+    }
+
+    // Test operator<<(wxUniChar ch),
+    // which is implemented upon append(size_t n, wxUniChar ch).
+    {
+        wxString s = "A";
+        s << wxUniChar(0x45678);
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(3, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD8D5, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDE78, s[2].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(2, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x45678, s[1].GetValue());
+#endif
+    }
+
+    // Test insert(size_t nPos, size_t n, wxUniChar ch).
+    {
+        wxString s = L"\x3042\x208\x3059";
+        s.insert(1, 2, wxUniChar(0x12345));
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(7, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD808, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDF45, s[2].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xD808, s[3].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDF45, s[4].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(5, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x12345, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0x12345, s[2].GetValue());
+#endif
+    }
+
+    // Test insert(iterator it, wxUniChar ch).
+    {
+        wxString s = L"\x3042\x208\x3059";
+        s.insert(s.begin() + 1, wxUniChar(0x23456));
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(5, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD84D, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDC56, s[2].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(4, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x23456, s[1].GetValue());
+#endif
+    }
+
+    // Test insert(iterator it, size_type n, wxUniChar ch).
+    {
+        wxString s = L"\x3042\x208\x3059";
+        s.insert(s.begin() + 1, 2, wxUniChar(0x34567));
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(7, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD891, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDD67, s[2].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(5, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x34567, s[1].GetValue());
+#endif
+    }
+
+    // Test replace(size_t nStart, size_t nLen, size_t nCount, wxUniChar ch).
+    {
+        wxString s = L"\x3042\x208\x3059";
+        s.replace(1, 2, 2, wxUniChar(0x45678));
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(5, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD8D5, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDE78, s[2].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xD8D5, s[3].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDE78, s[4].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(3, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x45678, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0x45678, s[2].GetValue());
+#endif
+    }
+
+    // Test replace(iterator first, iterator last, size_type n, wxUniChar ch).
+    {
+        wxString s = L"\x3042\x208\x3059";
+        s.replace(s.begin() + 1, s.end(), 2, wxUniChar(0x34567));
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(5, s.length());
+        CPPUNIT_ASSERT_EQUAL(0xD891, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDD67, s[2].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xD891, s[3].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0xDD67, s[4].GetValue());
+#else
+        CPPUNIT_ASSERT_EQUAL(3, s.length());
+        CPPUNIT_ASSERT_EQUAL(0x34567, s[1].GetValue());
+        CPPUNIT_ASSERT_EQUAL(0x34567, s[2].GetValue());
+#endif
+    }
+
+    // Test find(wxUniChar ch, size_t nStart = 0)
+    // and rfind(wxUniChar ch, size_t nStart = npos).
+    {
+        wxString s = L"\x308\x2063";
+        s << wxUniChar(0x12345);
+        s << "x";
+        s += wxUniChar(0x12345);
+        s += "y";
+#if wxUSE_UNICODE_UTF16
+        CPPUNIT_ASSERT_EQUAL(8, s.length());
+        CPPUNIT_ASSERT_EQUAL(2, s.find(wxUniChar(0x12345)));
+        CPPUNIT_ASSERT_EQUAL(5, s.find(wxUniChar(0x12345), 3));
+        CPPUNIT_ASSERT_EQUAL(5, s.rfind(wxUniChar(0x12345)));
+        CPPUNIT_ASSERT_EQUAL(2, s.rfind(wxUniChar(0x12345), 4));
+#else
+        CPPUNIT_ASSERT_EQUAL(6, s.length());
+        CPPUNIT_ASSERT_EQUAL(2, s.find(wxUniChar(0x12345)));
+        CPPUNIT_ASSERT_EQUAL(4, s.find(wxUniChar(0x12345), 3));
+        CPPUNIT_ASSERT_EQUAL(4, s.rfind(wxUniChar(0x12345)));
+        CPPUNIT_ASSERT_EQUAL(2, s.rfind(wxUniChar(0x12345), 3));
+#endif
+    }
+
+    /* Not tested here:
+         find_first_of, find_last_of, find_first_not_of, find_last_not_of
+    */
+#endif
+}

From 58d940690abbb7f59fd6b3e189328afed7f53821 Mon Sep 17 00:00:00 2001
From: ARATA Mizuki <minorinoki@gmail.com>
Date: Fri, 21 Apr 2017 04:32:32 +0900
Subject: [PATCH 5/8] Better handling of supplementary wxUniChar values in some
 of wxString methods

On MSW, the Unicode code point is now properly encoded as UTF-16 when
assigned or appended to a wxString.

Closes #11827
---
 include/wx/string.h      | 109 ++++++++++++++++-----------------------
 include/wx/stringops.h   |  34 +++++++++++-
 src/common/stringops.cpp |  62 ++++++++++++++++++++++
 3 files changed, 139 insertions(+), 66 deletions(-)

diff --git a/include/wx/string.h b/include/wx/string.h
index 3441a7f7d6..b80998cc44 100644
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -898,9 +898,6 @@ public:
       wxStringIteratorNode m_node;
   };
 
-  size_t IterToImplPos(wxString::iterator i) const
-    { return wxStringImpl::const_iterator(i.impl()) - m_impl.begin(); }
-
   iterator GetIterForNthChar(size_t n)
     { return iterator(this, m_impl.begin() + PosToImpl(n)); }
   const_iterator GetIterForNthChar(size_t n) const
@@ -975,6 +972,9 @@ public:
   const_iterator GetIterForNthChar(size_t n) const { return begin() + n; }
 #endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8
 
+  size_t IterToImplPos(wxString::iterator i) const
+    { return wxStringImpl::const_iterator(i.impl()) - m_impl.begin(); }
+
   #undef WX_STR_ITERATOR_TAG
   #undef WX_STR_ITERATOR_IMPL
 
@@ -1820,12 +1820,11 @@ public:
   {
     wxSTRING_INVALIDATE_CACHE();
 
-#if wxUSE_UNICODE_UTF8
-    if ( !ch.IsAscii() )
-        m_impl = wxStringOperations::EncodeChar(ch);
-    else
-#endif // wxUSE_UNICODE_UTF8
+    if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
         m_impl = (wxStringCharType)ch;
+    else
+        m_impl = wxStringOperations::EncodeChar(ch);
+
     return *this;
   }
 
@@ -2410,20 +2409,18 @@ public:
     // append n copies of ch
   wxString& append(size_t n, wxUniChar ch)
   {
-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
-      {
-          wxSTRING_INVALIDATE_CACHED_LENGTH();
-
-          m_impl.append(wxStringOperations::EncodeNChars(n, ch));
-      }
-      else // ASCII
-#endif
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
       {
           wxSTRING_UPDATE_CACHED_LENGTH(n);
 
           m_impl.append(n, (wxStringCharType)ch);
       }
+      else
+      {
+          wxSTRING_INVALIDATE_CACHED_LENGTH();
+
+          m_impl.append(wxStringOperations::EncodeNChars(n, ch));
+      }
 
       return *this;
   }
@@ -2556,12 +2553,10 @@ public:
   {
       wxSTRING_SET_CACHED_LENGTH(n);
 
-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
-          m_impl.assign(wxStringOperations::EncodeNChars(n, ch));
-      else
-#endif
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
           m_impl.assign(n, (wxStringCharType)ch);
+      else
+          m_impl.assign(wxStringOperations::EncodeNChars(n, ch));
 
       return *this;
   }
@@ -2671,12 +2666,11 @@ public:
   {
       wxSTRING_UPDATE_CACHED_LENGTH(n);
 
-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
-          m_impl.insert(PosToImpl(nPos), wxStringOperations::EncodeNChars(n, ch));
-      else
-#endif
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
           m_impl.insert(PosToImpl(nPos), n, (wxStringCharType)ch);
+      else
+          m_impl.insert(PosToImpl(nPos), wxStringOperations::EncodeNChars(n, ch));
+
       return *this;
   }
 
@@ -2684,16 +2678,14 @@ public:
   {
       wxSTRING_UPDATE_CACHED_LENGTH(1);
 
-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
+          return iterator(this, m_impl.insert(it.impl(), (wxStringCharType)ch));
+      else
       {
           size_t pos = IterToImplPos(it);
           m_impl.insert(pos, wxStringOperations::EncodeChar(ch));
           return iterator(this, m_impl.begin() + pos);
       }
-      else
-#endif
-          return iterator(this, m_impl.insert(it.impl(), (wxStringCharType)ch));
   }
 
   void insert(iterator it, const_iterator first, const_iterator last)
@@ -2716,12 +2708,10 @@ public:
   {
       wxSTRING_UPDATE_CACHED_LENGTH(n);
 
-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
-          m_impl.insert(IterToImplPos(it), wxStringOperations::EncodeNChars(n, ch));
-      else
-#endif
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
           m_impl.insert(it.impl(), n, (wxStringCharType)ch);
+      else
+          m_impl.insert(IterToImplPos(it), wxStringOperations::EncodeNChars(n, ch));
   }
 
     // delete characters from nStart to nStart + nLen
@@ -2800,12 +2790,11 @@ public:
 
       size_t from, len;
       PosLenToImpl(nStart, nLen, &from, &len);
-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
-          m_impl.replace(from, len, wxStringOperations::EncodeNChars(nCount, ch));
-      else
-#endif
+
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
           m_impl.replace(from, len, nCount, (wxStringCharType)ch);
+      else
+          m_impl.replace(from, len, wxStringOperations::EncodeNChars(nCount, ch));
 
       return *this;
   }
@@ -2921,13 +2910,11 @@ public:
   {
       wxSTRING_INVALIDATE_CACHE();
 
-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
+          m_impl.replace(first.impl(), last.impl(), n, (wxStringCharType)ch);
+      else
           m_impl.replace(first.impl(), last.impl(),
                   wxStringOperations::EncodeNChars(n, ch));
-      else
-#endif
-          m_impl.replace(first.impl(), last.impl(), n, (wxStringCharType)ch);
 
       return *this;
   }
@@ -2988,15 +2975,12 @@ public:
     // find the first occurrence of character ch after nStart
   size_t find(wxUniChar ch, size_t nStart = 0) const
   {
-#if wxUSE_UNICODE_UTF8
-    if ( !ch.IsAscii() )
-        return PosFromImpl(m_impl.find(wxStringOperations::EncodeChar(ch),
-                                       PosToImpl(nStart)));
-    else
-#endif
+    if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
         return PosFromImpl(m_impl.find((wxStringCharType)ch,
                                        PosToImpl(nStart)));
-
+    else
+        return PosFromImpl(m_impl.find(wxStringOperations::EncodeChar(ch),
+                                       PosToImpl(nStart)));
   }
   size_t find(wxUniCharRef ch, size_t nStart = 0) const
     {  return find(wxUniChar(ch), nStart); }
@@ -3033,13 +3017,11 @@ public:
     // as find, but from the end
   size_t rfind(wxUniChar ch, size_t nStart = npos) const
   {
-#if wxUSE_UNICODE_UTF8
-    if ( !ch.IsAscii() )
-        return PosFromImpl(m_impl.rfind(wxStringOperations::EncodeChar(ch),
+    if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
+        return PosFromImpl(m_impl.rfind((wxStringCharType)ch,
                                         PosToImpl(nStart)));
     else
-#endif
-        return PosFromImpl(m_impl.rfind((wxStringCharType)ch,
+        return PosFromImpl(m_impl.rfind(wxStringOperations::EncodeChar(ch),
                                         PosToImpl(nStart)));
   }
   size_t rfind(wxUniCharRef ch, size_t nStart = npos) const
@@ -3301,12 +3283,11 @@ public:
   {
       wxSTRING_UPDATE_CACHED_LENGTH(1);
 
-#if wxUSE_UNICODE_UTF8
-      if ( !ch.IsAscii() )
-          m_impl += wxStringOperations::EncodeChar(ch);
-      else
-#endif
+      if ( wxStringOperations::IsSingleCodeUnitCharacter(ch) )
           m_impl += (wxStringCharType)ch;
+      else
+          m_impl += wxStringOperations::EncodeChar(ch);
+
       return *this;
   }
   wxString& operator+=(wxUniCharRef ch) { return *this += wxUniChar(ch); }
diff --git a/include/wx/stringops.h b/include/wx/stringops.h
index 21c6121787..fd6695116f 100644
--- a/include/wx/stringops.h
+++ b/include/wx/stringops.h
@@ -44,9 +44,36 @@ struct WXDLLIMPEXP_BASE wxStringOperationsWchar
     static ptrdiff_t DiffIters(const Iterator& i1, const Iterator& i2)
         { return i1 - i2; }
 
+#if wxUSE_UNICODE_UTF16
+    // encodes th characters as UTF-16:
+    struct Utf16CharBuffer
+    {
+        Utf16CharBuffer() : data() {}
+        wchar_t data[3];
+        operator const wchar_t*() const { return data; }
+    };
+    static Utf16CharBuffer EncodeChar(const wxUniChar& ch);
+    static wxWCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);
+    static bool IsSingleCodeUnitCharacter(const wxUniChar& ch)
+        { return !ch.IsSupplementary(); }
+#else
     // encodes the character to a form used to represent it in internal
-    // representation (returns a string in UTF8 version)
-    static wxChar EncodeChar(const wxUniChar& ch) { return (wxChar)ch; }
+    // representation
+    struct SingleCharBuffer
+    {
+        SingleCharBuffer() : data() {}
+        wxChar data[2];
+        operator const wxChar*() const { return data; }
+    };
+    static SingleCharBuffer EncodeChar(const wxUniChar& ch)
+    {
+        SingleCharBuffer buf;
+        buf.data[0] = (wxChar)ch;
+        return buf;
+    }
+    static wxWxCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);
+    static bool IsSingleCodeUnitCharacter(const wxUniChar&) { return true; }
+#endif
 
     static wxUniChar DecodeChar(const wxStringImpl::const_iterator& i)
         { return *i; }
@@ -134,6 +161,9 @@ struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
         return dist;
     }
 
+    static bool IsSingleCodeUnitCharacter(const wxUniChar& ch)
+        { return ch.IsAscii(); }
+
     // encodes the character as UTF-8:
     typedef wxUniChar::Utf8CharBuffer Utf8CharBuffer;
     static Utf8CharBuffer EncodeChar(const wxUniChar& ch)
diff --git a/src/common/stringops.cpp b/src/common/stringops.cpp
index 36ff4045a4..2d8fcaee3e 100644
--- a/src/common/stringops.cpp
+++ b/src/common/stringops.cpp
@@ -27,6 +27,68 @@
 // implementation
 // ===========================================================================
 
+#if wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
+
+#if wxUSE_UNICODE_UTF16
+
+wxStringOperationsWchar::Utf16CharBuffer wxStringOperationsWchar::EncodeChar(const wxUniChar& ch)
+{
+    Utf16CharBuffer buf;
+    if ( ch.IsSupplementary() )
+    {
+        buf.data[0] = (wchar_t)ch.HighSurrogate();
+        buf.data[1] = (wchar_t)ch.LowSurrogate();
+    }
+    else
+    {
+        // Assume ch is a BMP character
+        buf.data[0] = (wchar_t)ch;
+    }
+    return buf;
+}
+
+wxWCharBuffer wxStringOperationsWchar::EncodeNChars(size_t n, const wxUniChar& ch)
+{
+    if ( ch.IsSupplementary() )
+    {
+        wxWCharBuffer buf(n * 2);
+        wchar_t s[2] = {
+            (wchar_t)ch.HighSurrogate(),
+            (wchar_t)ch.LowSurrogate(),
+        };
+        wchar_t *ptr = buf.data();
+        for (size_t i = 0; i < n; i++, ptr += 2)
+        {
+            wmemcpy(ptr, s, 2);
+        }
+        return buf;
+    }
+    else
+    {
+        // Assume ch is a BMP character
+        wxWCharBuffer buf(n);
+        wmemset(buf.data(), (wchar_t)ch, n);
+        return buf;
+    }
+}
+
+#else
+
+wxWxCharBuffer wxStringOperationsWchar::EncodeNChars(size_t n, const wxUniChar& ch)
+{
+    wxWxCharBuffer buf(n);
+#if wxUSE_UNICODE_WCHAR
+    wmemset(buf.data(), (wchar_t)ch, n);
+#else // ANSI
+    memset(buf.data(), (unsigned char)ch, n);
+#endif
+    return buf;
+}
+
+#endif // wxUSE_UNICODE_UTF16
+
+#endif // wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
+
 #if wxUSE_UNICODE_UTF8
 
 // ---------------------------------------------------------------------------

From 24f3ff3b787d17262e8ddf09fb5b1a3f41e4c7df Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Wed, 21 Jun 2017 19:07:13 +0200
Subject: [PATCH 6/8] No changes, just fix a typo in a recently added comment

---
 include/wx/stringops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/wx/stringops.h b/include/wx/stringops.h
index fd6695116f..740603e8f7 100644
--- a/include/wx/stringops.h
+++ b/include/wx/stringops.h
@@ -45,7 +45,7 @@ struct WXDLLIMPEXP_BASE wxStringOperationsWchar
         { return i1 - i2; }
 
 #if wxUSE_UNICODE_UTF16
-    // encodes th characters as UTF-16:
+    // encodes the characters as UTF-16:
     struct Utf16CharBuffer
     {
         Utf16CharBuffer() : data() {}

From a86d0f8d6564b6346faf2afe2ea2569ffdc062c0 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Wed, 21 Jun 2017 19:07:43 +0200
Subject: [PATCH 7/8] Move wx/debug.h inclusion after SIZEOF_WCHAR_T in
 wx/defs.h

This is required now because wx/debug.h includes wx/chartype.h which uses
SIZEOF_WCHAR_T to define wxUSE_UNICODE_UTF16.
---
 include/wx/defs.h | 80 +++++++++++++++++++++++------------------------
 1 file changed, 39 insertions(+), 41 deletions(-)

diff --git a/include/wx/defs.h b/include/wx/defs.h
index f21937cd23..b63bd7491c 100644
--- a/include/wx/defs.h
+++ b/include/wx/defs.h
@@ -672,47 +672,6 @@ typedef short int WXTYPE;
 /*  breaks C++ code) */
 #include <stddef.h>
 
-#ifdef __cplusplus
-
-// everybody gets the assert and other debug macros
-#include "wx/debug.h"
-
-    // delete pointer if it is not NULL and NULL it afterwards
-    template <typename T>
-    inline void wxDELETE(T*& ptr)
-    {
-        typedef char TypeIsCompleteCheck[sizeof(T)] WX_ATTRIBUTE_UNUSED;
-
-        if ( ptr != NULL )
-        {
-            delete ptr;
-            ptr = NULL;
-        }
-    }
-
-    // delete an array and NULL it (see comments above)
-    template <typename T>
-    inline void wxDELETEA(T*& ptr)
-    {
-        typedef char TypeIsCompleteCheck[sizeof(T)] WX_ATTRIBUTE_UNUSED;
-
-        if ( ptr != NULL )
-        {
-            delete [] ptr;
-            ptr = NULL;
-        }
-    }
-
-    // trivial implementation of std::swap() for primitive types
-    template <typename T>
-    inline void wxSwap(T& first, T& second)
-    {
-        T tmp(first);
-        first = second;
-        second = tmp;
-    }
-#endif /*__cplusplus*/
-
 /*  size of statically declared array */
 #define WXSIZEOF(array)   (sizeof(array)/sizeof(array[0]))
 
@@ -1227,6 +1186,45 @@ typedef wxUint32 wxDword;
 #endif
 
 #ifdef __cplusplus
+
+// everybody gets the assert and other debug macros
+#include "wx/debug.h"
+
+    // delete pointer if it is not NULL and NULL it afterwards
+    template <typename T>
+    inline void wxDELETE(T*& ptr)
+    {
+        typedef char TypeIsCompleteCheck[sizeof(T)] WX_ATTRIBUTE_UNUSED;
+
+        if ( ptr != NULL )
+        {
+            delete ptr;
+            ptr = NULL;
+        }
+    }
+
+    // delete an array and NULL it (see comments above)
+    template <typename T>
+    inline void wxDELETEA(T*& ptr)
+    {
+        typedef char TypeIsCompleteCheck[sizeof(T)] WX_ATTRIBUTE_UNUSED;
+
+        if ( ptr != NULL )
+        {
+            delete [] ptr;
+            ptr = NULL;
+        }
+    }
+
+    // trivial implementation of std::swap() for primitive types
+    template <typename T>
+    inline void wxSwap(T& first, T& second)
+    {
+        T tmp(first);
+        first = second;
+        second = tmp;
+    }
+
 /* And also define a couple of simple functions to cast pointer to/from it. */
 inline wxUIntPtr wxPtrToUInt(const void *p)
 {

From 8311715bdf2d0e3446c6875efec945ff1b87fb6b Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Wed, 21 Jun 2017 19:08:27 +0200
Subject: [PATCH 8/8] Ensure that SIZEOF_WCHAR_T is defined in wx/chartype.h

This should always be the case now, but wasn't when not using configure
(e.g. in MSVC builds) before, so verify this explicitly to ensure that
we don't just silently define wxUSE_UNICODE_UTF16 wrongly, as it
happened before the fix in the previous commit.
---
 include/wx/chartype.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/wx/chartype.h b/include/wx/chartype.h
index 0e81a3344d..3ff445f602 100644
--- a/include/wx/chartype.h
+++ b/include/wx/chartype.h
@@ -175,6 +175,10 @@
     #define wxUSE_UTF8_LOCALE_ONLY 0
 #endif
 
+#ifndef SIZEOF_WCHAR_T
+    #error "SIZEOF_WCHAR_T must be defined before including this file in wx/defs.h"
+#endif
+
 #if wxUSE_UNICODE_WCHAR && SIZEOF_WCHAR_T == 2
     #define wxUSE_UNICODE_UTF16 1
 #else