From 8b812a92aecb2081adc1e42f85d6ce090197854d Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Thu, 15 Jul 2021 22:13:21 +0200
Subject: [PATCH] Always convert to UTF-8 if conversion is necessary

When using system regex functions in Unicode build, convert to UTF-8 and
not the current locale encoding, as this should work just as well if the
conversion doesn't fail and even better if it would fail when converting
to the current locale encoding because conversion to UTF-8 never does.
---
 src/common/regex.cpp | 36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/src/common/regex.cpp b/src/common/regex.cpp
index ed86b31c0c..55112667ba 100644
--- a/src/common/regex.cpp
+++ b/src/common/regex.cpp
@@ -44,17 +44,11 @@
 // WXREGEX_IF_NEED_LEN()    wrap the len parameter only used with the built-in
 //                          or GNU regex
 // WXREGEX_CONVERT_TO_MB    defined when the regex lib is using chars and
-//                          wxChar is wide, so conversion must be done
-// WXREGEX_CHAR(x)          Convert wxChar to wxRegChar
+//                          wxChar is wide, so conversion to UTF-8 must be done
 //
 #ifdef __REG_NOFRONT
 #   define WXREGEX_USING_BUILTIN
 #   define WXREGEX_IF_NEED_LEN(x) ,x
-#   if wxUSE_UNICODE
-#       define WXREGEX_CHAR(x) (x).wc_str()
-#   else
-#       define WXREGEX_CHAR(x) (x).mb_str()
-#   endif
 #else
 #   ifdef HAVE_RE_SEARCH
 #       define WXREGEX_IF_NEED_LEN(x) ,x
@@ -745,14 +739,8 @@ int wxRegExImpl::Replace(wxString *text,
     const wxChar *textstr = text->c_str();
     size_t textlen = text->length();
 #else
-    const wxWX2MBbuf textstr = WXREGEX_CHAR(*text);
-    if (!textstr)
-    {
-        wxLogError(_("Failed to find match for regular expression: %s"),
-                   GetErrorMsg(0, true));
-        return 0;
-    }
-    size_t textlen = strlen(textstr);
+    const wxScopedCharBuffer textstr = text->utf8_str();
+    size_t textlen = textstr.length();
     text->clear();
 #endif
 
@@ -841,7 +829,7 @@ int wxRegExImpl::Replace(wxString *text,
                                 textstr.data()
 #endif
                                 + matchStart + start,
-                                *wxConvCurrent, len);
+                                wxConvUTF8, len);
 
                         mayHaveBackrefs = true;
                     }
@@ -870,7 +858,7 @@ int wxRegExImpl::Replace(wxString *text,
 #ifndef WXREGEX_CONVERT_TO_MB
         result.append(*text, matchStart, start);
 #else
-        result.append(wxString(textstr.data() + matchStart, *wxConvCurrent, start));
+        result.append(wxString(textstr.data() + matchStart, wxConvUTF8, start));
 #endif
         matchStart += start;
         result.append(textNew);
@@ -883,7 +871,7 @@ int wxRegExImpl::Replace(wxString *text,
 #ifndef WXREGEX_CONVERT_TO_MB
     result.append(*text, matchStart, wxString::npos);
 #else
-    result.append(wxString(textstr.data() + matchStart, *wxConvCurrent));
+    result.append(wxString(textstr.data() + matchStart, wxConvUTF8));
 #endif
     *text = result;
 
@@ -926,8 +914,16 @@ bool wxRegEx::Matches(const wxString& str, int flags) const
 {
     wxCHECK_MSG( IsValid(), false, wxT("must successfully Compile() first") );
 
-    return m_impl->Matches(WXREGEX_CHAR(str), flags
-                            WXREGEX_IF_NEED_LEN(str.length()));
+#ifndef WXREGEX_CONVERT_TO_MB
+    const wxChar* const textstr = str.c_str();
+    const size_t textlen = str.length();
+#else
+    const wxScopedCharBuffer textstr = str.utf8_str();
+    const size_t textlen = textstr.length();
+#endif
+
+    return m_impl->Matches(textstr, flags
+                           WXREGEX_IF_NEED_LEN(textlen));
 }
 
 bool wxRegEx::GetMatch(size_t *start, size_t *len, size_t index) const