Add wxString::utf8_string()

This adds yet another conversion function, which is not ideal, but is still better than having to write ToStdString(wxConvUTF8) every time to losslessly convert a wxString to std::string: not only is that too long, but it's also too easy to forget to specify wxConvUTF8, resulting in data loss when using a non-UTF-8 locale.
2021-03-06 15:12:07 +01:00
parent 0aacc6a9ab
commit 0f8e976ac3
3 changed files with 24 additions and 2 deletions
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -1715,6 +1715,8 @@ public:
            return wxString();
        return FromImpl(utf8);
    }
    // Return the contents as a std::string in UTF-8.
    //
    // m_impl is returned directly, without going through a conversion object
    // (presumably because the internal representation is already UTF-8 in
    // this build configuration -- confirm against the enclosing #if).
    std::string utf8_string() const
    {
        return m_impl;
    }
 #endif
    const wxScopedCharBuffer utf8_str() const
@@ -1738,6 +1740,8 @@ public:
      { return FromUTF8(utf8.c_str(), utf8.length()); }
    // std::string overload: hands the string's buffer and its length over to
    // the two-argument FromUTF8Unchecked() overload.
    static wxString FromUTF8Unchecked(const std::string& utf8)
    {
        const char* const data = utf8.c_str();
        return FromUTF8Unchecked(data, utf8.length());
    }
    // Return the contents as a std::string in UTF-8 by delegating to
    // ToStdString() with a UTF-8 converter, so the result does not depend on
    // the conversion ToStdString() would use by default.
    std::string utf8_string() const
    {
        return ToStdString(wxMBConvUTF8());
    }
 #endif
    // Return the contents as a char buffer produced by mb_str() with a UTF-8
    // converter.
    const wxScopedCharBuffer utf8_str() const
    {
        return mb_str(wxMBConvUTF8());
    }
 #else // ANSI
@@ -1771,6 +1775,8 @@ public:
      { return FromUTF8(utf8.c_str(), utf8.length()); }
    // std::string overload: hands the string's buffer and its length over to
    // the two-argument FromUTF8Unchecked() overload.
    static wxString FromUTF8Unchecked(const std::string& utf8)
    {
        const char* const data = utf8.c_str();
        return FromUTF8Unchecked(data, utf8.length());
    }
    // Return the contents as a std::string in UTF-8 by delegating to
    // ToStdString() with a UTF-8 converter, so the result does not depend on
    // the conversion ToStdString() would use by default.
    std::string utf8_string() const
    {
        return ToStdString(wxMBConvUTF8());
    }
 #endif
    const wxScopedCharBuffer utf8_str() const
    {
--- a/interface/wx/string.h
+++ b/interface/wx/string.h
@@ -87,7 +87,8 @@
        wxMBConv object. This is also a potentially destructive operation.
        - Standard @c std::string using wxString::ToStdString(). The encoding
        of the returned string is specified with a wxMBConv object, so this
-        conversion is potentially destructive as well.
+        conversion is potentially destructive as well. To ensure that there is
        no data loss, use @c wxConvUTF8 conversion or wxString::utf8_string().
        - Wide C string using wxString::wc_str().
        - Standard @c std::wstring using wxString::ToStdWstring().
@@ -679,6 +680,16 @@ public:
    */
    const wxScopedCharBuffer utf8_str() const;
    /**
        Return the string as an std::string using UTF-8 encoding.
        This is a shorter and more readable equivalent of calling ToStdString()
        with @c wxConvUTF8 argument.
        @since 3.1.5
     */
    const std::string utf8_string() const;
    /**
        Converts the string's contents to the wide character representation
        and returns it as a temporary wxWCharBuffer object (Unix and macOS)
@@ -749,7 +760,9 @@ public:
        Note that if the conversion of (Unicode) string contents using @e conv
        fails, the returned string will be empty. Be sure to check for
-        this to avoid silent data loss.
+        this to avoid silent data loss. Alternatively, pass @c wxConvUTF8 as @a
        conv or use utf8_string() to always use UTF-8 encoding, rather than the
        current one.
        Instead of using this function it's also possible to write
        @code
--- a/tests/strings/stdstrings.cpp
+++ b/tests/strings/stdstrings.cpp
@@ -571,10 +571,13 @@ TEST_CASE("StdString::Conversion", "[stdstring]")
    std::string s9("\xF0\x9F\x90\xB1\0\xE7\x8C\xAB", 9); /* U+1F431 U+0000 U+732B */
    wxString s10 = wxString::FromUTF8(s9);
    CHECK( s10.ToStdString(wxConvUTF8) == s9 );
    CHECK( s10.utf8_string() == s9 );
 #endif // wxUSE_UNICODE
    std::string s11("xyz\0\xFF", 5); /* an invalid UTF-8 sequence */
    CHECK( "" == wxString::FromUTF8(s11) );
    CHECK( wxString("bye").utf8_string() == std::string("bye") );
 }
 #endif // wxUSE_STD_STRING