Add wxString::utf8_string()

This adds yet another conversion function, which is not ideal, but is
still better than having to write ToStdString(wxConvUTF8) every time to
losslessly convert wxString to std::string: not only is this too
long, but it's also too easy to forget to specify wxConvUTF8, resulting
in data loss when using a non-UTF-8 locale.
This commit is contained in:
Vadim Zeitlin
2021-03-06 15:12:07 +01:00
parent 0aacc6a9ab
commit 0f8e976ac3
3 changed files with 24 additions and 2 deletions

View File

@@ -1715,6 +1715,8 @@ public:
return wxString(); return wxString();
return FromImpl(utf8); return FromImpl(utf8);
} }
// Return the string contents as a std::string by handing back the internal
// m_impl member as is, without calling any conversion function.
// NOTE(review): presumably m_impl already holds UTF-8 data in this build
// branch -- confirm against the enclosing #if condition.
std::string utf8_string() const { return m_impl; }
#endif #endif
const wxScopedCharBuffer utf8_str() const const wxScopedCharBuffer utf8_str() const
@@ -1738,6 +1740,8 @@ public:
{ return FromUTF8(utf8.c_str(), utf8.length()); } { return FromUTF8(utf8.c_str(), utf8.length()); }
static wxString FromUTF8Unchecked(const std::string& utf8) static wxString FromUTF8Unchecked(const std::string& utf8)
{ return FromUTF8Unchecked(utf8.c_str(), utf8.length()); } { return FromUTF8Unchecked(utf8.c_str(), utf8.length()); }
std::string utf8_string() const { return ToStdString(wxMBConvUTF8()); }
#endif #endif
const wxScopedCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); } const wxScopedCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); }
#else // ANSI #else // ANSI
@@ -1771,6 +1775,8 @@ public:
{ return FromUTF8(utf8.c_str(), utf8.length()); } { return FromUTF8(utf8.c_str(), utf8.length()); }
static wxString FromUTF8Unchecked(const std::string& utf8) static wxString FromUTF8Unchecked(const std::string& utf8)
{ return FromUTF8Unchecked(utf8.c_str(), utf8.length()); } { return FromUTF8Unchecked(utf8.c_str(), utf8.length()); }
std::string utf8_string() const { return ToStdString(wxMBConvUTF8()); }
#endif #endif
const wxScopedCharBuffer utf8_str() const const wxScopedCharBuffer utf8_str() const
{ {

View File

@@ -87,7 +87,8 @@
wxMBConv object. This is also a potentially destructive operation. wxMBConv object. This is also a potentially destructive operation.
- Standard @c std::string using wxString::ToStdString(). The encoding - Standard @c std::string using wxString::ToStdString(). The encoding
of the returned string is specified with a wxMBConv object, so this of the returned string is specified with a wxMBConv object, so this
conversion is potentially destructive as well. conversion is potentially destructive as well. To ensure that there is
no data loss, use @c wxConvUTF8 conversion or wxString::utf8_string().
- Wide C string using wxString::wc_str(). - Wide C string using wxString::wc_str().
- Standard @c std::wstring using wxString::ToStdWstring(). - Standard @c std::wstring using wxString::ToStdWstring().
@@ -679,6 +680,16 @@ public:
*/ */
const wxScopedCharBuffer utf8_str() const; const wxScopedCharBuffer utf8_str() const;
/**
Return the string as an std::string using UTF-8 encoding.
This is a shorter and more readable equivalent of calling ToStdString()
with @c wxConvUTF8 argument.
@since 3.1.5
*/
const std::string utf8_string() const;
/** /**
Converts the strings contents to the wide character representation Converts the strings contents to the wide character representation
and returns it as a temporary wxWCharBuffer object (Unix and macOS) and returns it as a temporary wxWCharBuffer object (Unix and macOS)
@@ -749,7 +760,9 @@ public:
Note that if the conversion of (Unicode) string contents using @e conv Note that if the conversion of (Unicode) string contents using @e conv
fails, the return string will be empty. Be sure to check for fails, the return string will be empty. Be sure to check for
this to avoid silent data loss. this to avoid silent data loss. Alternatively, pass @c wxConvUTF8 as @a
conv or use utf8_string() to always use UTF-8 encoding, rather than the
current one.
Instead of using this function it's also possible to write Instead of using this function it's also possible to write
@code @code

View File

@@ -571,10 +571,13 @@ TEST_CASE("StdString::Conversion", "[stdstring]")
std::string s9("\xF0\x9F\x90\xB1\0\xE7\x8C\xAB", 9); /* U+1F431 U+0000 U+732B */ std::string s9("\xF0\x9F\x90\xB1\0\xE7\x8C\xAB", 9); /* U+1F431 U+0000 U+732B */
wxString s10 = wxString::FromUTF8(s9); wxString s10 = wxString::FromUTF8(s9);
CHECK( s10.ToStdString(wxConvUTF8) == s9 ); CHECK( s10.ToStdString(wxConvUTF8) == s9 );
CHECK( s10.utf8_string() == s9 );
#endif // wxUSE_UNICODE #endif // wxUSE_UNICODE
std::string s11("xyz\0\xFF", 5); /* an invalid UTF-8 sequence */ std::string s11("xyz\0\xFF", 5); /* an invalid UTF-8 sequence */
CHECK( "" == wxString::FromUTF8(s11) ); CHECK( "" == wxString::FromUTF8(s11) );
CHECK( wxString("bye").utf8_string() == std::string("bye") );
} }
#endif // wxUSE_STD_STRING #endif // wxUSE_STD_STRING