Add wxString::utf8_string()

This adds yet another conversion function, which is not ideal, but is
still better than having to write ToStdString(wxConvUTF8) every time to
losslessly convert wxString to std::string: not only is this too
long, but it's also too easy to forget to specify wxConvUTF8, resulting
in data loss when using a non-UTF-8 locale.
This commit is contained in:
Vadim Zeitlin
2021-03-06 15:12:07 +01:00
parent 0aacc6a9ab
commit 0f8e976ac3
3 changed files with 24 additions and 2 deletions

View File

@@ -1715,6 +1715,8 @@ public:
return wxString();
return FromImpl(utf8);
}
// Return the string contents as an std::string.
//
// In this build branch the internal representation (m_impl) is handed back
// as is, without going through any wxMBConv object; presumably it already
// holds UTF-8 here -- NOTE(review): confirm against the enclosing #if.
std::string utf8_string() const
{
    std::string utf8 = m_impl;
    return utf8;
}
#endif
const wxScopedCharBuffer utf8_str() const
@@ -1738,6 +1740,8 @@ public:
{ return FromUTF8(utf8.c_str(), utf8.length()); }
// Convenience overload taking an std::string: forwards the buffer together
// with its explicit length to the (pointer, length) overload.
static wxString FromUTF8Unchecked(const std::string& utf8)
{
    const char* const bytes = utf8.c_str();
    const std::string::size_type numBytes = utf8.length();

    return FromUTF8Unchecked(bytes, numBytes);
}
std::string utf8_string() const { return ToStdString(wxMBConvUTF8()); }
#endif
// Return the string contents converted with a UTF-8 converter, as a
// wxScopedCharBuffer produced by mb_str().
const wxScopedCharBuffer utf8_str() const
{
    wxMBConvUTF8 utf8Conv;
    return mb_str(utf8Conv);
}
#else // ANSI
@@ -1771,6 +1775,8 @@ public:
{ return FromUTF8(utf8.c_str(), utf8.length()); }
// Convenience overload taking an std::string: forwards the buffer together
// with its explicit length to the (pointer, length) overload.
static wxString FromUTF8Unchecked(const std::string& utf8)
{
    const char* const bytes = utf8.c_str();
    const std::string::size_type numBytes = utf8.length();

    return FromUTF8Unchecked(bytes, numBytes);
}
std::string utf8_string() const { return ToStdString(wxMBConvUTF8()); }
#endif
const wxScopedCharBuffer utf8_str() const
{

View File

@@ -87,7 +87,8 @@
wxMBConv object. This is also a potentially destructive operation.
- Standard @c std::string using wxString::ToStdString(). The encoding
of the returned string is specified with a wxMBConv object, so this
conversion is potentially destructive as well.
conversion is potentially destructive as well. To ensure that there is
no data loss, use @c wxConvUTF8 conversion or wxString::utf8_string().
- Wide C string using wxString::wc_str().
- Standard @c std::wstring using wxString::ToStdWstring().
@@ -679,6 +680,16 @@ public:
*/
const wxScopedCharBuffer utf8_str() const;
/**
Return the string as an std::string using UTF-8 encoding.
This is a shorter and more readable equivalent of calling ToStdString()
with @c wxConvUTF8 argument.
@since 3.1.5
*/
const std::string utf8_string() const;
/**
Converts the string's contents to the wide character representation
and returns it as a temporary wxWCharBuffer object (Unix and macOS)
@@ -749,7 +760,9 @@ public:
Note that if the conversion of (Unicode) string contents using @e conv
fails, the return string will be empty. Be sure to check for
this to avoid silent data loss.
this to avoid silent data loss. Alternatively, pass @c wxConvUTF8 as @a
conv or use utf8_string() to always use UTF-8 encoding, rather than the
current one.
Instead of using this function it's also possible to write
@code

View File

@@ -571,10 +571,13 @@ TEST_CASE("StdString::Conversion", "[stdstring]")
std::string s9("\xF0\x9F\x90\xB1\0\xE7\x8C\xAB", 9); /* U+1F431 U+0000 U+732B */
wxString s10 = wxString::FromUTF8(s9);
CHECK( s10.ToStdString(wxConvUTF8) == s9 );
CHECK( s10.utf8_string() == s9 );
#endif // wxUSE_UNICODE
std::string s11("xyz\0\xFF", 5); /* an invalid UTF-8 sequence */
CHECK( "" == wxString::FromUTF8(s11) );
CHECK( wxString("bye").utf8_string() == std::string("bye") );
}
#endif // wxUSE_STD_STRING