Check that the string passed to FromUTF8() is valid even in release builds, as this is safer; add a separate FromUTF8Unchecked() which can be used for maximal efficiency.

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54721 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2008-07-19 19:59:59 +00:00
parent 9f10e7c758
commit cc209a518f
5 changed files with 61 additions and 12 deletions

View File

@@ -44,7 +44,7 @@ inline NSString* wxInitNSStringWithWxString(NSString *nsstring, const wxString &
inline wxString wxStringWithNSString(NSString *nsstring)
{
#if wxUSE_UNICODE
return wxString::FromUTF8([nsstring UTF8String]);
return wxString::FromUTF8Unchecked([nsstring UTF8String]);
#else
return wxString([nsstring lossyCString]);
#endif // wxUSE_UNICODE

View File

@@ -32,7 +32,7 @@ extern const gchar *wx_pango_version_check(int major, int minor, int micro);
#define wxGTK_CONV_FONT(s, font) wxGTK_CONV((s))
#define wxGTK_CONV_SYS(s) wxGTK_CONV((s))
#define wxGTK_CONV_BACK(s) wxString::FromUTF8(s)
#define wxGTK_CONV_BACK(s) wxString::FromUTF8Unchecked(s)
#define wxGTK_CONV_BACK_ENC(s, enc) wxGTK_CONV_BACK(s)
#define wxGTK_CONV_BACK_FONT(s, font) wxGTK_CONV_BACK(s)
#define wxGTK_CONV_BACK_SYS(s) wxGTK_CONV_BACK(s)

View File

@@ -1268,7 +1268,7 @@ public:
// conversion to/from UTF-8:
#if wxUSE_UNICODE_UTF8
static wxString FromUTF8(const char *utf8)
static wxString FromUTF8Unchecked(const char *utf8)
{
if ( !utf8 )
return wxEmptyString;
@@ -1276,16 +1276,35 @@ public:
wxASSERT( wxStringOperations::IsValidUtf8String(utf8) );
return FromImpl(wxStringImpl(utf8));
}
static wxString FromUTF8(const char *utf8, size_t len)
static wxString FromUTF8Unchecked(const char *utf8, size_t len)
{
if ( !utf8 )
return wxEmptyString;
if ( len == npos )
return FromUTF8(utf8);
return FromUTF8Unchecked(utf8);
wxASSERT( wxStringOperations::IsValidUtf8String(utf8, len) );
return FromImpl(wxStringImpl(utf8, len));
}
// Checked conversion from a NUL-terminated UTF-8 C string.
//
// Yields an empty string when utf8 is NULL or fails the
// wxStringOperations::IsValidUtf8String() test; otherwise the bytes are
// wrapped in a wxStringImpl and handed to FromImpl().
static wxString FromUTF8(const char *utf8)
{
    // The pointer test must come first so that a NULL pointer never reaches
    // the validator.
    const bool usable = utf8 && wxStringOperations::IsValidUtf8String(utf8);
    if ( usable )
        return FromImpl(wxStringImpl(utf8));

    return "";
}
// Checked conversion from the first len bytes of a UTF-8 buffer.
//
// A len of npos forwards to the single-argument overload. Otherwise an
// empty string is returned when utf8 is NULL or when
// wxStringOperations::IsValidUtf8String() rejects the len bytes.
static wxString FromUTF8(const char *utf8, size_t len)
{
    if ( len == npos )
        return FromUTF8(utf8);

    // Check the pointer before validating so NULL is never passed on.
    if ( utf8 && wxStringOperations::IsValidUtf8String(utf8, len) )
        return FromImpl(wxStringImpl(utf8, len));

    return "";
}
// Both accessors hand back wx_str() as is; ToUTF8() and utf8_str() are
// interchangeable here.
const char* utf8_str() const { return wx_str(); }
const char* ToUTF8() const { return wx_str(); }
@@ -1293,10 +1312,15 @@ public:
// internal UTF-8 representation
size_t utf8_length() const { return m_impl.length(); }
#elif wxUSE_UNICODE_WCHAR
static wxString FromUTF8(const char *utf8)
{ return wxString(utf8, wxMBConvUTF8()); }
static wxString FromUTF8(const char *utf8, size_t len)
static wxString FromUTF8(const char *utf8, size_t len = npos)
{ return wxString(utf8, wxMBConvUTF8(), len); }
// Conversion from UTF-8 whose only validity check is a debug assert.
//
// The input is converted using wxMBConvUTF8; len limits the number of bytes
// taken from utf8 and defaults to npos. The converted string is returned
// unconditionally; the assert merely flags the case where non-empty input
// produced an empty result, which is taken to mean the input was not valid
// UTF-8.
static wxString FromUTF8Unchecked(const char *utf8, size_t len = npos)
{
    const wxString s(utf8, wxMBConvUTF8(), len);

    // An empty result is expected -- and must not assert -- when there was
    // nothing to convert: a NULL pointer, an explicit zero length or an
    // empty string. The length is tested before *utf8 so that a zero-length
    // buffer is never dereferenced.
    wxASSERT_MSG( !utf8 || !len || !*utf8 || !s.empty(),
                  "string must be valid UTF-8" );
    return s;
}
// utf8_str() converts through mb_str() with a wxMBConvUTF8 converter and
// returns the result as a wxCharBuffer; ToUTF8() simply forwards to it.
const wxCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); }
const wxCharBuffer ToUTF8() const { return utf8_str(); }
#else // ANSI

View File

@@ -623,14 +623,39 @@ public:
//@{
/**
Converts C string encoded in UTF-8 to wxString.
Note that this method assumes that @a s is a valid UTF-8 sequence and
doesn't do any validation in release builds; its validity is only checked in
debug builds.
If @a s is not a valid UTF-8 string, an empty string is returned.
Notice that when using the UTF-8 build of wxWidgets there is a more
efficient alternative to this function called FromUTF8Unchecked() which,
unlike this one, doesn't check that the input string is valid.
@since 2.8.4
*/
static wxString FromUTF8(const char* s);
static wxString FromUTF8(const char* s, size_t len);
//@}
//@{
/**
Converts C string encoded in UTF-8 to wxString without checking its
validity.
This method assumes that @a s is a valid UTF-8 sequence and doesn't do
any validation (although an assert failure is triggered in debug builds
if the string is invalid). Only use it if you are absolutely sure that
@a s is a correct UTF-8 string (e.g. because it comes from another
library using UTF-8) and if performance matters, otherwise use the
slower (in UTF-8 build) but safer FromUTF8(). Passing a bad UTF-8
string to this function will result in creating a corrupted wxString
and the behaviour of all subsequent operations on it will be undefined.
@since 2.8.9
*/
static wxString FromUTF8Unchecked(const char* s);
static wxString FromUTF8Unchecked(const char* s, size_t len);
//@}
/**
Returns the character at position @a n (read-only).
*/

View File

@@ -485,7 +485,7 @@ static wxString CharToString(wxMBConv *conv,
#endif // !wxUSE_UNICODE
wxUnusedVar(conv);
return wxString::FromUTF8(s, len);
return wxString::FromUTF8Unchecked(s, len);
}
// returns true if the given string contains only whitespaces