Check that the string passed to FromUTF8() is valid even in release builds, as this is safer; add a separate FromUTF8Unchecked() which can be used for maximal efficiency.

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54721 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2008-07-19 19:59:59 +00:00
parent 9f10e7c758
commit cc209a518f
5 changed files with 61 additions and 12 deletions

View File

@@ -44,7 +44,7 @@ inline NSString* wxInitNSStringWithWxString(NSString *nsstring, const wxString &
inline wxString wxStringWithNSString(NSString *nsstring) inline wxString wxStringWithNSString(NSString *nsstring)
{ {
#if wxUSE_UNICODE #if wxUSE_UNICODE
return wxString::FromUTF8([nsstring UTF8String]); return wxString::FromUTF8Unchecked([nsstring UTF8String]);
#else #else
return wxString([nsstring lossyCString]); return wxString([nsstring lossyCString]);
#endif // wxUSE_UNICODE #endif // wxUSE_UNICODE

View File

@@ -32,7 +32,7 @@ extern const gchar *wx_pango_version_check(int major, int minor, int micro);
#define wxGTK_CONV_FONT(s, font) wxGTK_CONV((s)) #define wxGTK_CONV_FONT(s, font) wxGTK_CONV((s))
#define wxGTK_CONV_SYS(s) wxGTK_CONV((s)) #define wxGTK_CONV_SYS(s) wxGTK_CONV((s))
#define wxGTK_CONV_BACK(s) wxString::FromUTF8(s) #define wxGTK_CONV_BACK(s) wxString::FromUTF8Unchecked(s)
#define wxGTK_CONV_BACK_ENC(s, enc) wxGTK_CONV_BACK(s) #define wxGTK_CONV_BACK_ENC(s, enc) wxGTK_CONV_BACK(s)
#define wxGTK_CONV_BACK_FONT(s, font) wxGTK_CONV_BACK(s) #define wxGTK_CONV_BACK_FONT(s, font) wxGTK_CONV_BACK(s)
#define wxGTK_CONV_BACK_SYS(s) wxGTK_CONV_BACK(s) #define wxGTK_CONV_BACK_SYS(s) wxGTK_CONV_BACK(s)

View File

@@ -1268,7 +1268,7 @@ public:
// conversion to/from UTF-8: // conversion to/from UTF-8:
#if wxUSE_UNICODE_UTF8 #if wxUSE_UNICODE_UTF8
static wxString FromUTF8(const char *utf8) static wxString FromUTF8Unchecked(const char *utf8)
{ {
if ( !utf8 ) if ( !utf8 )
return wxEmptyString; return wxEmptyString;
@@ -1276,16 +1276,35 @@ public:
wxASSERT( wxStringOperations::IsValidUtf8String(utf8) ); wxASSERT( wxStringOperations::IsValidUtf8String(utf8) );
return FromImpl(wxStringImpl(utf8)); return FromImpl(wxStringImpl(utf8));
} }
static wxString FromUTF8(const char *utf8, size_t len) static wxString FromUTF8Unchecked(const char *utf8, size_t len)
{ {
if ( !utf8 ) if ( !utf8 )
return wxEmptyString; return wxEmptyString;
if ( len == npos ) if ( len == npos )
return FromUTF8(utf8); return FromUTF8Unchecked(utf8);
wxASSERT( wxStringOperations::IsValidUtf8String(utf8, len) ); wxASSERT( wxStringOperations::IsValidUtf8String(utf8, len) );
return FromImpl(wxStringImpl(utf8, len)); return FromImpl(wxStringImpl(utf8, len));
} }
// Validating conversion from a NUL-terminated UTF-8 C string.
//
// The input is checked with wxStringOperations::IsValidUtf8String() before
// being used; a null pointer or an input failing that check yields an empty
// string instead of a wxString built from the bad data.
static wxString FromUTF8(const char *utf8)
{
    // only build the string from input which is present and well-formed
    if ( utf8 && wxStringOperations::IsValidUtf8String(utf8) )
        return FromImpl(wxStringImpl(utf8));

    return "";
}
// Validating conversion from a UTF-8 buffer of explicit length.
//
// Passing npos as len defers to the single-argument overload. Otherwise the
// first len bytes are checked with wxStringOperations::IsValidUtf8String();
// a null pointer or a buffer failing that check yields an empty string.
static wxString FromUTF8(const char *utf8, size_t len)
{
    // no explicit length: let the overload taking only the pointer handle it
    if ( len == npos )
        return FromUTF8(utf8);

    // only build the string from input which is present and well-formed
    if ( utf8 && wxStringOperations::IsValidUtf8String(utf8, len) )
        return FromImpl(wxStringImpl(utf8, len));

    return "";
}
const char* utf8_str() const { return wx_str(); } const char* utf8_str() const { return wx_str(); }
const char* ToUTF8() const { return wx_str(); } const char* ToUTF8() const { return wx_str(); }
@@ -1293,10 +1312,15 @@ public:
// internal UTF-8 representation // internal UTF-8 representation
size_t utf8_length() const { return m_impl.length(); } size_t utf8_length() const { return m_impl.length(); }
#elif wxUSE_UNICODE_WCHAR #elif wxUSE_UNICODE_WCHAR
static wxString FromUTF8(const char *utf8) static wxString FromUTF8(const char *utf8, size_t len = npos)
{ return wxString(utf8, wxMBConvUTF8()); }
static wxString FromUTF8(const char *utf8, size_t len)
{ return wxString(utf8, wxMBConvUTF8(), len); } { return wxString(utf8, wxMBConvUTF8(), len); }
// Convert a UTF-8 buffer to wxString without rejecting invalid input.
//
// utf8 is the source buffer and len the number of bytes to convert; npos
// (the default) is passed through unchanged to the wxString constructor.
// The conversion is done using wxMBConvUTF8 and its result is returned as
// is. The only check is a debug assertion that non-empty input did not
// produce an empty string.
static wxString FromUTF8Unchecked(const char *utf8, size_t len = npos)
{
    const wxString s(utf8, wxMBConvUTF8(), len);

    // An empty result is only suspicious if there was something to convert:
    // a zero length request legitimately yields an empty string, and testing
    // len first also avoids reading *utf8 from a zero-sized buffer.
    wxASSERT_MSG( len == 0 || !utf8 || !*utf8 || !s.empty(),
                  "string must be valid UTF-8" );

    return s;
}
const wxCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); } const wxCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); }
const wxCharBuffer ToUTF8() const { return utf8_str(); } const wxCharBuffer ToUTF8() const { return utf8_str(); }
#else // ANSI #else // ANSI

View File

@@ -623,14 +623,39 @@ public:
//@{ //@{
/** /**
Converts C string encoded in UTF-8 to wxString. Converts C string encoded in UTF-8 to wxString.
Note that this method assumes that @a s is a valid UTF-8 sequence and
doesn't do any validation in release builds, its validity is only checked in If @a s is not a valid UTF-8 string, an empty string is returned.
debug builds.
Notice that when using a UTF-8 build of wxWidgets there is a more
efficient alternative to this function called FromUTF8Unchecked() which,
unlike this one, doesn't check that the input string is valid.
@since 2.8.4
*/ */
static wxString FromUTF8(const char* s); static wxString FromUTF8(const char* s);
static wxString FromUTF8(const char* s, size_t len); static wxString FromUTF8(const char* s, size_t len);
//@} //@}
//@{
/**
Converts C string encoded in UTF-8 to wxString without checking its
validity.
This method assumes that @a s is a valid UTF-8 sequence and doesn't do
any validation (although an assert failure is triggered in debug builds
if the string is invalid). Only use it if you are absolutely sure that
@a s is a correct UTF-8 string (e.g. because it comes from another
library using UTF-8) and if performance matters; otherwise use the
slower (in UTF-8 build) but safer FromUTF8(). Passing a bad UTF-8
string to this function will result in creating a corrupted wxString
and all subsequent operations on it will be undefined.
@since 2.8.9
*/
static wxString FromUTF8Unchecked(const char* s);
static wxString FromUTF8Unchecked(const char* s, size_t len);
//@}
/** /**
Returns the character at position @a n (read-only). Returns the character at position @a n (read-only).
*/ */

View File

@@ -485,7 +485,7 @@ static wxString CharToString(wxMBConv *conv,
#endif // !wxUSE_UNICODE #endif // !wxUSE_UNICODE
wxUnusedVar(conv); wxUnusedVar(conv);
return wxString::FromUTF8(s, len); return wxString::FromUTF8Unchecked(s, len);
} }
// returns true if the given string contains only whitespaces // returns true if the given string contains only whitespaces