From 97badc2a86968b8e09bdf03b43a8b22142f81946 Mon Sep 17 00:00:00 2001 From: Stefan Csomor Date: Thu, 16 Apr 2009 07:11:57 +0000 Subject: [PATCH] merging r60108, r60116, r60120, r60121, r60125 and r60126 git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/branches/WX_2_9_0_BRANCH@60194 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- include/wx/private/wxprintf.h | 195 +++++++++++++++++++---------- include/wx/string.h | 229 +++++++++++++++++++++++++--------- src/common/string.cpp | 211 ++++++++++++------------------- src/common/wxprintf.cpp | 15 ++- src/xml/xml.cpp | 12 +- tests/strings/vsnprintf.cpp | 10 +- 6 files changed, 402 insertions(+), 270 deletions(-) diff --git a/include/wx/private/wxprintf.h b/include/wx/private/wxprintf.h index 4b7b890454..1bdc7b62ac 100644 --- a/include/wx/private/wxprintf.h +++ b/include/wx/private/wxprintf.h @@ -56,7 +56,8 @@ using namespace std ; // the conversion specifiers accepted by wxCRT_VsnprintfW -enum wxPrintfArgType { +enum wxPrintfArgType +{ wxPAT_INVALID = -1, wxPAT_INT, // %d, %i, %o, %u, %x, %X @@ -79,15 +80,18 @@ enum wxPrintfArgType { wxPAT_NINT, // %n wxPAT_NSHORTINT, // %hn - wxPAT_NLONGINT // %ln + wxPAT_NLONGINT, // %ln + + wxPAT_STAR // '*' used for width or precision }; // an argument passed to wxCRT_VsnprintfW -typedef union { +union wxPrintfArg +{ int pad_int; // %d, %i, %o, %u, %x, %X long int pad_longint; // %ld, etc #ifdef wxLongLong_t - wxLongLong_t pad_longlongint; // %Ld, etc + wxLongLong_t pad_longlongint; // %Ld, etc #endif size_t pad_sizet; // %Zd, etc @@ -104,9 +108,9 @@ typedef union { int *pad_nint; // %n short int *pad_nshortint; // %hn long int *pad_nlongint; // %ln -} wxPrintfArg; +}; -// helper for converting string into either char* or wchar_t* dependening +// helper for converting string into either char* or wchar_t* depending // on the type of wxPrintfConvSpec instantiation: template struct wxPrintfStringHelper {}; @@ -159,10 +163,6 @@ public: // a little buffer where formatting flags like #+\.hlqLZ are stored by Parse() // for use in Process() - // NB: even if this buffer is used only for numeric conversion specifiers - // and thus could be safely declared as a char[] buffer, we want it to - // be wchar_t so that in Unicode builds we can avoid to convert its - // contents to Unicode chars when copying it in user's buffer. char m_szFlags[wxMAX_SVNPRINTF_FLAGBUFFER_LEN]; @@ -609,6 +609,10 @@ bool wxPrintfConvSpec::LoadArg(wxPrintfArg *p, va_list &argptr) p->pad_nlongint = va_arg(argptr, long int *); break; + case wxPAT_STAR: + // this will be handled as part of the next argument + return true; + case wxPAT_INVALID: default: return false; @@ -788,72 +792,133 @@ int wxPrintfConvSpec::Process(CharType *buf, size_t lenMax, wxPrintfAr template struct wxPrintfConvSpecParser { - wxPrintfConvSpecParser(const CharType *format) - : posarg_present(false), nonposarg_present(false), - nargs(0) + typedef wxPrintfConvSpec ConvSpec; + + wxPrintfConvSpecParser(const CharType *fmt) { + nargs = 0; + posarg_present = + nonposarg_present = false; + memset(pspec, 0, sizeof(pspec)); - const CharType *toparse = format; - // parse the format string - for (; *toparse != wxT('\0'); toparse++) + for ( const CharType *toparse = fmt; *toparse != wxT('\0'); toparse++ ) { - if (*toparse == wxT('%') ) + // skip everything except format specifications + if ( *toparse != '%' ) + continue; + + // also skip escaped percent signs + if ( toparse[1] == '%' ) { - arg[nargs].Init(); - - // let's see if this is a (valid) conversion specifier... - if (arg[nargs].Parse(toparse)) - { - // ...yes it is - wxPrintfConvSpec *current = &arg[nargs]; - - // make toparse point to the end of this specifier - toparse = current->m_pArgEnd; - - if (current->m_pos > 0) - { - // the positionals start from number 1... adjust the index - current->m_pos--; - posarg_present = true; - } - else - { - // not a positional argument... - current->m_pos = nargs; - nonposarg_present = true; - } - - // this conversion specifier is tied to the pos-th argument... - pspec[current->m_pos] = current; - nargs++; - - if (nargs == wxMAX_SVNPRINTF_ARGUMENTS) - { - wxLogDebug(wxT("A single call to wxVsnprintf() has more than %d arguments; ") - wxT("ignoring all remaining arguments."), wxMAX_SVNPRINTF_ARGUMENTS); - break; // cannot handle any additional conv spec - } - } - else - { - // it's safe to look in the next character of toparse as at - // worst we'll hit its \0 - if (*(toparse+1) == wxT('%')) - { - // the Parse() returned false because we've found a %% - toparse++; - } - } + toparse++; + continue; } + + ConvSpec *spec = &specs[nargs]; + spec->Init(); + + // attempt to parse this format specification + if ( !spec->Parse(toparse) ) + continue; + + // advance to the end of this specifier + toparse = spec->m_pArgEnd; + + // special handling for specifications including asterisks: we need + // to reserve an extra slot (or two if asterisks were used for both + // width and precision) in specs array in this case + for ( const char *f = strchr(spec->m_szFlags, '*'); + f; + f = strchr(f + 1, '*') ) + { + if ( nargs++ == wxMAX_SVNPRINTF_ARGUMENTS ) + break; + + // TODO: we need to support specifiers of the form "%2$*1$s" + // (this is the same as "%*s") as if any positional arguments + // are used all asterisks must be positional as well but this + // requires a lot of changes in this code (basically we'd need + // to rewrite Parse() to return "*" and conversion itself as + // separate entries) + if ( posarg_present ) + { + wxFAIL_MSG + ( + wxString::Format + ( + "Format string \"%s\" uses both positional " + "parameters and '*' but this is not currently " + "supported by this implementation, sorry.", + fmt + ) + ); + } + + specs[nargs] = *spec; + + // make an entry for '*' and point to it from pspec + spec->Init(); + spec->m_type = wxPAT_STAR; + pspec[nargs - 1] = spec; + + spec = &specs[nargs]; + } + + // check if this is a positional or normal argument + if ( spec->m_pos > 0 ) + { + // the positional arguments start from number 1 so we need + // to adjust the index + spec->m_pos--; + posarg_present = true; + } + else // not a positional argument... + { + spec->m_pos = nargs; + nonposarg_present = true; + } + + // this conversion specifier is tied to the pos-th argument... + pspec[spec->m_pos] = spec; + + if ( nargs++ == wxMAX_SVNPRINTF_ARGUMENTS ) + break; + } + + + // warn if we lost any arguments (the program probably will crash + // anyhow because of stack corruption...) + if ( nargs == wxMAX_SVNPRINTF_ARGUMENTS ) + { + wxFAIL_MSG + ( + wxString::Format + ( + "wxVsnprintf() currently supports only %d arguments, " + "but format string \"%s\" defines more of them.\n" + "You need to change wxMAX_SVNPRINTF_ARGUMENTS and " + "recompile if more are really needed.", + fmt, wxMAX_SVNPRINTF_ARGUMENTS + ) + ); } } - wxPrintfConvSpec arg[wxMAX_SVNPRINTF_ARGUMENTS]; - wxPrintfConvSpec *pspec[wxMAX_SVNPRINTF_ARGUMENTS]; - bool posarg_present, nonposarg_present; + // total number of valid elements in specs unsigned nargs; + + // all format specifications in this format string in order of their + // appearance (which may be different from arguments order) + ConvSpec specs[wxMAX_SVNPRINTF_ARGUMENTS]; + + // pointer to specs array element for the N-th argument + ConvSpec *pspec[wxMAX_SVNPRINTF_ARGUMENTS]; + + // true if any positional/non-positional parameters are used + bool posarg_present, + nonposarg_present; }; #undef APPEND_CH diff --git a/include/wx/string.h b/include/wx/string.h index 5b29a2546d..a67e8e8fb9 100644 --- a/include/wx/string.h +++ b/include/wx/string.h @@ -246,8 +246,15 @@ public: operator const void*() const { return AsChar(); } // returns buffers that are valid as long as the associated wxString exists - inline const wxScopedCharBuffer AsCharBuf() const; - inline const wxScopedWCharBuffer AsWCharBuf() const; + const wxScopedCharBuffer AsCharBuf() const + { + return wxScopedCharBuffer::CreateNonOwned(AsChar()); + } + + const wxScopedWCharBuffer AsWCharBuf() const + { + return wxScopedWCharBuffer::CreateNonOwned(AsWChar()); + } inline wxString AsString() const; @@ -1711,9 +1718,7 @@ public: } const wxScopedCharBuffer utf8_str() const - { return wxCharBuffer::CreateNonOwned(wx_str()); } - const wxScopedCharBuffer ToUTF8() const - { return wxCharBuffer::CreateNonOwned(wx_str()); } + { return wxCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length()); } // this function exists in UTF-8 build only and returns the length of the // internal UTF-8 representation @@ -1729,7 +1734,6 @@ public: return s; } const wxScopedCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); } - const wxScopedCharBuffer ToUTF8() const { return utf8_str(); } #else // ANSI static wxString FromUTF8(const char *utf8) { return wxString(wxMBConvUTF8().cMB2WC(utf8)); } @@ -1758,9 +1762,10 @@ public: } const wxScopedCharBuffer utf8_str() const { return wxMBConvUTF8().cWC2MB(wc_str()); } - const wxScopedCharBuffer ToUTF8() const { return utf8_str(); } #endif + const wxScopedCharBuffer ToUTF8() const { return utf8_str(); } + // functions for storing binary data in wxString: #if wxUSE_UNICODE static wxString From8BitData(const char *data, size_t len) @@ -1788,21 +1793,34 @@ public: // accepting the file names. The return value is always the same, but the // type differs because a function may either return pointer to the buffer // directly or have to use intermediate buffer for translation. + #if wxUSE_UNICODE + // this is an optimization: even though using mb_str(wxConvLibc) does the + // same thing (i.e. returns pointer to internal representation as locale is + // always an UTF-8 one) in wxUSE_UTF8_LOCALE_ONLY case, we can avoid the + // extra checks and the temporary buffer construction by providing a + // separate mb_str() overload #if wxUSE_UTF8_LOCALE_ONLY const char* mb_str() const { return wx_str(); } - const wxScopedCharBuffer mb_str(const wxMBConv& conv) const; -#else - const wxScopedCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const; -#endif + const wxScopedCharBuffer mb_str(const wxMBConv& conv) const + { + return AsCharBuf(conv); + } +#else // !wxUSE_UTF8_LOCALE_ONLY + const wxScopedCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const + { + return AsCharBuf(conv); + } +#endif // wxUSE_UTF8_LOCALE_ONLY/!wxUSE_UTF8_LOCALE_ONLY const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); } #if wxUSE_UNICODE_WCHAR const wchar_t* wc_str() const { return wx_str(); } #elif wxUSE_UNICODE_UTF8 - const wxScopedWCharBuffer wc_str() const; + const wxScopedWCharBuffer wc_str() const + { return AsWCharBuf(wxMBConvStrictUTF8()); } #endif // for compatibility with !wxUSE_UNICODE version const wxWX2WCbuf wc_str(const wxMBConv& WXUNUSED(conv)) const @@ -1815,16 +1833,16 @@ public: #endif // wxMBFILES/!wxMBFILES #else // ANSI - const wxChar* mb_str() const { return wx_str(); } + const char* mb_str() const { return wx_str(); } // for compatibility with wxUSE_UNICODE version const char* mb_str(const wxMBConv& WXUNUSED(conv)) const { return wx_str(); } const wxWX2MBbuf mbc_str() const { return mb_str(); } -#if wxUSE_WCHAR_T - const wxScopedWCharBuffer wc_str(const wxMBConv& conv = wxConvLibc) const; -#endif // wxUSE_WCHAR_T + const wxScopedWCharBuffer wc_str(const wxMBConv& conv = wxConvLibc) const + { return AsWCharBuf(conv); } + const wxScopedCharBuffer fn_str() const { return wxConvFile.cWC2WX( wc_str( wxConvLibc ) ); } #endif // Unicode/ANSI @@ -3423,36 +3441,117 @@ private: wxStringImpl m_impl; // buffers for compatibility conversion from (char*)c_str() and - // (wchar_t*)c_str(): - // FIXME-UTF8: bechmark various approaches to keeping compatibility buffers + // (wchar_t*)c_str(): the pointers returned by these functions should remain + // valid until the string itself is modified for compatibility with the + // existing code and consistency with std::string::c_str() so returning a + // temporary buffer won't do and we need to cache the conversion results + + // TODO-UTF8: benchmark various approaches to keeping compatibility buffers template struct ConvertedBuffer { - ConvertedBuffer() : m_buf(NULL) {} + // notice that there is no need to initialize m_len here as it's unused + // as long as m_str is NULL + ConvertedBuffer() : m_str(NULL) {} ~ConvertedBuffer() - { free(m_buf); } + { free(m_str); } - operator T*() const { return m_buf; } - - ConvertedBuffer& operator=(T *str) + bool Extend(size_t len) { - free(m_buf); - m_buf = str; - return *this; + // add extra 1 for the trailing NUL + void * const str = realloc(m_str, sizeof(T)*(len + 1)); + if ( !str ) + return false; + + m_str = static_cast(str); + m_len = len; + + return true; } - T *m_buf; + const wxScopedCharTypeBuffer AsScopedBuffer() const + { + return wxScopedCharTypeBuffer::CreateNonOwned(m_str, m_len); + } + + T *m_str; // pointer to the string data + size_t m_len; // length, not size, i.e. in chars and without last NUL }; -#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY + + +#if wxUSE_UNICODE + // common mb_str() and wxCStrData::AsChar() helper: performs the conversion + // and returns either m_convertedToChar.m_str (in which case its m_len is + // also updated) or NULL if it failed + // + // there is an important exception: in wxUSE_UNICODE_UTF8 build if conv is a + // UTF-8 one, we return m_impl.c_str() directly, without doing any conversion + // as optimization and so the caller needs to check for this before using + // m_convertedToChar + // + // NB: AsChar() returns char* in any build, unlike mb_str() + const char *AsChar(const wxMBConv& conv) const; + + // mb_str() implementation helper + wxScopedCharBuffer AsCharBuf(const wxMBConv& conv) const + { +#if wxUSE_UNICODE_UTF8 + // avoid conversion if we can + if ( conv.IsUTF8() ) + { + return wxScopedCharBuffer::CreateNonOwned(m_impl.c_str(), + m_impl.length()); + } +#endif // wxUSE_UNICODE_UTF8 + + // call this solely in order to fill in m_convertedToChar as AsChar() + // updates it as a side effect: this is a bit ugly but it's a completely + // internal function so the users of this class shouldn't care or know + // about it and doing it like this, i.e. having a separate AsChar(), + // allows us to avoid the creation and destruction of a temporary buffer + // when using wxCStrData without duplicating any code + if ( !AsChar(conv) ) + { + // although it would be probably more correct to return NULL buffer + // from here if the conversion fails, a lot of existing code doesn't + // expect mb_str() (or wc_str()) to ever return NULL so return an + // empty string otherwise to avoid crashes in it + // + // also, some existing code does check for the conversion success and + // so asserting here would be bad too -- even if it does mean that + // silently losing data is possible for badly written code + return wxScopedCharBuffer::CreateNonOwned("", 0); + } + + return m_convertedToChar.AsScopedBuffer(); + } + ConvertedBuffer m_convertedToChar; -#endif +#endif // !wxUSE_UNICODE + #if !wxUSE_UNICODE_WCHAR + // common wc_str() and wxCStrData::AsWChar() helper for both UTF-8 and ANSI + // builds: converts the string contents into m_convertedToWChar and returns + // NULL if the conversion failed (this can only happen in ANSI build) + // + // NB: AsWChar() returns wchar_t* in any build, unlike wc_str() + const wchar_t *AsWChar(const wxMBConv& conv) const; + + // wc_str() implementation helper + wxScopedWCharBuffer AsWCharBuf(const wxMBConv& conv) const + { + if ( !AsWChar(conv) ) + return wxScopedWCharBuffer::CreateNonOwned(L"", 0); + + return m_convertedToWChar.AsScopedBuffer(); + } + ConvertedBuffer m_convertedToWChar; -#endif +#endif // !wxUSE_UNICODE_WCHAR #if wxUSE_UNICODE_UTF8 // FIXME-UTF8: (try to) move this elsewhere (TLS) or solve differently - // assigning to character pointer to by wxString::interator may + // assigning to character pointer to by wxString::iterator may // change the underlying wxStringImpl iterator, so we have to // keep track of all iterators and update them as necessary: struct wxStringIteratorNodeHead @@ -3996,45 +4095,53 @@ inline wxCStrData::~wxCStrData() delete const_cast(m_str); // cast to silence warnings } -// simple cases for AsChar() and AsWChar(), the complicated ones are -// in string.cpp -#if wxUSE_UNICODE_WCHAR +// AsChar() and AsWChar() implementations simply forward to wxString methods + inline const wchar_t* wxCStrData::AsWChar() const { - return m_str->wx_str() + m_offset; -} -#endif // wxUSE_UNICODE_WCHAR + const wchar_t * const p = +#if wxUSE_UNICODE_WCHAR + m_str->wc_str(); +#elif wxUSE_UNICODE_UTF8 + m_str->AsWChar(wxMBConvStrictUTF8()); +#else + m_str->AsWChar(wxConvLibc); +#endif + // in Unicode build the string always has a valid Unicode representation + // and even if a conversion is needed (as in UTF8 case) it can't fail + // + // but in ANSI build the string contents might be not convertible to + // Unicode using the current locale encoding so we do need to check for + // errors #if !wxUSE_UNICODE -inline const char* wxCStrData::AsChar() const -{ - return m_str->wx_str() + m_offset; -} + if ( !p ) + { + // if conversion fails, return empty string and not NULL to avoid + // crashes in code written with either wxWidgets 2 wxString or + // std::string behaviour in mind: neither of them ever returns NULL + // from its c_str() and so we shouldn't neither + // + // notice that the same is done in AsChar() below and + // wxString::wc_str() and mb_str() for the same reasons + return L""; + } #endif // !wxUSE_UNICODE -#if wxUSE_UTF8_LOCALE_ONLY + return p + m_offset; +} + inline const char* wxCStrData::AsChar() const { - return wxStringOperations::AddToIter(m_str->wx_str(), m_offset); -} -#endif // wxUSE_UTF8_LOCALE_ONLY +#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY + const char * const p = m_str->AsChar(wxConvLibc); + if ( !p ) + return ""; +#else // !wxUSE_UNICODE || wxUSE_UTF8_LOCALE_ONLY + const char * const p = m_str->mb_str(); +#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY -inline const wxScopedCharBuffer wxCStrData::AsCharBuf() const -{ -#if !wxUSE_UNICODE || wxUSE_UTF8_LOCALE_ONLY - return wxScopedCharBuffer::CreateNonOwned(AsChar()); -#else - return AsString().mb_str(); -#endif -} - -inline const wxScopedWCharBuffer wxCStrData::AsWCharBuf() const -{ -#if wxUSE_UNICODE_WCHAR - return wxScopedWCharBuffer::CreateNonOwned(AsWChar()); -#else - return AsString().wc_str(); -#endif + return p + m_offset; } inline wxString wxCStrData::AsString() const diff --git a/src/common/string.cpp b/src/common/string.cpp index 762410411d..af0b91750e 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -366,95 +366,6 @@ wxString::~wxString() } #endif -#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY -const char* wxCStrData::AsChar() const -{ -#if wxUSE_UNICODE_UTF8 - if ( wxLocaleIsUtf8 ) - return AsInternal(); -#endif - // under non-UTF8 locales, we have to convert the internal UTF-8 - // representation using wxConvLibc and cache the result - - wxString *str = wxConstCast(m_str, wxString); - - // convert the string: - // - // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we - // have it) but it's unfortunately not obvious to implement - // because we don't know how big buffer do we need for the - // given string length (in case of multibyte encodings, e.g. - // ISO-2022-JP or UTF-8 when internal representation is wchar_t) - // - // One idea would be to store more than just m_convertedToChar - // in wxString: then we could record the length of the string - // which was converted the last time and try to reuse the same - // buffer if the current length is not greater than it (this - // could still fail because string could have been modified in - // place but it would work most of the time, so we'd do it and - // only allocate the new buffer if in-place conversion returned - // an error). We could also store a bit saying if the string - // was modified since the last conversion (and update it in all - // operation modifying the string, of course) to avoid unneeded - // consequential conversions. But both of these ideas require - // adding more fields to wxString and require profiling results - // to be sure that we really gain enough from them to justify - // doing it. - wxScopedCharBuffer buf(str->mb_str()); - - // if it failed, return empty string and not NULL to avoid crashes in code - // written with either wxWidgets 2 wxString or std::string behaviour in - // mind: neither of them ever returns NULL and so we shouldn't neither - if ( !buf ) - return ""; - - if ( str->m_convertedToChar && - strlen(buf) == strlen(str->m_convertedToChar) ) - { - // keep the same buffer for as long as possible, so that several calls - // to c_str() in a row still work: - strcpy(str->m_convertedToChar, buf); - } - else - { - str->m_convertedToChar = buf.release(); - } - - // and keep it: - return str->m_convertedToChar + m_offset; -} -#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY - -#if !wxUSE_UNICODE_WCHAR -const wchar_t* wxCStrData::AsWChar() const -{ - wxString *str = wxConstCast(m_str, wxString); - - // convert the string: - wxScopedWCharBuffer buf(str->wc_str()); - - // notice that here, unlike above in AsChar(), conversion can't fail as our - // internal UTF-8 is always well-formed -- or the string was corrupted and - // all bets are off anyhow - - // FIXME-UTF8: do the conversion in-place in the existing buffer - if ( str->m_convertedToWChar && - wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) ) - { - // keep the same buffer for as long as possible, so that several calls - // to c_str() in a row still work: - memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf)); - } - else - { - str->m_convertedToWChar = buf.release(); - } - - // and keep it: - return str->m_convertedToWChar + m_offset; -} -#endif // !wxUSE_UNICODE_WCHAR - // =========================================================================== // wxString class core // =========================================================================== @@ -549,61 +460,97 @@ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLengt } #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE +// This std::string::c_str()-like method returns a wide char pointer to string +// contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return +// a pointer to the internal representation. Otherwise a conversion is required +// and it returns a temporary buffer. +// +// However for compatibility with c_str() and to avoid breaking existing code +// doing +// +// for ( const wchar_t *p = s.wc_str(); *p; p++ ) +// ... use *p... +// +// we actually need to ensure that the returned buffer is _not_ temporary and +// so we use wxString::m_convertedToWChar to store the returned data +#if !wxUSE_UNICODE_WCHAR -#if wxUSE_UNICODE_WCHAR - -//Convert wxString in Unicode mode to a multi-byte string -const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const +const wchar_t *wxString::AsWChar(const wxMBConv& conv) const { - // NB: Length passed to cWC2MB() doesn't include terminating NUL, it's - // added by it automatically. If we passed length()+1 here, it would - // create a buffer with 2 trailing NULs of length one greater than - // expected. - return conv.cWC2MB(wx_str(), length(), NULL); + const char * const strMB = m_impl.c_str(); + const size_t lenMB = m_impl.length(); + + // find out the size of the buffer needed + const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB); + if ( lenWC == wxCONV_FAILED ) + return NULL; + + // keep the same buffer if the string size didn't change: this is not only + // an optimization but also ensure that code which modifies string + // character by character (without changing its length) can continue to use + // the pointer returned by a previous wc_str() call even after changing the + // string + + // TODO-UTF8: we could check for ">" instead of "!=" here as this would + // allow to save on buffer reallocations but at the cost of + // consuming (even) more memory, we should benchmark this to + // determine if it's worth doing + if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len ) + { + if ( !const_cast(this)->m_convertedToWChar.Extend(lenWC) ) + return NULL; + } + + // finally do convert + m_convertedToWChar.m_str[lenWC] = L'\0'; + if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC, + strMB, lenMB) == wxCONV_FAILED ) + return NULL; + + return m_convertedToWChar.m_str; } -#elif wxUSE_UNICODE_UTF8 +#endif // !wxUSE_UNICODE_WCHAR -const wxScopedWCharBuffer wxString::wc_str() const -{ - // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's - // added by it automatically. If we passed length()+1 here, it would - // create a buffer with 2 trailing NULs of length one greater than - // expected. - return wxMBConvStrictUTF8().cMB2WC - ( - m_impl.c_str(), - m_impl.length(), - NULL - ); -} -const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const +// Same thing for mb_str() which returns a normal char pointer to string +// contents: this always requires converting it to the specified encoding in +// non-ANSI build except if we need to convert to UTF-8 and this is what we +// already use internally. +#if wxUSE_UNICODE + +const char *wxString::AsChar(const wxMBConv& conv) const { +#if wxUSE_UNICODE_UTF8 if ( conv.IsUTF8() ) - return wxScopedCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length()); + return m_impl.c_str(); - wxScopedWCharBuffer wcBuf(wc_str()); - if ( !wcBuf.length() ) - return wxCharBuffer(""); + const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8()); + const size_t lenWC = m_convertedToWChar.m_len; +#else // wxUSE_UNICODE_WCHAR + const wchar_t * const strWC = m_impl.c_str(); + const size_t lenWC = m_impl.length(); +#endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR - return conv.cWC2MB(wcBuf.data(), wcBuf.length(), NULL); + const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC); + if ( lenMB == wxCONV_FAILED ) + return NULL; + + if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len ) + { + if ( !const_cast(this)->m_convertedToChar.Extend(lenMB) ) + return NULL; + } + + m_convertedToChar.m_str[lenMB] = '\0'; + if ( conv.FromWChar(m_convertedToChar.m_str, lenMB, + strWC, lenWC) == wxCONV_FAILED ) + return NULL; + + return m_convertedToChar.m_str; } -#else // ANSI - -//Converts this string to a wide character string if unicode -//mode is not enabled and wxUSE_WCHAR_T is enabled -const wxScopedWCharBuffer wxString::wc_str(const wxMBConv& conv) const -{ - // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's - // added by it automatically. If we passed length()+1 here, it would - // create a buffer with 2 trailing NULs of length one greater than - // expected. - return conv.cMB2WC(wx_str(), length(), NULL); -} - -#endif // Unicode/ANSI +#endif // wxUSE_UNICODE // shrink to minimal size (releasing extra memory) bool wxString::Shrink() diff --git a/src/common/wxprintf.cpp b/src/common/wxprintf.cpp index fada74e496..87d7deba33 100644 --- a/src/common/wxprintf.cpp +++ b/src/common/wxprintf.cpp @@ -142,9 +142,16 @@ static int wxDoVsnprintf(CharType *buf, size_t lenMax, const CharType *toparse = format; for (i=0; i < parser.nargs; i++) { + wxPrintfConvSpec& spec = parser.specs[i]; + + // skip any asterisks, they're processed as part of the conversion they + // apply to + if ( spec.m_type == wxPAT_STAR ) + continue; + // copy in the output buffer the portion of the format string between // last specifier and the current one - size_t tocopy = ( parser.arg[i].m_pArgPos - toparse ); + size_t tocopy = ( spec.m_pArgPos - toparse ); lenCur += wxCopyStrWithPercents(lenMax - lenCur, buf + lenCur, tocopy, toparse); @@ -155,8 +162,8 @@ static int wxDoVsnprintf(CharType *buf, size_t lenMax, } // process this specifier directly in the output buffer - int n = parser.arg[i].Process(buf+lenCur, lenMax - lenCur, - &argdata[parser.arg[i].m_pos], lenCur); + int n = spec.Process(buf+lenCur, lenMax - lenCur, + &argdata[spec.m_pos], lenCur); if (n == -1) { buf[lenMax-1] = wxT('\0'); // be sure to always NUL-terminate the string @@ -166,7 +173,7 @@ static int wxDoVsnprintf(CharType *buf, size_t lenMax, // the +1 is because wxPrintfConvSpec::m_pArgEnd points to the last character // of the format specifier, but we are not interested to it... - toparse = parser.arg[i].m_pArgEnd + 1; + toparse = spec.m_pArgEnd + 1; } // copy portion of the format string after last specifier diff --git a/src/xml/xml.cpp b/src/xml/xml.cpp index 0f5e61d83c..a37225c3dd 100644 --- a/src/xml/xml.cpp +++ b/src/xml/xml.cpp @@ -783,12 +783,18 @@ bool OutputString(wxOutputStream& stream, #if wxUSE_UNICODE wxUnusedVar(convMem); + if ( !convFile ) + convFile = &wxConvUTF8; - const wxWX2MBbuf buf(str.mb_str(*(convFile ? convFile : &wxConvUTF8))); - if ( !buf ) + const wxScopedCharBuffer buf(str.mb_str(*convFile)); + if ( !buf.length() ) + { + // conversion failed, can't write this string in an XML file in this + // (presumably non-UTF-8) encoding return false; + } - stream.Write(buf, strlen(buf)); + stream.Write(buf, buf.length()); #else // !wxUSE_UNICODE if ( convFile && convMem ) { diff --git a/tests/strings/vsnprintf.cpp b/tests/strings/vsnprintf.cpp index e77d87c28c..751f7c5b85 100644 --- a/tests/strings/vsnprintf.cpp +++ b/tests/strings/vsnprintf.cpp @@ -55,27 +55,27 @@ int r; #define CMP6(expected, fmt, y, z, w, t) \ r=wxSnprintf(buf, MAX_TEST_LEN, wxT(fmt), y, z, w, t); \ - CPPUNIT_ASSERT( r == (int)wxStrlen(buf) ); \ + CPPUNIT_ASSERT_EQUAL( r, wxStrlen(buf) ); \ ASSERT_STR_EQUAL( wxT(expected), buf ); #define CMP5(expected, fmt, y, z, w) \ r=wxSnprintf(buf, MAX_TEST_LEN, wxT(fmt), y, z, w); \ - CPPUNIT_ASSERT( r == (int)wxStrlen(buf) ); \ + CPPUNIT_ASSERT_EQUAL( r, wxStrlen(buf) ); \ ASSERT_STR_EQUAL( wxT(expected), buf ); #define CMP4(expected, fmt, y, z) \ r=wxSnprintf(buf, MAX_TEST_LEN, wxT(fmt), y, z); \ - CPPUNIT_ASSERT( r == (int)wxStrlen(buf) ); \ + CPPUNIT_ASSERT_EQUAL( r, wxStrlen(buf) ); \ ASSERT_STR_EQUAL( wxT(expected), buf ); #define CMP3(expected, fmt, y) \ r=wxSnprintf(buf, MAX_TEST_LEN, wxT(fmt), y); \ - CPPUNIT_ASSERT( r == (int)wxStrlen(buf) ); \ + CPPUNIT_ASSERT_EQUAL( r, wxStrlen(buf) ); \ ASSERT_STR_EQUAL( wxT(expected), buf ); #define CMP2(expected, fmt) \ r=wxSnprintf(buf, MAX_TEST_LEN, wxT(fmt)); \ - CPPUNIT_ASSERT( r == (int)wxStrlen(buf) ); \ + CPPUNIT_ASSERT_EQUAL( r, wxStrlen(buf) ); \ ASSERT_STR_EQUAL( wxT(expected), buf ); // NOTE: this macro is used also with too-small buffers (see Miscellaneous())