Files
wxWidgets/src/common/string.cpp
2009-03-29 20:58:39 +00:00

2195 lines
62 KiB
C++

/////////////////////////////////////////////////////////////////////////////
// Name: src/common/string.cpp
// Purpose: wxString class
// Author: Vadim Zeitlin, Ryan Norton
// Modified by:
// Created: 29/01/98
// RCS-ID: $Id$
// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
// (c) 2004 Ryan Norton <wxprojects@comcast.net>
// Licence: wxWindows licence
/////////////////////////////////////////////////////////////////////////////
// ===========================================================================
// headers, declarations, constants
// ===========================================================================
// For compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"
#ifdef __BORLANDC__
#pragma hdrstop
#endif
#ifndef WX_PRECOMP
#include "wx/string.h"
#include "wx/wxcrtvararg.h"
#endif
#include <ctype.h>
#ifndef __WXWINCE__
#include <errno.h>
#endif
#include <string.h>
#include <stdlib.h>
#include "wx/hashmap.h"
#include "wx/vector.h"
#include "wx/xlocale.h"
// string handling functions used by wxString: these aliases resolve to the
// plain C byte-oriented functions when wxUSE_UNICODE_UTF8 is set and to the
// wxTmem*/wxStrlen variants in all other builds
#if wxUSE_UNICODE_UTF8
#define wxStringMemcpy memcpy
#define wxStringMemcmp memcmp
#define wxStringMemchr memchr
#define wxStringStrlen strlen
#else
#define wxStringMemcpy wxTmemcpy
#define wxStringMemcmp wxTmemcmp
#define wxStringMemchr wxTmemchr
#define wxStringStrlen wxStrlen
#endif
// ----------------------------------------------------------------------------
// global variables
// ----------------------------------------------------------------------------
namespace wxPrivate
{
// file-local buffer data object constructed from a NULL pointer and 0 length
static UntypedBufferData s_untypedNullData(NULL, 0);
// constant pointer to the object above, visible outside of this file
UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData;
} // namespace wxPrivate
// ---------------------------------------------------------------------------
// static class variables definition
// ---------------------------------------------------------------------------
// According to the STL, npos _must_ be the size_t value -1 (i.e. the largest
// size_t value)
const size_t wxString::npos = (size_t) -1;
#if wxUSE_STRING_POS_CACHE
#ifdef wxHAS_COMPILER_TLS
// definition of the static cache member, declared through wxTLS_TYPE(); only
// compiled when wxHAS_COMPILER_TLS is defined
wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
#else // !wxHAS_COMPILER_TLS
// Helper whose only job is to call wxString::GetCache() from its constructor;
// a single static instance is created below.
struct wxStrCacheInitializer
{
wxStrCacheInitializer()
{
// calling this function triggers s_cache initialization in it, and
// from now on it becomes safe to call from multiple threads
wxString::GetCache();
}
};
/*
    The following definition is commented out here (not compiled in this
    file):

wxString::Cache& wxString::GetCache()
{
static wxTLS_TYPE(Cache) s_cache;
return wxTLS_VALUE(s_cache);
}
*/
// the static instance whose construction performs the GetCache() call above
static wxStrCacheInitializer gs_stringCacheInit;
#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
// gdb seems to be unable to display thread-local variables correctly, at least
// not my 6.4.98 version under amd64, so provide this debugging helper to do it
#if wxDEBUG_LEVEL >= 2
struct wxStrCacheDumper
{
static void ShowAll()
{
puts("*** wxString cache dump:");
for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
{
const wxString::Cache::Element&
c = wxString::GetCacheBegin()[n];
printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
n,
n == wxString::LastUsedCacheElement() ? " [*]" : "",
c.str,
(unsigned long)c.pos,
(unsigned long)c.impl,
(long)c.len);
}
}
};
// Free function wrapper around wxStrCacheDumper::ShowAll().
void wxDumpStrCache()
{
    wxStrCacheDumper::ShowAll();
}
#endif // wxDEBUG_LEVEL >= 2
#ifdef wxPROFILE_STRING_CACHE
// definition of the static statistics member, only compiled when
// wxPROFILE_STRING_CACHE is defined
wxString::CacheStats wxString::ms_cacheStats;
struct wxStrCacheStatsDumper
{
~wxStrCacheStatsDumper()
{
const wxString::CacheStats& stats = wxString::ms_cacheStats;
if ( stats.postot )
{
puts("*** wxString cache statistics:");
printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
stats.postot);
printf("\tHits %u (of which %u not used) or %.2f%%\n",
stats.poshits,
stats.mishits,
100.*float(stats.poshits - stats.mishits)/stats.postot);
printf("\tAverage position requested: %.2f\n",
float(stats.sumpos) / stats.postot);
printf("\tAverage offset after cached hint: %.2f\n",
float(stats.sumofs) / stats.postot);
}
if ( stats.lentot )
{
printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
}
}
};
static wxStrCacheStatsDumper s_showCacheStats;
#endif // wxPROFILE_STRING_CACHE
#endif // wxUSE_STRING_POS_CACHE
// ----------------------------------------------------------------------------
// global functions
// ----------------------------------------------------------------------------
#if wxUSE_STD_IOSTREAM
#include <iostream>
// Inserts a wxCStrData into a narrow stream.
//
// In wchar_t-based Unicode builds the data is first converted with
// AsCharBuf(); if the resulting buffer tests false, failbit is passed to
// os.clear() instead of writing anything. In the other builds the internal
// representation is written directly.
wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
{
#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
    const wxScopedCharBuffer converted(str.AsCharBuf());
    if ( !converted )
    {
        os.clear(wxSTD ios_base::failbit);
        return os;
    }

    os << converted.data();
    return os;
#else
    return os << str.AsInternal();
#endif
}
// Inserts a wxString into a narrow stream by writing its c_str().
wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
{
    os << str.c_str();
    return os;
}
// Inserts the data() of a narrow character buffer into a narrow stream.
wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
{
    os << str.data();
    return os;
}
#ifndef __BORLANDC__
// Inserts the data() of a wide character buffer into a narrow stream (not
// compiled for __BORLANDC__).
wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
{
    os << str.data();
    return os;
}
#endif
#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
// Inserts a wxString into a wide stream by writing its wc_str().
wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
{
    wos << str.wc_str();
    return wos;
}
// Inserts a wxCStrData into a wide stream by writing its AsWChar() form.
wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
{
    wos << str.AsWChar();
    return wos;
}
// Inserts the data() of a wide character buffer into a wide stream.
wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
{
    wos << str.data();
    return wos;
}
#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
#endif // wxUSE_STD_IOSTREAM
// ===========================================================================
// wxString class core
// ===========================================================================
#if wxUSE_UNICODE_UTF8
// Translates a (position, length) pair expressed in characters into the
// corresponding pair expressed in units of the underlying m_impl string.
//
// pos      character index of the start, or npos
// len      number of characters, or npos
// implPos  receives the offset of the start in m_impl, or npos if pos is npos
// implLen  receives the length in m_impl units, or npos if len is npos; it
//          is left untouched when pos is npos
//
// A len reaching beyond the end of the string is truncated to the part of
// the string which actually exists.
void wxString::PosLenToImpl(size_t pos, size_t len,
                            size_t *implPos, size_t *implLen) const
{
    if ( pos == npos )
    {
        *implPos = npos;
    }
    else // have valid start position
    {
        const const_iterator b = GetIterForNthChar(pos);
        *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
        if ( len == npos )
        {
            *implLen = npos;
        }
        else // have valid length too
        {
            // we need to handle the case of length specifying a substring
            // going beyond the end of the string, just as std::string does
            //
            // NB: stop as soon as the end is reached: advancing an iterator
            //     which already equals end() would step past the string
            const const_iterator e(end());
            const_iterator i(b);
            while ( len && i < e )
            {
                ++i;
                --len;
            }
            *implLen = i.impl() - b.impl();
        }
    }
}
#endif // wxUSE_UNICODE_UTF8
// ----------------------------------------------------------------------------
// wxCStrData converted strings caching
// ----------------------------------------------------------------------------
// FIXME-UTF8: temporarily disabled because it doesn't work with global
// string objects; re-enable after fixing this bug and benchmarking
// performance to see if using a hash is a good idea at all
#if 0
// For backward compatibility reasons, it must be possible to assign the value
// returned by wxString::c_str() to a char* or wchar_t* variable and work with
// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
// because the memory would be freed immediately, but it has to be valid as long
// as the string is not modified, so that code like this still works:
//
// const wxChar *s = str.c_str();
// while ( s ) { ... }
// FIXME-UTF8: not thread safe!
// FIXME-UTF8: we currently clear the cached conversion only when the string is
// destroyed, but we should do it when the string is modified, to
// keep memory usage down
// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
// invalidated the cache on every change, we could keep the previous
// conversion
// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
// Removes the entry keyed by the given string from the conversion cache (if
// there is one), releasing the stored buffer with free().
template<typename T>
static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
{
    typename T::iterator entry = hash.find(wxConstCast(s, wxString));
    if ( entry == hash.end() )
        return; // nothing cached for this string

    free(entry->second);
    hash.erase(entry);
}
#if wxUSE_UNICODE
// NB: non-STL implementation doesn't compile with "const wxString*" key type,
//     so we have to use wxString* here and const-cast when used
WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
                    wxStringCharConversionCache);
static wxStringCharConversionCache gs_stringsCharCache;

// (Disabled code) Converts the string with mb_str(), stores the released
// buffer in the global cache under the string's address and returns a pointer
// m_offset elements into it.
const char* wxCStrData::AsChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsCharCache, m_str);

    // convert the string and keep it:
    char * const converted = m_str->mb_str().release();
    gs_stringsCharCache[wxConstCast(m_str, wxString)] = converted;

    return converted + m_offset;
}
#endif // wxUSE_UNICODE
#if !wxUSE_UNICODE_WCHAR
WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
                    wxStringWCharConversionCache);
static wxStringWCharConversionCache gs_stringsWCharCache;

// (Disabled code) Wide character counterpart of AsChar(): converts with
// wc_str(), caches the released buffer and returns a pointer m_offset
// elements into it.
const wchar_t* wxCStrData::AsWChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);

    // convert the string and keep it:
    wchar_t * const converted = m_str->wc_str().release();
    gs_stringsWCharCache[wxConstCast(m_str, wxString)] = converted;

    return converted + m_offset;
}
#endif // !wxUSE_UNICODE_WCHAR
// (Disabled code) Destructor removing this string's entries from the global
// conversion caches; each cache is only touched in the build configurations
// in which it is declared.
wxString::~wxString()
{
#if wxUSE_UNICODE
// FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
DeleteStringFromConversionCache(gs_stringsCharCache, this);
#endif
#if !wxUSE_UNICODE_WCHAR
DeleteStringFromConversionCache(gs_stringsWCharCache, this);
#endif
}
#endif
#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
const char* wxCStrData::AsChar() const
{
#if wxUSE_UNICODE_UTF8
if ( wxLocaleIsUtf8 )
return AsInternal();
#endif
// under non-UTF8 locales, we have to convert the internal UTF-8
// representation using wxConvLibc and cache the result
wxString *str = wxConstCast(m_str, wxString);
// convert the string:
//
// FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
// have it) but it's unfortunately not obvious to implement
// because we don't know how big buffer do we need for the
// given string length (in case of multibyte encodings, e.g.
// ISO-2022-JP or UTF-8 when internal representation is wchar_t)
//
// One idea would be to store more than just m_convertedToChar
// in wxString: then we could record the length of the string
// which was converted the last time and try to reuse the same
// buffer if the current length is not greater than it (this
// could still fail because string could have been modified in
// place but it would work most of the time, so we'd do it and
// only allocate the new buffer if in-place conversion returned
// an error). We could also store a bit saying if the string
// was modified since the last conversion (and update it in all
// operation modifying the string, of course) to avoid unneeded
// consequential conversions. But both of these ideas require
// adding more fields to wxString and require profiling results
// to be sure that we really gain enough from them to justify
// doing it.
wxScopedCharBuffer buf(str->mb_str());
// if it failed, return empty string and not NULL to avoid crashes in code
// written with either wxWidgets 2 wxString or std::string behaviour in
// mind: neither of them ever returns NULL and so we shouldn't neither
if ( !buf )
return "";
if ( str->m_convertedToChar &&
strlen(buf) == strlen(str->m_convertedToChar) )
{
// keep the same buffer for as long as possible, so that several calls
// to c_str() in a row still work:
strcpy(str->m_convertedToChar, buf);
}
else
{
str->m_convertedToChar = buf.release();
}
// and keep it:
return str->m_convertedToChar + m_offset;
}
#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
#if !wxUSE_UNICODE_WCHAR
const wchar_t* wxCStrData::AsWChar() const
{
wxString *str = wxConstCast(m_str, wxString);
// convert the string:
wxScopedWCharBuffer buf(str->wc_str());
// notice that here, unlike above in AsChar(), conversion can't fail as our
// internal UTF-8 is always well-formed -- or the string was corrupted and
// all bets are off anyhow
// FIXME-UTF8: do the conversion in-place in the existing buffer
if ( str->m_convertedToWChar &&
wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
{
// keep the same buffer for as long as possible, so that several calls
// to c_str() in a row still work:
memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
}
else
{
str->m_convertedToWChar = buf.release();
}
// and keep it:
return str->m_convertedToWChar + m_offset;
}
#endif // !wxUSE_UNICODE_WCHAR
// ===========================================================================
// wxString class core
// ===========================================================================
// ---------------------------------------------------------------------------
// construction and conversion
// ---------------------------------------------------------------------------
#if wxUSE_UNICODE_WCHAR
// Converts (part of) a multibyte string to the wide character form used by
// wxString in this build.
//
// psz      source string, may be NULL
// nLength  number of source elements or npos
// conv     conversion object to use
//
// Returns an empty buffer of length 0 if there is nothing to convert or if
// the conversion reports zero length.
/* static */
wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
                                               const wxMBConv& conv)
{
    if ( psz && nLength != 0 )
    {
        if ( nLength == npos )
            nLength = wxNO_LEN;

        size_t convertedLen;
        wxScopedWCharBuffer converted(conv.cMB2WC(psz, nLength, &convertedLen));
        if ( convertedLen )
            return SubstrBufFromMB(converted, convertedLen);
    }

    // nothing to do or nothing was produced
    return SubstrBufFromMB(wxWCharBuffer(L""), 0);
}
#endif // wxUSE_UNICODE_WCHAR
#if wxUSE_UNICODE_UTF8
// Converts (part of) a multibyte string in the encoding described by conv to
// the UTF-8 form used by wxString in this build.
//
// psz      source string, may be NULL
// nLength  number of source bytes or npos
// conv     conversion object describing the source encoding
//
// Returns an empty buffer of length 0 if there is nothing to convert or if
// the conversion to wide characters reports zero length.
/* static */
wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
                                               const wxMBConv& conv)
{
    if ( !psz || nLength == 0 )
        return SubstrBufFromMB(wxCharBuffer(""), 0);

    // input which is already valid UTF-8 doesn't need the roundtrip through
    // wchar_t*; validation is required because UTF-8 iterators assume a
    // well-formed sequence and psz may not be one
    if ( conv.IsUTF8() &&
            wxStringOperations::IsValidUtf8String(psz, nLength) )
    {
        // the SubstrBufFromMB ctor needs the real string length (psz is
        // known to be non-NULL at this point)
        if ( nLength == npos )
            nLength = strlen(psz);

        return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz, nLength),
                               nLength);
    }
    //else: either not UTF-8 or invalid, do the roundtrip through wchar_t*

    if ( nLength == npos )
        nLength = wxNO_LEN;

    // step 1: to wide string
    size_t wideLen;
    wxScopedWCharBuffer wide(conv.cMB2WC(psz, nLength, &wideLen));
    if ( !wideLen )
        return SubstrBufFromMB(wxCharBuffer(""), 0);

    // step 2: from wide string to UTF-8
    SubstrBufFromMB result(ConvertStr(wide, wideLen, wxMBConvStrictUTF8()));

    // widechar -> UTF-8 conversion isn't supposed to ever fail:
    wxASSERT_MSG( result.data, _T("conversion to UTF-8 failed") );

    return result;
}
#endif // wxUSE_UNICODE_UTF8
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
// Converts (part of) a wide character string to the multibyte form used by
// wxString in this build.
//
// pwz      source string, may be NULL
// nLength  number of source elements or npos
// conv     conversion object to use
//
// Returns an empty buffer of length 0 if there is nothing to convert or if
// the conversion reports zero length.
/* static */
wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
                                               const wxMBConv& conv)
{
    if ( pwz && nLength != 0 )
    {
        if ( nLength == npos )
            nLength = wxNO_LEN;

        size_t convertedLen;
        wxScopedCharBuffer converted(conv.cWC2MB(pwz, nLength, &convertedLen));
        if ( convertedLen )
            return SubstrBufFromWC(converted, convertedLen);
    }

    // nothing to do or nothing was produced
    return SubstrBufFromWC(wxCharBuffer(""), 0);
}
#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
#if wxUSE_UNICODE_WCHAR
// Convert wxString in Unicode (wchar_t) mode to a multibyte string using the
// given conversion
const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const
{
    // the converter wants the size, i.e. including the terminator, and not
    // the length
    const size_t sizeWithNul = length() + 1;

    return conv.cWC2MB(wx_str(), sizeWithNul, NULL);
}
#elif wxUSE_UNICODE_UTF8
// Convert the internal UTF-8 representation to a wide character buffer
const wxScopedWCharBuffer wxString::wc_str() const
{
    // the converter wants the size, i.e. including the terminator, and not
    // the length
    const size_t sizeWithNul = m_impl.length() + 1;

    return wxMBConvStrictUTF8().cMB2WC(m_impl.c_str(), sizeWithNul, NULL);
}
// Convert the internal UTF-8 representation to a multibyte string in the
// encoding described by conv.
//
// For a UTF-8 conv a non-owning view of the internal data is returned,
// otherwise the string is converted via an intermediate wide string.
const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const
{
    if ( conv.IsUTF8() )
        return wxScopedCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length());

    // FIXME-UTF8: use wc_str() here once we have buffers with length
    size_t wideLen;
    wxScopedWCharBuffer wide
                        (
                            wxMBConvStrictUTF8().cMB2WC
                                                 (
                                                     m_impl.c_str(),
                                                     m_impl.length() + 1, // size
                                                     &wideLen
                                                 )
                        );

    if ( wideLen )
        return conv.cWC2MB(wide, wideLen+1, NULL);

    return wxCharBuffer("");
}
#else // ANSI
// ANSI build only: converts this string to a wide character string using the
// given conversion
const wxScopedWCharBuffer wxString::wc_str(const wxMBConv& conv) const
{
    // the converter wants the size, i.e. including the terminator, and not
    // the length
    const size_t sizeWithNul = length() + 1;

    return conv.cMB2WC(wx_str(), sizeWithNul, NULL);
}
#endif // Unicode/ANSI
// shrink to minimal size (releasing extra memory)
bool wxString::Shrink()
{
wxString tmp(begin(), end());
swap(tmp);
return tmp.length() == length();
}
// deprecated compatibility code:
#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
// Deprecated wrapper: forwards to DoGetWriteBuf() with the same nLen and
// returns its result.
wxStringCharType *wxString::GetWriteBuf(size_t nLen)
{
return DoGetWriteBuf(nLen);
}
// Deprecated wrapper: forwards to the parameterless DoUngetWriteBuf().
void wxString::UngetWriteBuf()
{
DoUngetWriteBuf();
}
// Deprecated wrapper: forwards to DoUngetWriteBuf() with the same nLen.
void wxString::UngetWriteBuf(size_t nLen)
{
DoUngetWriteBuf(nLen);
}
#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
// ---------------------------------------------------------------------------
// data access
// ---------------------------------------------------------------------------
// all functions are inline in string.h
// ---------------------------------------------------------------------------
// concatenation operators
// ---------------------------------------------------------------------------
/*
* concatenation functions come in 5 flavours:
* string + string
* char + string and string + char
* C str + string and string + C str
*/
// string + string
wxString operator+(const wxString& str1, const wxString& str2)
{
#if !wxUSE_STL_BASED_WXSTRING
    wxASSERT( str1.IsValid() );
    wxASSERT( str2.IsValid() );
#endif

    // start from a copy of the left operand and append the right one
    wxString result(str1);
    result += str2;

    return result;
}
// string + char
wxString operator+(const wxString& str, wxUniChar ch)
{
#if !wxUSE_STL_BASED_WXSTRING
    wxASSERT( str.IsValid() );
#endif

    // start from a copy of the string and append the character
    wxString result(str);
    result += ch;

    return result;
}
// char + string
wxString operator+(wxUniChar ch, const wxString& str)
{
#if !wxUSE_STL_BASED_WXSTRING
    wxASSERT( str.IsValid() );
#endif

    // start from a string made of the character and append the string
    wxString result = ch;
    result += str;

    return result;
}
// string + narrow C string
wxString operator+(const wxString& str, const char *psz)
{
#if !wxUSE_STL_BASED_WXSTRING
    wxASSERT( str.IsValid() );
#endif

    wxString result;

    // reserve room for both parts up front
    const size_t total = strlen(psz) + str.length();
    if ( !result.Alloc(total) )
    {
        wxFAIL_MSG( _T("out of memory in wxString::operator+") );
    }

    result += str;
    result += psz;

    return result;
}
// string + wide C string
wxString operator+(const wxString& str, const wchar_t *pwz)
{
#if !wxUSE_STL_BASED_WXSTRING
    wxASSERT( str.IsValid() );
#endif

    wxString result;

    // reserve room for both parts up front
    const size_t total = wxWcslen(pwz) + str.length();
    if ( !result.Alloc(total) )
    {
        wxFAIL_MSG( _T("out of memory in wxString::operator+") );
    }

    result += str;
    result += pwz;

    return result;
}
// narrow C string + string
wxString operator+(const char *psz, const wxString& str)
{
#if !wxUSE_STL_BASED_WXSTRING
    wxASSERT( str.IsValid() );
#endif

    wxString result;

    // reserve room for both parts up front
    const size_t total = strlen(psz) + str.length();
    if ( !result.Alloc(total) )
    {
        wxFAIL_MSG( _T("out of memory in wxString::operator+") );
    }

    result = psz;
    result += str;

    return result;
}
// wide C string + string
wxString operator+(const wchar_t *pwz, const wxString& str)
{
#if !wxUSE_STL_BASED_WXSTRING
    wxASSERT( str.IsValid() );
#endif

    wxString result;

    // reserve room for both parts up front
    const size_t total = wxWcslen(pwz) + str.length();
    if ( !result.Alloc(total) )
    {
        wxFAIL_MSG( _T("out of memory in wxString::operator+") );
    }

    result = pwz;
    result += str;

    return result;
}
// ---------------------------------------------------------------------------
// string comparison
// ---------------------------------------------------------------------------
// Returns true if the string consists of exactly one character equal to c;
// if compareWithCase is false, both are passed through wxToupper() before
// being compared.
bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
{
    if ( length() != 1 )
        return false;

    if ( compareWithCase )
        return GetChar(0u) == c;

    return wxToupper(GetChar(0u)) == wxToupper(c);
}
#ifdef HAVE_STD_STRING_COMPARE
// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
// sort strings in characters code point order by sorting the byte sequence
// in byte values order (i.e. what strcmp() and memcmp() do).
// Compares the whole string with str by comparing the underlying m_impl
// strings.
int wxString::compare(const wxString& str) const
{
    const int rc = m_impl.compare(str.m_impl);
    return rc;
}
// Compares the nLen characters starting at nStart with the whole of str.
int wxString::compare(size_t nStart, size_t nLen,
                      const wxString& str) const
{
    // translate the character range to m_impl units first
    size_t implPos, implLen;
    PosLenToImpl(nStart, nLen, &implPos, &implLen);

    return m_impl.compare(implPos, implLen, str.m_impl);
}
// Compares the nLen characters starting at nStart with the nLen2 characters
// of str starting at nStart2.
int wxString::compare(size_t nStart, size_t nLen,
                      const wxString& str,
                      size_t nStart2, size_t nLen2) const
{
    // translate both character ranges to m_impl units first
    size_t implPos, implLen;
    PosLenToImpl(nStart, nLen, &implPos, &implLen);

    size_t otherPos, otherLen;
    str.PosLenToImpl(nStart2, nLen2, &otherPos, &otherLen);

    return m_impl.compare(implPos, implLen, str.m_impl, otherPos, otherLen);
}
// Compares the whole string with a narrow C string, converted by ImplStr().
int wxString::compare(const char* sz) const
{
    const int rc = m_impl.compare(ImplStr(sz));
    return rc;
}
// Compares the whole string with a wide C string, converted by ImplStr().
int wxString::compare(const wchar_t* sz) const
{
    const int rc = m_impl.compare(ImplStr(sz));
    return rc;
}
// Compares the nLen characters starting at nStart with the first nCount
// elements of the narrow C string sz.
int wxString::compare(size_t nStart, size_t nLen,
                      const char* sz, size_t nCount) const
{
    // translate the character range to m_impl units first
    size_t implPos, implLen;
    PosLenToImpl(nStart, nLen, &implPos, &implLen);

    // and convert the other string to the internal representation
    SubstrBufFromMB other(ImplStr(sz, nCount));

    return m_impl.compare(implPos, implLen, other.data, other.len);
}
// Compares the nLen characters starting at nStart with the first nCount
// elements of the wide C string sz.
int wxString::compare(size_t nStart, size_t nLen,
                      const wchar_t* sz, size_t nCount) const
{
    // translate the character range to m_impl units first
    size_t implPos, implLen;
    PosLenToImpl(nStart, nLen, &implPos, &implLen);

    // and convert the other string to the internal representation
    SubstrBufFromWC other(ImplStr(sz, nCount));

    return m_impl.compare(implPos, implLen, other.data, other.len);
}
#else // !HAVE_STD_STRING_COMPARE
// Compares two buffers of possibly different lengths: the common prefix is
// compared with wxStringMemcmp() and, if it is identical, the shorter buffer
// sorts first (-1 if the first one is shorter, +1 if it is longer, 0 if the
// lengths are equal too).
static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
                          const wxStringCharType* s2, size_t l2)
{
    const size_t common = l1 < l2 ? l1 : l2;

    const int diff = wxStringMemcmp(s1, s2, common);
    if ( diff != 0 || l1 == l2 )
        return diff;

    // identical common part, so the lengths decide
    return l1 < l2 ? -1 : +1;
}
int wxString::compare(const wxString& str) const
{
return ::wxDoCmp(m_impl.data(), m_impl.length(),
str.m_impl.data(), str.m_impl.length());
}
// Compares the (at most) nLen characters starting at nStart with the whole
// of str.
int wxString::compare(size_t nStart, size_t nLen,
                      const wxString& str) const
{
    wxASSERT(nStart <= length());

    // don't go beyond the end of this string
    const size_type avail = length() - nStart;
    if ( avail < nLen )
        nLen = avail;

    size_t implPos, implLen;
    PosLenToImpl(nStart, nLen, &implPos, &implLen);

    return ::wxDoCmp(m_impl.data() + implPos, implLen,
                     str.m_impl.data(), str.m_impl.length());
}
// Compares the (at most) nLen characters starting at nStart with the (at
// most) nLen2 characters of str starting at nStart2.
int wxString::compare(size_t nStart, size_t nLen,
                      const wxString& str,
                      size_t nStart2, size_t nLen2) const
{
    wxASSERT(nStart <= length());
    wxASSERT(nStart2 <= str.length());

    // don't go beyond the end of either string
    const size_type avail = length() - nStart;
    if ( avail < nLen )
        nLen = avail;

    const size_type avail2 = str.length() - nStart2;
    if ( avail2 < nLen2 )
        nLen2 = avail2;

    size_t implPos, implLen;
    PosLenToImpl(nStart, nLen, &implPos, &implLen);

    size_t otherPos, otherLen;
    str.PosLenToImpl(nStart2, nLen2, &otherPos, &otherLen);

    return ::wxDoCmp(m_impl.data() + implPos, implLen,
                     str.m_impl.data() + otherPos, otherLen);
}
int wxString::compare(const char* sz) const
{
SubstrBufFromMB str(ImplStr(sz, npos));
if ( str.len == npos )
str.len = wxStringStrlen(str.data);
return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
}
int wxString::compare(const wchar_t* sz) const
{
SubstrBufFromWC str(ImplStr(sz, npos));
if ( str.len == npos )
str.len = wxStringStrlen(str.data);
return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
}
int wxString::compare(size_t nStart, size_t nLen,
const char* sz, size_t nCount) const
{
wxASSERT(nStart <= length());
size_type strLen = length() - nStart;
nLen = strLen < nLen ? strLen : nLen;
size_t pos, len;
PosLenToImpl(nStart, nLen, &pos, &len);
SubstrBufFromMB str(ImplStr(sz, nCount));
if ( str.len == npos )
str.len = wxStringStrlen(str.data);
return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
}
int wxString::compare(size_t nStart, size_t nLen,
const wchar_t* sz, size_t nCount) const
{
wxASSERT(nStart <= length());
size_type strLen = length() - nStart;
nLen = strLen < nLen ? strLen : nLen;
size_t pos, len;
PosLenToImpl(nStart, nLen, &pos, &len);
SubstrBufFromWC str(ImplStr(sz, nCount));
if ( str.len == npos )
str.len = wxStringStrlen(str.data);
return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
}
#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
// ---------------------------------------------------------------------------
// find_{first,last}_[not]_of functions
// ---------------------------------------------------------------------------
#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
// NB: All these functions are implemented with the argument being wxChar*,
// i.e. widechar string in any Unicode build, even though native string
// representation is char* in the UTF-8 build. This is because we couldn't
// use memchr() to determine if a character is in a set encoded as UTF-8.
// Same as the 3 argument overload with the set being the whole of the
// NUL-terminated string sz.
size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
{
    const size_t setLen = wxStrlen(sz);

    return find_first_of(sz, nStart, setLen);
}
// Same as the 3 argument overload with the set being the whole of the
// NUL-terminated string sz.
size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
{
    const size_t setLen = wxStrlen(sz);

    return find_first_not_of(sz, nStart, setLen);
}
// Returns the index of the first character at or after nStart which occurs
// among the first n elements of sz, or npos if there is none.
size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
{
    wxASSERT_MSG( nStart <= length(), _T("invalid index") );

    const_iterator cur = begin() + nStart;
    size_t pos = nStart;
    while ( cur != end() )
    {
        if ( wxTmemchr(sz, *cur, n) )
            return pos;

        ++pos;
        ++cur;
    }

    return npos;
}
// Returns the index of the first character at or after nStart which does not
// occur among the first n elements of sz, or npos if there is none.
size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
{
    wxASSERT_MSG( nStart <= length(), _T("invalid index") );

    const_iterator cur = begin() + nStart;
    size_t pos = nStart;
    while ( cur != end() )
    {
        if ( !wxTmemchr(sz, *cur, n) )
            return pos;

        ++pos;
        ++cur;
    }

    return npos;
}
// Same as the 3 argument overload with the set being the whole of the
// NUL-terminated string sz.
size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
{
    const size_t setLen = wxStrlen(sz);

    return find_last_of(sz, nStart, setLen);
}
// Same as the 3 argument overload with the set being the whole of the
// NUL-terminated string sz.
size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
{
    const size_t setLen = wxStrlen(sz);

    return find_last_not_of(sz, nStart, setLen);
}
// Returns the index of the last character at or before nStart which occurs
// among the first n elements of sz, or npos if there is none.
//
// nStart may be npos, meaning "start from the last character". As with
// std::basic_string, a start position at or beyond the end of the string
// also starts the search from the last character.
size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
{
    const size_t len = length();

    if ( nStart != npos )
    {
        wxASSERT_MSG( nStart <= len, _T("invalid index") );
    }

    // nothing can be found in an empty string and the offset computation
    // below requires at least one character
    if ( len == 0 )
        return npos;

    // this covers nStart == npos as well as nStart == len: without clamping,
    // "len - nStart - 1" below would wrap around and move the reverse
    // iterator out of the string
    if ( nStart >= len )
        nStart = len - 1;

    size_t idx = nStart;
    for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
          i != rend(); --idx, ++i )
    {
        if ( wxTmemchr(sz, *i, n) )
            return idx;
    }

    return npos;
}
// Returns the index of the last character at or before nStart which does not
// occur among the first n elements of sz, or npos if there is none.
//
// nStart may be npos, meaning "start from the last character". As with
// std::basic_string, a start position at or beyond the end of the string
// also starts the search from the last character.
size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
{
    const size_t len = length();

    if ( nStart != npos )
    {
        wxASSERT_MSG( nStart <= len, _T("invalid index") );
    }

    // nothing can be found in an empty string and the offset computation
    // below requires at least one character
    if ( len == 0 )
        return npos;

    // this covers nStart == npos as well as nStart == len: without clamping,
    // "len - nStart - 1" below would wrap around and move the reverse
    // iterator out of the string
    if ( nStart >= len )
        nStart = len - 1;

    size_t idx = nStart;
    for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
          i != rend(); --idx, ++i )
    {
        if ( !wxTmemchr(sz, *i, n) )
            return idx;
    }

    return npos;
}
// Returns the index of the first character at or after nStart which differs
// from ch, or npos if there is none.
size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
{
    wxASSERT_MSG( nStart <= length(), _T("invalid index") );

    const_iterator cur = begin() + nStart;
    size_t pos = nStart;
    while ( cur != end() )
    {
        if ( *cur != ch )
            return pos;

        ++pos;
        ++cur;
    }

    return npos;
}
// Returns the index of the last character at or before nStart which differs
// from ch, or npos if there is none.
//
// nStart may be npos, meaning "start from the last character". As with
// std::basic_string, a start position at or beyond the end of the string
// also starts the search from the last character.
size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
{
    const size_t len = length();

    if ( nStart != npos )
    {
        wxASSERT_MSG( nStart <= len, _T("invalid index") );
    }

    // nothing can be found in an empty string and the offset computation
    // below requires at least one character
    if ( len == 0 )
        return npos;

    // this covers nStart == npos as well as nStart == len: without clamping,
    // "len - nStart - 1" below would wrap around and move the reverse
    // iterator out of the string
    if ( nStart >= len )
        nStart = len - 1;

    size_t idx = nStart;
    for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
          i != rend(); --idx, ++i )
    {
        if ( *i != ch )
            return idx;
    }

    return npos;
}
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// version:
// wxOtherCharType is the character type which is *not* wxChar in this build
// and STRCONV converts a string of that type with wxConvLibc, casting the
// result to const wxChar*. Every overload below converts its argument and
// forwards to the wxChar* overload of the same name.
//
// NOTE(review): the overloads taking n pass the same n for both the source
//               and the converted string; presumably this assumes that the
//               conversion doesn't change the number of elements -- confirm.
#if wxUSE_UNICODE
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
{
    return find_first_of(STRCONV(sz), nStart);
}

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
{
    return find_first_of(STRCONV(sz, n, NULL), nStart, n);
}

size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
{
    return find_last_of(STRCONV(sz), nStart);
}

size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
{
    return find_last_of(STRCONV(sz, n, NULL), nStart, n);
}

size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
{
    return find_first_not_of(STRCONV(sz), nStart);
}

size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
{
    return find_first_not_of(STRCONV(sz, n, NULL), nStart, n);
}

size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
{
    return find_last_not_of(STRCONV(sz), nStart);
}

size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
{
    return find_last_not_of(STRCONV(sz, n, NULL), nStart, n);
}

#undef wxOtherCharType
#undef STRCONV
#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
// ===========================================================================
// other common string functions
// ===========================================================================
// Case-insensitive comparison with s: returns -1, 0 or 1 in the UTF-8 build
// and the result of wxStricmp() on the underlying C strings otherwise.
int wxString::CmpNoCase(const wxString& s) const
{
#if wxUSE_UNICODE_UTF8
    // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added

    const_iterator lhs = begin();
    const_iterator lhsEnd = end();
    const_iterator rhs = s.begin();
    const_iterator rhsEnd = s.end();

    // walk over the common part, comparing lower-cased characters
    while ( lhs != lhsEnd && rhs != rhsEnd )
    {
        wxUniChar a = (wxChar)wxTolower(*lhs);
        wxUniChar b = (wxChar)wxTolower(*rhs);
        if ( a != b )
            return a < b ? -1 : 1;

        ++lhs;
        ++rhs;
    }

    // the common part is the same, so the shorter string sorts first
    const size_t lhsLen = length();
    const size_t rhsLen = s.length();
    if ( lhsLen == rhsLen )
        return 0;

    return lhsLen < rhsLen ? -1 : 1;
#else // wxUSE_UNICODE_WCHAR or ANSI
    return wxStricmp(m_impl.c_str(), s.m_impl.c_str());
#endif
}
#if wxUSE_UNICODE
// when __MWERKS__ is defined, make sure that __SCHAR_MAX__ is defined too,
// using 127 as its value if it is missing
#ifdef __MWERKS__
#ifndef __SCHAR_MAX__
#define __SCHAR_MAX__ 127
#endif
#endif
// Creates a string from the first len characters of an ASCII string.
//
// Returns an empty string if ascii is NULL or len is 0; asserts (in debug
// builds) for each character which is not 7 bit ASCII.
wxString wxString::FromAscii(const char *ascii, size_t len)
{
    if ( !ascii || len == 0 )
        return wxEmptyString;

    wxString res;

    {
        // the buffer is written to directly and must go out of scope before
        // the string is returned
        wxStringInternalBuffer buf(res, len);
        wxStringCharType *out = buf;

        const char * const stop = ascii + len;
        for ( const char *in = ascii; in != stop; ++in, ++out )
        {
            unsigned char c = (unsigned char)*in;
            wxASSERT_MSG( c < 0x80,
                          _T("Non-ASCII value passed to FromAscii().") );

            *out = (wchar_t)c;
        }
    }

    return res;
}
// Creates a string from a NUL-terminated ASCII string.
//
// Returns an empty string if ascii is NULL, consistently with the overload
// taking an explicit length which accepts NULL too.
wxString wxString::FromAscii(const char *ascii)
{
    // don't compute the length of a NULL pointer
    if ( !ascii )
        return wxEmptyString;

    return FromAscii(ascii, wxStrlen(ascii));
}
// Creates a string consisting of a single ASCII character; asserts (in debug
// builds) if the character is not 7 bit ASCII.
wxString wxString::FromAscii(char ascii)
{
    // What do we do with '\0' ?

    const unsigned char code = (unsigned char)ascii;

    wxASSERT_MSG( code < 0x80, _T("Non-ASCII value passed to FromAscii().") );

    // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
    const wxUniChar uc((wchar_t)code);

    return wxString(uc);
}
// Returns the string converted to 7 bit ASCII, with every non-ASCII character
// replaced by '_'; the conversion stops after the first NUL character.
const wxScopedCharBuffer wxString::ToAscii() const
{
    // this will allocate enough space for the terminating NUL too
    wxCharBuffer buffer(length());

    char *out = buffer.data();
    const_iterator cur = begin();
    while ( cur != end() )
    {
        wxUniChar c(*cur);

        // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
        if ( c.IsAscii() )
            *out = (char)c;
        else
            *out = '_';
        ++out;

        // the output string can't have embedded NULs anyhow, so we can safely
        // stop at first of them even if we do have any
        if ( !c )
            break;

        ++cur;
    }

    return buffer;
}
#endif // wxUSE_UNICODE
// extract string of length nCount starting at nFirst
//
// nCount may be npos (the default), meaning "till the end", and is truncated
// if it extends beyond the end of the string; an empty string is returned if
// nFirst is beyond the end.
wxString wxString::Mid(size_t nFirst, size_t nCount) const
{
    const size_t nLen = length();

    // out-of-bounds requests return sensible things
    if ( nFirst > nLen )
    {
        return wxEmptyString;
    }

    // truncate the count to what is really available: this also handles the
    // default npos value and, unlike testing "nFirst + nCount > nLen", can't
    // be defeated by the sum overflowing for huge nCount values
    const size_t nAvail = nLen - nFirst;
    if ( nCount > nAvail )
    {
        nCount = nAvail;
    }

    wxString dest(*this, nFirst, nCount);
    if ( dest.length() != nCount )
    {
        wxFAIL_MSG( _T("out of memory in wxString::Mid") );
    }

    return dest;
}
// returns true if the string starts with prefix, false otherwise; on success
// the part of the string after the prefix is also stored in rest if it is
// not NULL
bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
{
    const bool matches = compare(0, prefix.length(), prefix) == 0;

    if ( matches && rest )
    {
        // everything after the prefix goes to the provided string
        rest->assign(*this, prefix.length(), npos);
    }

    return matches;
}
// check that the string ends with suffix and return the rest of it in the
// provided pointer if it is not NULL, otherwise return false
bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
{
int start = length() - suffix.length();
if ( start < 0 || compare(start, npos, suffix) != 0 )
return false;
if ( rest )
{
// put the rest of the string into provided pointer
rest->assign(*this, 0, start);
}
return true;
}
// extract nCount last (rightmost) characters, or the whole string if it has
// fewer than nCount of them
wxString wxString::Right(size_t nCount) const
{
    const size_t len = length();
    if ( nCount > len )
        nCount = len;

    wxString tail(*this, len - nCount, nCount);
    if ( tail.length() != nCount )
    {
        wxFAIL_MSG( _T("out of memory in wxString::Right") );
    }

    return tail;
}
// get all characters after the last occurrence of ch
// (returns the whole string if ch not found)
wxString wxString::AfterLast(wxUniChar ch) const
{
    const int found = Find(ch, true);
    if ( found == wxNOT_FOUND )
        return *this;

    wxString tail;
    tail.assign(*this, found + 1, npos);
    return tail;
}
// Return the nCount leftmost characters of the string, or the whole string
// if it is shorter than nCount.
wxString wxString::Left(size_t nCount) const
{
    const size_t len = length();
    const size_t nTake = nCount > len ? len : nCount;

    wxString dest(*this, 0, nTake);
    if ( dest.length() != nTake ) {
        wxFAIL_MSG( _T("out of memory in wxString::Left") );
    }

    return dest;
}
// Return the part of the string preceding the first occurrence of ch; if ch
// doesn't occur in the string at all, the whole string is returned.
wxString wxString::BeforeFirst(wxUniChar ch) const
{
    const int posFirst = Find(ch);

    // when the character is absent the "head" extends over the entire string
    const int nHead = posFirst == wxNOT_FOUND ? (int)length() : posFirst;

    return wxString(*this, 0, nHead);
}
/// Return the part of the string preceding the last occurrence of ch.
///
/// An empty string is returned if ch is not found (and also, naturally, if
/// its last occurrence is the very first character).
wxString wxString::BeforeLast(wxUniChar ch) const
{
    const int posLast = Find(ch, true);
    if ( posLast == wxNOT_FOUND || posLast == 0 )
        return wxString();

    return wxString(c_str(), posLast);
}
/// Return the part of the string following the first occurrence of ch, or an
/// empty string if ch doesn't occur in the string at all.
wxString wxString::AfterFirst(wxUniChar ch) const
{
    wxString tail;

    const int posFirst = Find(ch);
    if ( posFirst == wxNOT_FOUND )
        return tail;

    tail.assign(*this, posFirst + 1, npos);
    return tail;
}
// Replace the first (or, if bReplaceAll is true, all) occurrences of strOld
// with strNew and return the number of replacements made.
//
// strOld must not be empty: this is verified by wxCHECK_MSG() below which is
// given 0 as the value to return for an empty strOld.
size_t wxString::Replace(const wxString& strOld,
                         const wxString& strNew, bool bReplaceAll)
{
    // if we tried to replace an empty string we'd enter an infinite loop below
    wxCHECK_MSG( !strOld.empty(), 0,
                 _T("wxString::Replace(): invalid parameter") );

    wxSTRING_INVALIDATE_CACHE();

    size_t uiCount = 0;   // count of replacements made

    // all the work is done on the internal representations of the strings
    const size_t uiOldLen = strOld.m_impl.length();
    const size_t uiNewLen = strNew.m_impl.length();

    // optimize the special common case: replacement of one character by
    // another one (in UTF-8 case we can only do this for ASCII characters)
    //
    // benchmarks show that this special version is around 3 times faster
    // (depending on the proportion of matching characters and UTF-8/wchar_t
    // build)
    if ( uiOldLen == 1 && uiNewLen == 1 )
    {
        const wxStringCharType chOld = strOld.m_impl[0],
                               chNew = strNew.m_impl[0];

        // this loop is the simplified version of the one below
        for ( size_t pos = 0; ; )
        {
            pos = m_impl.find(chOld, pos);
            if ( pos == npos )
                break;

            m_impl[pos++] = chNew;

            uiCount++;

            if ( !bReplaceAll )
                break;
        }
    }
    else if ( !bReplaceAll )
    {
        // search for the internal representation of strOld, exactly as the
        // "replace all" branch below does, instead of relying on an implicit
        // conversion of the wxString itself to whatever find() accepts
        const size_t pos = m_impl.find(strOld.m_impl, 0);
        if ( pos != npos )
        {
            m_impl.replace(pos, uiOldLen, strNew.m_impl);
            uiCount = 1;
        }
    }
    else // replace all occurrences
    {
        // first scan the string to find all positions at which the replacement
        // should be made
        wxVector<size_t> replacePositions;

        size_t pos;
        for ( pos = m_impl.find(strOld.m_impl, 0);
              pos != npos;
              pos = m_impl.find(strOld.m_impl, pos + uiOldLen) )
        {
            replacePositions.push_back(pos);
            ++uiCount;
        }

        if ( !uiCount )
            return 0;

        // allocate enough memory for the whole new string; note that the
        // difference may wrap around if the new string is shorter than the
        // old one but, as the arithmetic is unsigned (and hence modular), the
        // final sum is still the correct length
        wxString tmp;
        tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));

        // copy this string to tmp doing replacements on the fly
        size_t replNum = 0;
        for ( pos = 0; replNum < uiCount; replNum++ )
        {
            const size_t nextReplPos = replacePositions[replNum];

            if ( pos != nextReplPos )
            {
                // copy the unchanged part preceding this occurrence
                tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
            }

            tmp.m_impl.append(strNew.m_impl);
            pos = nextReplPos + uiOldLen;
        }

        if ( pos != m_impl.length() )
        {
            // append the rest of the string unchanged
            tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
        }

        swap(tmp);
    }

    return uiCount;
}
bool wxString::IsAscii() const
{
for ( const_iterator i = begin(); i != end(); ++i )
{
if ( !(*i).IsAscii() )
return false;
}
return true;
}
bool wxString::IsWord() const
{
for ( const_iterator i = begin(); i != end(); ++i )
{
if ( !wxIsalpha(*i) )
return false;
}
return true;
}
bool wxString::IsNumber() const
{
if ( empty() )
return true;
const_iterator i = begin();
if ( *i == _T('-') || *i == _T('+') )
++i;
for ( ; i != end(); ++i )
{
if ( !wxIsdigit(*i) )
return false;
}
return true;
}
// Return a copy of this string trimmed on the side(s) selected by the leading
// and trailing bits of w; the string itself is not modified.
wxString wxString::Strip(stripType w) const
{
    wxString stripped(*this);

    if ( (w & leading) != 0 )
        stripped.Trim(false);

    if ( (w & trailing) != 0 )
        stripped.Trim(true);

    return stripped;
}
// ---------------------------------------------------------------------------
// case conversion
// ---------------------------------------------------------------------------
// Convert all characters of the string to upper case in place using
// wxToupper() and return the string itself.
wxString& wxString::MakeUpper()
{
    iterator cur = begin(),
             last = end();
    while ( cur != last )
    {
        *cur = (wxChar)wxToupper(*cur);
        ++cur;
    }

    return *this;
}
// Convert all characters of the string to lower case in place using
// wxTolower() and return the string itself.
wxString& wxString::MakeLower()
{
    iterator cur = begin(),
             last = end();
    while ( cur != last )
    {
        *cur = (wxChar)wxTolower(*cur);
        ++cur;
    }

    return *this;
}
// Convert the first character of the string to upper case and all the
// following ones to lower case, in place, and return the string itself.
wxString& wxString::MakeCapitalized()
{
    const iterator en = end();
    iterator it = begin();

    // nothing to do for an empty string
    if ( it == en )
        return *this;

    *it = (wxChar)wxToupper(*it);

    ++it;
    while ( it != en )
    {
        *it = (wxChar)wxTolower(*it);
        ++it;
    }

    return *this;
}
// ---------------------------------------------------------------------------
// trimming and padding
// ---------------------------------------------------------------------------
// A more conservative version of wxIsspace(): some compilers (VC++ 6.0 not to
// name them) return true for a call to isspace('\xEA') in the C locale, which
// seems to be broken, so only characters with codes less than 127 are ever
// considered to be spaces here. This may fail to detect space-like characters
// with greater codes, if there are any, but it is arguably still better than
// trimming away accented letters.
//
// Returns 1 if ch is considered to be a space and 0 otherwise.
inline int wxSafeIsspace(wxChar ch)
{
    if ( ch >= 127 )
        return 0;

    return wxIsspace(ch) ? 1 : 0;
}
// Remove the spaces (in the sense of wxSafeIsspace()) from the right side of
// the string if bFromRight is true or from its left side otherwise and return
// the string itself.
wxString& wxString::Trim(bool bFromRight)
{
    // there is nothing to trim in an empty string
    if ( empty() )
        return *this;

    // look at the outermost character on the requested side first and leave
    // the string completely alone if it is not a space
    const bool hasSpaceAtEdge = bFromRight
                                    ? wxSafeIsspace(GetChar(length() - 1)) != 0
                                    : wxSafeIsspace(GetChar(0u)) != 0;
    if ( !hasSpaceAtEdge )
        return *this;

    if ( bFromRight )
    {
        // walk backwards until the last non-space character is found
        reverse_iterator rit = rbegin();
        for ( ; rit != rend(); ++rit )
        {
            if ( !wxSafeIsspace(*rit) )
                break;
        }

        // and cut off everything following it
        erase(rit.base(), end());
    }
    else
    {
        // walk forwards until the first non-space character is found
        iterator it = begin();
        for ( ; it != end(); ++it )
        {
            if ( !wxSafeIsspace(*it) )
                break;
        }

        // and cut off everything preceding it
        erase(begin(), it);
    }

    return *this;
}
// Add nCount copies of the character chPad to the string: they are appended
// to it if bFromRight is true and prepended to it otherwise. Returns the
// string itself.
wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
{
    wxString padding(chPad, nCount);

    if ( !bFromRight )
    {
        // build "padding + this" and then make it our new contents
        padding += *this;
        swap(padding);
        return *this;
    }

    *this += padding;
    return *this;
}
// Shorten the string to at most uiLen characters and return the string
// itself; strings which are already short enough are left unchanged.
wxString& wxString::Truncate(size_t uiLen)
{
    if ( uiLen >= length() )
    {
        // nothing to do, string is already short enough
        return *this;
    }

    erase(begin() + uiLen, end());
    return *this;
}
// ---------------------------------------------------------------------------
// finding (return wxNOT_FOUND if not found and index otherwise)
// ---------------------------------------------------------------------------
// Find a character in the string: the search is for its last occurrence if
// bFromEnd is true and for the first one otherwise. Returns the index of the
// character found or wxNOT_FOUND.
int wxString::Find(wxUniChar ch, bool bFromEnd) const
{
    size_type idx;
    if ( bFromEnd )
        idx = find_last_of(ch);
    else
        idx = find_first_of(ch);

    if ( idx == npos )
        return wxNOT_FOUND;

    return (int)idx;
}
// ----------------------------------------------------------------------------
// conversion to numbers
// ----------------------------------------------------------------------------
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.
//
// The two macros below are meant to be used together inside a const wxString
// member function having an output parameter called "pVal": the code between
// them must define a variable called "val" and make "end" point just after
// the last character used for the conversion of the string at "start".

// DO_IF_NOT_WINCE(x) expands to x, except when __WXWINCE__ is defined, in
// which case it expands to nothing: it is used to wrap all uses of errno
// (<errno.h> is not included at the top of this file for __WXWINCE__).
#ifndef __WXWINCE__
#define DO_IF_NOT_WINCE(x) x
#else
#define DO_IF_NOT_WINCE(x)
#endif
// Function prologue: checks that pVal is not NULL, resets errno (unless it
// is not used, see above) and declares the "start" and "end" pointers.
#define WX_STRING_TO_X_TYPE_START \
wxCHECK_MSG( pVal, false, _T("NULL output pointer") ); \
DO_IF_NOT_WINCE( errno = 0; ) \
const wxStringCharType *start = wx_str(); \
wxStringCharType *end;
// Function epilogue: returns false without touching *pVal if the conversion
// failed or, otherwise, stores "val" in *pVal and returns true.
#define WX_STRING_TO_X_TYPE_END \
/* return true only if scan was stopped by the terminating NUL and */ \
/* if the string was not empty to start with and no under/overflow */ \
/* occurred: */ \
if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
return false; \
*pVal = val; \
return true;
// Convert the string to a long using wxStrtol() with the given base, which
// is asserted to be either 0 or in the 2..36 range.
//
// Returns true and stores the result in *pVal only if the string is not
// empty, was entirely used by the conversion and (when errno is available,
// i.e. not under WinCE) no ERANGE error occurred. Otherwise false is returned
// and *pVal is not modified.
bool wxString::ToLong(long *pVal, int base) const
{
wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
WX_STRING_TO_X_TYPE_START
long val = wxStrtol(start, &end, base);
WX_STRING_TO_X_TYPE_END
}
// Convert the string to an unsigned long using wxStrtoul() with the given
// base, which is asserted to be either 0 or in the 2..36 range.
//
// Returns true and stores the result in *pVal only if the string is not
// empty, was entirely used by the conversion and (when errno is available,
// i.e. not under WinCE) no ERANGE error occurred. Otherwise false is returned
// and *pVal is not modified.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
WX_STRING_TO_X_TYPE_START
unsigned long val = wxStrtoul(start, &end, base);
WX_STRING_TO_X_TYPE_END
}
// Convert the string to a wxLongLong_t using wxStrtoll() with the given
// base, which is asserted to be either 0 or in the 2..36 range.
//
// Returns true and stores the result in *pVal only if the string is not
// empty, was entirely used by the conversion and (when errno is available,
// i.e. not under WinCE) no ERANGE error occurred. Otherwise false is returned
// and *pVal is not modified.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
WX_STRING_TO_X_TYPE_START
wxLongLong_t val = wxStrtoll(start, &end, base);
WX_STRING_TO_X_TYPE_END
}
// Convert the string to a wxULongLong_t using wxStrtoull() with the given
// base, which is asserted to be either 0 or in the 2..36 range.
//
// Returns true and stores the result in *pVal only if the string is not
// empty, was entirely used by the conversion and (when errno is available,
// i.e. not under WinCE) no ERANGE error occurred. Otherwise false is returned
// and *pVal is not modified.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
WX_STRING_TO_X_TYPE_START
wxULongLong_t val = wxStrtoull(start, &end, base);
WX_STRING_TO_X_TYPE_END
}
// Convert the string to a double using wxStrtod().
//
// Returns true and stores the result in *pVal only if the string is not
// empty, was entirely used by the conversion and (when errno is available,
// i.e. not under WinCE) no ERANGE error occurred. Otherwise false is returned
// and *pVal is not modified.
bool wxString::ToDouble(double *pVal) const
{
WX_STRING_TO_X_TYPE_START
double val = wxStrtod(start, &end);
WX_STRING_TO_X_TYPE_END
}
#if wxUSE_XLOCALE
// Same as ToLong() but the conversion is done by wxStrtol_l() (or by
// wxStrtol_lA() in UTF-8 and non-Unicode builds) which is passed wxCLocale.
//
// Returns true and stores the result in *pVal only if the string is not
// empty, was entirely used by the conversion and (when errno is available,
// i.e. not under WinCE) no ERANGE error occurred. Otherwise false is returned
// and *pVal is not modified.
bool wxString::ToCLong(long *pVal, int base) const
{
wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
WX_STRING_TO_X_TYPE_START
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
long val = wxStrtol_lA(start, &end, base, wxCLocale);
#else
long val = wxStrtol_l(start, &end, base, wxCLocale);
#endif
WX_STRING_TO_X_TYPE_END
}
// Same as ToULong() but the conversion is done by wxStrtoul_l() (or by
// wxStrtoul_lA() in UTF-8 and non-Unicode builds) which is passed wxCLocale.
//
// Returns true and stores the result in *pVal only if the string is not
// empty, was entirely used by the conversion and (when errno is available,
// i.e. not under WinCE) no ERANGE error occurred. Otherwise false is returned
// and *pVal is not modified.
bool wxString::ToCULong(unsigned long *pVal, int base) const
{
wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
WX_STRING_TO_X_TYPE_START
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
#else
unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
#endif
WX_STRING_TO_X_TYPE_END
}
// Same as ToDouble() but the conversion is done by wxStrtod_l() (or by
// wxStrtod_lA() in UTF-8 and non-Unicode builds) which is passed wxCLocale.
//
// Returns true and stores the result in *pVal only if the string is not
// empty, was entirely used by the conversion and (when errno is available,
// i.e. not under WinCE) no ERANGE error occurred. Otherwise false is returned
// and *pVal is not modified.
bool wxString::ToCDouble(double *pVal) const
{
WX_STRING_TO_X_TYPE_START
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
double val = wxStrtod_lA(start, &end, wxCLocale);
#else
double val = wxStrtod_l(start, &end, wxCLocale);
#endif
WX_STRING_TO_X_TYPE_END
}
#endif // wxUSE_XLOCALE
// ---------------------------------------------------------------------------
// formatted output
// ---------------------------------------------------------------------------
#if !wxUSE_UTF8_LOCALE_ONLY
// Return a new string containing the result of formatting the variable
// arguments according to the given wxChar format string using PrintfV().
/* static */
#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
#else
wxString wxString::DoFormatWchar(const wxChar *format, ...)
#endif
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8
// Return a new string containing the result of formatting the variable
// arguments according to the given char format string using PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
// Return a new string containing the result of formatting the arguments in
// argptr according to the given format string using PrintfV().
/* static */
wxString wxString::FormatV(const wxString& format, va_list argptr)
{
    wxString formatted;
    formatted.PrintfV(format, argptr);

    return formatted;
}
#if !wxUSE_UTF8_LOCALE_ONLY
#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
#else
int wxString::DoPrintfWchar(const wxChar *format, ...)
#endif
{
va_list argptr;
va_start(argptr, format);
#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
// get a pointer to the wxString instance; we have to use dynamic_cast<>
// because it's the only cast that works safely for downcasting when
// multiple inheritance is used:
wxString *str = static_cast<wxString*>(this);
#else
wxString *str = this;
#endif
int iLen = str->PrintfV(format, argptr);
va_end(argptr);
return iLen;
}
#endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments according to the given char format string and return
// whatever PrintfV() returned.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int nWritten = PrintfV(format, args);
    va_end(args);

    return nWritten;
}
#endif // wxUSE_UNICODE_UTF8
/*
Uses wxVsnprintf and places the result into this string.
In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
the ISO C99 (and thus SUSv3) standard the return value for the case of
an undersized buffer is inconsistent. For conforming vsnprintf
implementations the function must return the number of characters that
would have been printed had the buffer been large enough. For conforming
vswprintf implementations the function must return a negative number
and set errno.
What vswprintf sets errno to is undefined but Darwin seems to set it to
EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
those are defined in the standard and backed up by several conformance
statements. Note that ENOMEM mentioned in the manual page does not
apply to swprintf, only wprintf and fwprintf.
Official manual page:
http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
Some conformance statements (AIX, Solaris):
http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
EILSEQ and EINVAL are specifically defined to mean the error is other than
an undersized buffer and no other errno are defined we treat those two
as meaning hard errors and everything else gets the old behavior which
is to keep looping and increasing buffer size until the function succeeds.
In practice it's impossible to determine before compilation which behavior
may be used. The vswprintf function may have vsnprintf-like behavior or
vice-versa. Behavior detected on one release can theoretically change
with an updated release. Not to mention that configure testing for it
would require the test to be run on the host system, not the build system
which makes cross compilation difficult. Therefore, we make no assumptions
about behavior and try our best to handle every known case, including the
case where wxVsnprintf returns a negative number and fails to set errno.
There is yet one more non-standard implementation and that is our own.
Fortunately, that can be detected at compile-time.
On top of all that, ISO C99 explicitly defines snprintf to write a null
character to the last position of the specified buffer. That would be at
the given buffer size minus 1. It is supposed to do this even if it
turns out that the buffer is sized too small.
Darwin (tested on 10.5) follows the C99 behavior exactly.
Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
errno even when it fails. However, it only seems to ever fail due
to an undersized buffer.
*/
// Format the arguments in argptr according to format directly into the
// buffer of str, growing the buffer until the output fits into it.
//
// Returns the length of the resulting string or -1 if the buffer could not be
// obtained or if formatting failed with a hard error; str is reset to empty
// in the latter case.
#if wxUSE_UNICODE_UTF8
template<typename BufferType>
#else
// we only need one version in non-UTF8 builds and at least two Windows
// compilers have problems with this function template, so use just one
// normal function here
#endif
static int DoStringPrintfV(wxString& str,
                           const wxString& format, va_list argptr)
{
    int size = 1024;

    for ( ;; )
    {
        // the buffer object is (re)created at each iteration, so the buffer
        // always has room for size characters plus the terminating NUL
#if wxUSE_UNICODE_UTF8
        BufferType tmp(str, size + 1);
        typename BufferType::CharType *buf = tmp;
#else
        wxStringBuffer tmp(str, size + 1);
        wxChar *buf = tmp;
#endif

        if ( !buf )
        {
            // out of memory: there is no buffer we could write anything to,
            // so buf must not be dereferenced here, just report the failure
            return -1;
        }

        // wxVsnprintf() may modify the original arg pointer, so pass it
        // only a copy
        va_list argptrcopy;
        wxVaCopy(argptrcopy, argptr);

#ifndef __WXWINCE__
        // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
        errno = 0;
#endif
        int len = wxVsnprintf(buf, size, format, argptrcopy);
        va_end(argptrcopy);

        // some implementations of vsnprintf() don't NUL terminate
        // the string if there is not enough space for it so
        // always do it manually
        //
        // NB: writing to buf[size] is not an off-by-one error as the buffer
        //     was allocated with an extra character above
        buf[size] = _T('\0');

        // vsnprintf() may return either -1 (traditional Unix behaviour) or the
        // total number of characters which would have been written if the
        // buffer were large enough (newer standards such as Unix98)
        if ( len < 0 )
        {
            // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
            //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
            //     is true if *both* of them use our own implementation,
            //     otherwise we can't be sure
#if wxUSE_WXVSNPRINTF
            // we know that our own implementation of wxVsnprintf() returns -1
            // only for a format error - thus there's something wrong with
            // the user's format string
            //
            // in UTF-8 build, leaving uninitialized junk in the buffer
            // could result in invalid non-empty UTF-8 string, so just
            // reset the string to empty on failure:
            buf[0] = '\0';
            return -1;
#else // possibly using system version
#ifndef __WXWINCE__
            if ( (errno == EILSEQ) || (errno == EINVAL) )
            {
                // If errno was set to one of the two well-known hard errors
                // then fail immediately to avoid an infinite loop and, as
                // for any other formatting failure, don't leave whatever was
                // written to the buffer so far in the string.
                buf[0] = '\0';
                return -1;
            }
#endif // __WXWINCE__

            // otherwise assume that the error is due to not having enough
            // space but, as we don't know how much we need, double the
            // current size of the buffer
            size *= 2;
#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
        }
        else if ( len >= size )
        {
#if wxUSE_WXVSNPRINTF
            // we know that our own implementation of wxVsnprintf() returns
            // size+1 when there's not enough space but that's not the size
            // of the required buffer!
            size *= 2;      // so we just double the current size of the buffer
#else
            // the return value is the number of characters needed not
            // including the terminating NUL, so add 1 to it to get the
            // required size (this also covers the len == size case in which
            // some vsnprintf() implementations NUL-terminate the buffer and
            // some don't)
            size = len + 1;
#endif
        }
        else // ok, there was enough space
        {
            break;
        }
    }

    // we could have overshot
    str.Shrink();

    return str.length();
}
// Replace the contents of the string with the result of formatting the
// arguments in argptr according to format, by forwarding to the version of
// DoStringPrintfV() appropriate for the current build (and, in the UTF-8
// build not restricted to UTF-8 locales, for the current locale).
int wxString::PrintfV(const wxString& format, va_list argptr)
{
#if wxUSE_UNICODE_UTF8
    // the buffer class used with the char version of DoStringPrintfV()
#if wxUSE_STL_BASED_WXSTRING
    typedef wxStringTypeBuffer<char> Utf8Buffer;
#else
    typedef wxStringInternalBuffer Utf8Buffer;
#endif
#endif // wxUSE_UNICODE_UTF8

#if wxUSE_UTF8_LOCALE_ONLY
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#elif wxUSE_UNICODE_UTF8
    // use the wxChar* version unless the locale uses UTF-8
    return wxLocaleIsUtf8
            ? DoStringPrintfV<Utf8Buffer>(*this, format, argptr)
            : DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
#else
    return DoStringPrintfV(*this, format, argptr);
#endif // UTF8/WCHAR
}
// ----------------------------------------------------------------------------
// misc other operations
// ----------------------------------------------------------------------------
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// The matching is done by walking over the mask and the text in parallel;
// when the mask is exhausted but some text remains, the function goes back to
// the last '*' seen and tries again starting one character further in the
// text.
bool wxString::Matches(const wxString& mask) const
{
// I disable this code as it doesn't seem to be faster (in fact, it seems
// to be much slower) than the old, hand-written code below and using it
// here requires always linking with libregex even if the user code doesn't
// use it
#if 0 // wxUSE_REGEX
// first translate the shell-like mask into a regex
wxString pattern;
pattern.reserve(wxStrlen(pszMask));
pattern += _T('^');
while ( *pszMask )
{
switch ( *pszMask )
{
case _T('?'):
pattern += _T('.');
break;
case _T('*'):
pattern += _T(".*");
break;
case _T('^'):
case _T('.'):
case _T('$'):
case _T('('):
case _T(')'):
case _T('|'):
case _T('+'):
case _T('\\'):
// these characters are special in a RE, quote them
// (however note that we don't quote '[' and ']' to allow
// using them for Unix shell like matching)
pattern += _T('\\');
// fall through
default:
pattern += *pszMask;
}
pszMask++;
}
pattern += _T('$');
// and now use it
return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
// TODO: this is, of course, awfully inefficient...
// FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
// in UTF-8 build both strings are converted to wide character buffers first
// and the matching is done on these buffers
const wxScopedWCharBuffer maskBuf = mask.wc_str();
const wxScopedWCharBuffer txtBuf = wc_str();
const wxChar *pszMask = maskBuf.data();
const wxChar *pszTxt = txtBuf.data();
#else
const wxChar *pszMask = mask.wx_str();
// the char currently being checked
const wxChar *pszTxt = wx_str();
#endif
// the last location where '*' matched
const wxChar *pszLastStarInText = NULL;
const wxChar *pszLastStarInMask = NULL;
// we jump back here, with pszMask pointing to the last '*' seen, when
// backtracking (see the end of the function)
match:
for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
switch ( *pszMask ) {
case wxT('?'):
// '?' needs a character to match, so fail if the text has ended
if ( *pszTxt == wxT('\0') )
return false;
// pszTxt and pszMask will be incremented in the loop statement
break;
case wxT('*'):
{
// remember where we started to be able to backtrack later
pszLastStarInText = pszTxt;
pszLastStarInMask = pszMask;
// ignore special chars immediately following this one
// (should this be an error?)
while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
pszMask++;
// if there is nothing more, match
if ( *pszMask == wxT('\0') )
return true;
// are there any other metacharacters in the mask?
size_t uiLenMask;
const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
if ( pEndMask != NULL ) {
// we have to match the string between two metachars
uiLenMask = pEndMask - pszMask;
}
else {
// we have to match the remainder of the string
uiLenMask = wxStrlen(pszMask);
}
// look for the literal part of the mask following the '*' in the
// rest of the text: there is no match if it doesn't occur there
wxString strToMatch(pszMask, uiLenMask);
const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
if ( pMatch == NULL )
return false;
// skip over the literal part in both the text and the mask
// -1 to compensate "++" in the loop
pszTxt = pMatch + uiLenMask - 1;
pszMask += uiLenMask - 1;
}
break;
default:
// any other character in the mask must be the same as in the text
if ( *pszMask != *pszTxt )
return false;
break;
}
}
// match only if nothing left
if ( *pszTxt == wxT('\0') )
return true;
// if we failed to match, backtrack if we can
if ( pszLastStarInText ) {
// restart from the last '*' in the mask but one character further in
// the text than at the previous attempt
pszTxt = pszLastStarInText + 1;
pszMask = pszLastStarInMask;
// this is set again when the '*' pszMask now points to is processed
pszLastStarInText = NULL;
// don't bother resetting pszLastStarInMask, it's unnecessary
goto match;
}
return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
// Return the number of occurrences of the character ch in the string.
int wxString::Freq(wxUniChar ch) const
{
    int nOccurrences = 0;

    const_iterator it = begin();
    while ( it != end() )
    {
        if ( *it == ch )
            nOccurrences++;

        ++it;
    }

    return nOccurrences;
}