Files
wxWidgets/src/common/arrstr.cpp
Vadim Zeitlin 1035ae27a7 Make wxSplit(wxJoin()) idempotent for string ending with escape
Previously, splitting a string obtained by joining together array
with (any but last) elements ending in the escape character (normally
the backslash), didn't recover the original array because the separator
character following it in the resulting string was considered to be
escaped by wxSplit().

Fix this by escaping the trailing escape character itself.

Add a test confirming that this works as expected now, document this
behaviour and also slightly simplify wxSPlit() logic.

See https://github.com/wxWidgets/wxWidgets/pull/2311

Closes #19131.
2021-04-07 01:38:43 +02:00

927 lines
24 KiB
C++

/////////////////////////////////////////////////////////////////////////////
// Name: src/common/arrstr.cpp
// Purpose: wxArrayString class
// Author: Vadim Zeitlin
// Modified by:
// Created: 29/01/98
// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
// Licence: wxWindows licence
/////////////////////////////////////////////////////////////////////////////
// ===========================================================================
// headers, declarations, constants
// ===========================================================================
// For compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"
#include "wx/arrstr.h"
#include "wx/scopedarray.h"
#include "wx/wxcrt.h"
#include "wx/beforestd.h"
#include <algorithm>
#include <functional>
#include "wx/afterstd.h"
#if defined( __WINDOWS__ )
#include <shlwapi.h>
// In some distributions of MinGW32, this function is exported in the library,
// but not declared in shlwapi.h. Therefore we declare it here.
#if defined( __MINGW32_TOOLCHAIN__ )
extern "C" __declspec(dllimport) int WINAPI StrCmpLogicalW(LPCWSTR psz1, LPCWSTR psz2);
#endif
// For MSVC we can also link the library containing StrCmpLogicalW()
// directly from here, for the other compilers this needs to be done at
// makefiles level.
#ifdef __VISUALC__
#pragma comment(lib, "shlwapi")
#endif
#endif
// ============================================================================
// ArrayString
// ============================================================================
wxArrayString::wxArrayString(size_t sz, const char** a)
{
#if !wxUSE_STD_CONTAINERS
Init(false);
#endif
assign(a, a + sz);
}
wxArrayString::wxArrayString(size_t sz, const wchar_t** a)
{
#if !wxUSE_STD_CONTAINERS
Init(false);
#endif
assign(a, a + sz);
}
wxArrayString::wxArrayString(size_t sz, const wxString* a)
{
#if !wxUSE_STD_CONTAINERS
Init(false);
#endif
assign(a, a + sz);
}
#if wxUSE_STD_CONTAINERS
#include "wx/arrstr.h"
#if __cplusplus >= 201103L || wxCHECK_VISUALC_VERSION(14)
int wxArrayString::Index(const wxString& str, bool bCase, bool WXUNUSED(bFromEnd)) const
{
int n = 0;
for ( const auto& s: *this )
{
if ( s.IsSameAs(str, bCase) )
return n;
++n;
}
return wxNOT_FOUND;
}
#else // C++98 version
#include "wx/beforestd.h"
#include <functional>
#include "wx/afterstd.h"
// some compilers (Sun CC being the only known example) distinguish between
// extern "C" functions and the functions with C++ linkage and ptr_fun and
// wxStringCompareLess can't take wxStrcmp/wxStricmp directly as arguments in
// this case, we need the wrappers below to make this work
struct wxStringCmp
{
typedef wxString first_argument_type;
typedef wxString second_argument_type;
typedef int result_type;
int operator()(const wxString& s1, const wxString& s2) const
{
return s1.compare(s2);
}
};
struct wxStringCmpNoCase
{
typedef wxString first_argument_type;
typedef wxString second_argument_type;
typedef int result_type;
int operator()(const wxString& s1, const wxString& s2) const
{
return s1.CmpNoCase(s2);
}
};
int wxArrayString::Index(const wxString& str, bool bCase, bool WXUNUSED(bFromEnd)) const
{
wxArrayString::const_iterator it;
if (bCase)
{
it = std::find_if(begin(), end(),
std::not1(
std::bind2nd(
wxStringCmp(), str)));
}
else // !bCase
{
it = std::find_if(begin(), end(),
std::not1(
std::bind2nd(
wxStringCmpNoCase(), str)));
}
return it == end() ? wxNOT_FOUND : it - begin();
}
template<class F>
class wxStringCompareLess
{
public:
wxStringCompareLess(F f) : m_f(f) { }
bool operator()(const wxString& s1, const wxString& s2)
{ return m_f(s1, s2) < 0; }
private:
F m_f;
};
template<class F>
wxStringCompareLess<F> wxStringCompare(F f)
{
return wxStringCompareLess<F>(f);
}
#endif // C++11/C++98
void wxArrayString::Sort(CompareFunction function)
{
std::sort(begin(), end(),
#if __cplusplus >= 201103L || wxCHECK_VISUALC_VERSION(14)
[function](const wxString& s1, const wxString& s2)
{
return function(s1, s2) < 0;
}
#else // C++98 version
wxStringCompare(function)
#endif // C++11/C++98
);
}
void wxArrayString::Sort(bool reverseOrder)
{
if (reverseOrder)
{
std::sort(begin(), end(), std::greater<wxString>());
}
else
{
std::sort(begin(), end());
}
}
int wxSortedArrayString::Index(const wxString& str,
bool WXUNUSED_UNLESS_DEBUG(bCase),
bool WXUNUSED_UNLESS_DEBUG(bFromEnd)) const
{
wxASSERT_MSG( bCase && !bFromEnd,
"search parameters ignored for sorted array" );
SCMPFUNC function = GetCompareFunction();
wxSortedArrayString::const_iterator
it = std::lower_bound(begin(), end(), str,
#if __cplusplus >= 201103L || wxCHECK_VISUALC_VERSION(14)
[function](const wxString& s1, const wxString& s2)
{
return function(s1, s2) < 0;
}
#else // C++98 version
wxStringCompare(function)
#endif // C++11/C++98
);
if ( it == end() || str.Cmp(*it) != 0 )
return wxNOT_FOUND;
return it - begin();
}
#else // !wxUSE_STD_CONTAINERS
#ifndef ARRAY_DEFAULT_INITIAL_SIZE // also defined in dynarray.h
#define ARRAY_DEFAULT_INITIAL_SIZE (16)
#endif
// ctor
void wxArrayString::Init(bool autoSort)
{
m_nSize =
m_nCount = 0;
m_pItems = NULL;
m_compareFunction = NULL;
m_autoSort = autoSort;
}
// copy ctor
wxArrayString::wxArrayString(const wxArrayString& src)
{
Init(src.m_autoSort);
*this = src;
}
// assignment operator
wxArrayString& wxArrayString::operator=(const wxArrayString& src)
{
if ( m_nSize > 0 )
{
// Do this test here to avoid unnecessary overhead when assigning to an
// empty array, in that case there is no harm in self-assignment.
if ( &src == this )
return *this;
Clear();
}
Copy(src);
m_autoSort = src.m_autoSort;
return *this;
}
void wxArrayString::Copy(const wxArrayString& src)
{
if ( src.m_nCount > ARRAY_DEFAULT_INITIAL_SIZE )
Alloc(src.m_nCount);
for ( size_t n = 0; n < src.m_nCount; n++ )
Add(src[n]);
}
// grow the array
wxString *wxArrayString::Grow(size_t nIncrement)
{
if ( (m_nSize - m_nCount) >= nIncrement )
{
// We already have enough space.
return NULL;
}
// if ARRAY_DEFAULT_INITIAL_SIZE were set to 0, the initially empty would
// be never resized!
#if ARRAY_DEFAULT_INITIAL_SIZE == 0
#error "ARRAY_DEFAULT_INITIAL_SIZE must be > 0!"
#endif
if ( m_nSize == 0 ) {
// was empty, alloc some memory
m_nSize = ARRAY_DEFAULT_INITIAL_SIZE;
if (m_nSize < nIncrement)
m_nSize = nIncrement;
m_pItems = new wxString[m_nSize];
// Nothing to free, we hadn't had any memory before.
return NULL;
}
else {
// otherwise when it's called for the first time, nIncrement would be 0
// and the array would never be expanded
size_t ndefIncrement = m_nSize < ARRAY_DEFAULT_INITIAL_SIZE
? ARRAY_DEFAULT_INITIAL_SIZE : m_nSize;
if ( nIncrement < ndefIncrement )
nIncrement = ndefIncrement;
m_nSize += nIncrement;
wxString *pNew = new wxString[m_nSize];
// copy data to new location
for ( size_t j = 0; j < m_nCount; j++ )
pNew[j] = m_pItems[j];
wxString* const pItemsOld = m_pItems;
m_pItems = pNew;
return pItemsOld;
}
}
// deletes all the strings from the list
void wxArrayString::Empty()
{
m_nCount = 0;
}
// as Empty, but also frees memory
void wxArrayString::Clear()
{
m_nSize =
m_nCount = 0;
wxDELETEA(m_pItems);
}
// dtor
wxArrayString::~wxArrayString()
{
delete [] m_pItems;
}
void wxArrayString::reserve(size_t nSize)
{
Alloc(nSize);
}
// pre-allocates memory (frees the previous data!)
void wxArrayString::Alloc(size_t nSize)
{
// only if old buffer was not big enough
if ( nSize > m_nSize ) {
wxString *pNew = new wxString[nSize];
if ( !pNew )
return;
for ( size_t j = 0; j < m_nCount; j++ )
pNew[j] = m_pItems[j];
delete [] m_pItems;
m_pItems = pNew;
m_nSize = nSize;
}
}
// minimizes the memory usage by freeing unused memory
void wxArrayString::Shrink()
{
// only do it if we have some memory to free
if( m_nCount < m_nSize ) {
// allocates exactly as much memory as we need
wxString *pNew = new wxString[m_nCount];
// copy data to new location
for ( size_t j = 0; j < m_nCount; j++ )
pNew[j] = m_pItems[j];
delete [] m_pItems;
m_pItems = pNew;
m_nSize = m_nCount;
}
}
// Binary search in the sorted array
size_t wxArrayString::BinarySearch(const wxString& str, bool lowerBound) const
{
size_t
lo = 0,
hi = m_nCount;
while (lo < hi) {
size_t i;
i = (lo + hi) / 2;
int res;
res = m_compareFunction ? m_compareFunction(str, m_pItems[i]) : str.Cmp(m_pItems[i]);
if (res < 0)
hi = i;
else if (res > 0)
lo = i + 1;
else
return i;
}
wxASSERT_MSG(lo == hi, wxT("binary search broken"));
return lowerBound ? lo : wxNOT_FOUND;
}
// searches the array for an item (forward or backwards)
int wxArrayString::Index(const wxString& str, bool bCase, bool bFromEnd) const
{
if ( m_autoSort ) {
// use binary search in the sorted array
wxASSERT_MSG( bCase && !bFromEnd,
wxT("search parameters ignored for auto sorted array") );
return BinarySearch(str, false /* not lower bound */);
}
else {
// use linear search in unsorted array
if ( bFromEnd ) {
if ( m_nCount > 0 ) {
size_t ui = m_nCount;
do {
if ( m_pItems[--ui].IsSameAs(str, bCase) )
return ui;
}
while ( ui != 0 );
}
}
else {
for( size_t ui = 0; ui < m_nCount; ui++ ) {
if( m_pItems[ui].IsSameAs(str, bCase) )
return ui;
}
}
}
return wxNOT_FOUND;
}
// add item at the end
size_t wxArrayString::Add(const wxString& str, size_t nInsert)
{
if ( m_autoSort ) {
// insert the string at the correct position to keep the array sorted
size_t nIndex = BinarySearch(str, true /* return lower bound */);
Insert(str, nIndex, nInsert);
return nIndex;
}
else {
// Now that we must postpone freeing the old memory until we don't need it
// any more, i.e. don't reference "str" which could be a reference to one
// of our own strings.
wxScopedArray<wxString> oldStrings(Grow(nInsert));
for (size_t i = 0; i < nInsert; i++)
{
// just append
m_pItems[m_nCount + i] = str;
}
size_t ret = m_nCount;
m_nCount += nInsert;
return ret;
}
}
// add item at the given position
void wxArrayString::Insert(const wxString& str, size_t nIndex, size_t nInsert)
{
wxCHECK_RET( nIndex <= m_nCount, wxT("bad index in wxArrayString::Insert") );
wxCHECK_RET( m_nCount <= m_nCount + nInsert,
wxT("array size overflow in wxArrayString::Insert") );
wxScopedArray<wxString> oldStrings(Grow(nInsert));
for (int j = m_nCount - nIndex - 1; j >= 0; j--)
m_pItems[nIndex + nInsert + j] = m_pItems[nIndex + j];
for (size_t i = 0; i < nInsert; i++)
{
m_pItems[nIndex + i] = str;
}
m_nCount += nInsert;
}
// range insert (STL 23.2.4.3)
void
wxArrayString::insert(iterator it, const_iterator first, const_iterator last)
{
const int idx = it - begin();
// grow it once
wxScopedArray<wxString> oldStrings(Grow(last - first));
// reset "it" since it can change inside Grow()
it = begin() + idx;
while ( first != last )
{
it = insert(it, *first);
// insert returns an iterator to the last element inserted but we need
// insert the next after this one, that is before the next one
++it;
++first;
}
}
void wxArrayString::resize(size_type n, value_type v)
{
if ( n < m_nCount )
m_nCount = n;
else if ( n > m_nCount )
Add(v, n - m_nCount);
}
// expand the array
void wxArrayString::SetCount(size_t count)
{
Alloc(count);
wxString s;
while ( m_nCount < count )
m_pItems[m_nCount++] = s;
}
// removes item from array (by index)
void wxArrayString::RemoveAt(size_t nIndex, size_t nRemove)
{
wxCHECK_RET( nIndex < m_nCount, wxT("bad index in wxArrayString::Remove") );
wxCHECK_RET( nIndex + nRemove <= m_nCount,
wxT("removing too many elements in wxArrayString::Remove") );
for ( size_t j = 0; j < m_nCount - nIndex -nRemove; j++)
m_pItems[nIndex + j] = m_pItems[nIndex + nRemove + j];
m_nCount -= nRemove;
}
// removes item from array (by value)
void wxArrayString::Remove(const wxString& sz)
{
int iIndex = Index(sz);
wxCHECK_RET( iIndex != wxNOT_FOUND,
wxT("removing inexistent element in wxArrayString::Remove") );
RemoveAt(iIndex);
}
// ----------------------------------------------------------------------------
// sorting
// ----------------------------------------------------------------------------
// we need an adaptor as our predicates use qsort() convention and so return
// negative, null or positive value depending on whether the first item is less
// than, equal to or greater than the other one while we need a real boolean
// predicate now that we use std::sort()
struct wxSortPredicateAdaptor
{
wxSortPredicateAdaptor(wxArrayString::CompareFunction compareFunction)
: m_compareFunction(compareFunction)
{
}
bool operator()(const wxString& first, const wxString& second) const
{
return (*m_compareFunction)(first, second) < 0;
}
wxArrayString::CompareFunction m_compareFunction;
};
void wxArrayString::Sort(CompareFunction compareFunction)
{
wxCHECK_RET( !m_autoSort, wxT("can't use this method with sorted arrays") );
std::sort(m_pItems, m_pItems + m_nCount,
wxSortPredicateAdaptor(compareFunction));
}
struct wxSortPredicateAdaptor2
{
wxSortPredicateAdaptor2(wxArrayString::CompareFunction2 compareFunction)
: m_compareFunction(compareFunction)
{
}
bool operator()(const wxString& first, const wxString& second) const
{
return (*m_compareFunction)(const_cast<wxString *>(&first),
const_cast<wxString *>(&second)) < 0;
}
wxArrayString::CompareFunction2 m_compareFunction;
};
void wxArrayString::Sort(CompareFunction2 compareFunction)
{
std::sort(m_pItems, m_pItems + m_nCount,
wxSortPredicateAdaptor2(compareFunction));
}
void wxArrayString::Sort(bool reverseOrder)
{
if ( reverseOrder )
std::sort(m_pItems, m_pItems + m_nCount, std::greater<wxString>());
else // normal sort
std::sort(m_pItems, m_pItems + m_nCount);
}
bool wxArrayString::operator==(const wxArrayString& a) const
{
if ( m_nCount != a.m_nCount )
return false;
for ( size_t n = 0; n < m_nCount; n++ )
{
if ( Item(n) != a[n] )
return false;
}
return true;
}
#endif // !wxUSE_STD_CONTAINERS
// ===========================================================================
// wxJoin and wxSplit
// ===========================================================================
#include "wx/tokenzr.h"
wxString wxJoin(const wxArrayString& arr, const wxChar sep, const wxChar escape)
{
wxString str;
size_t count = arr.size();
if ( count == 0 )
return str;
// pre-allocate memory using the estimation of the average length of the
// strings in the given array: this is very imprecise, of course, but
// better than nothing
str.reserve(count*(arr[0].length() + arr[count-1].length()) / 2);
if ( escape == wxT('\0') )
{
// escaping is disabled:
for ( size_t i = 0; i < count; i++ )
{
if ( i )
str += sep;
str += arr[i];
}
}
else // use escape character
{
for ( size_t n = 0; n < count; n++ )
{
if ( n )
{
// We don't escape the escape characters in the middle of the
// string because this is not needed, strictly speaking, but we
// must do it if they occur at the end because otherwise we
// wouldn't split the string back correctly as the separator
// would appear to be escaped.
if ( !str.empty() && *str.rbegin() == escape )
str += escape;
str += sep;
}
for ( wxString::const_iterator i = arr[n].begin(),
end = arr[n].end();
i != end;
++i )
{
const wxChar ch = *i;
if ( ch == sep )
str += escape; // escape this separator
str += ch;
}
}
}
str.Shrink(); // release extra memory if we allocated too much
return str;
}
wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape)
{
if ( escape == wxT('\0') )
{
// simple case: we don't need to honour the escape character
return wxStringTokenize(str, sep, wxTOKEN_RET_EMPTY_ALL);
}
wxArrayString ret;
wxString curr;
for ( wxString::const_iterator i = str.begin(),
end = str.end();
i != end;
++i )
{
const wxChar ch = *i;
// Order of tests matters here in the uncommon, but possible, case when
// the separator is the same as the escape character: it has to be
// recognized as a separator in this case (escaping doesn't work at all
// in this case).
if ( ch == sep )
{
ret.push_back(curr);
curr.clear();
}
else if ( ch == escape )
{
++i;
if ( i == end )
{
// Escape at the end of the string is not handled specially.
curr += ch;
break;
}
// Separator or the escape character itself may be escaped,
// cancelling their special meaning, but escape character followed
// by anything else is not handled specially.
if ( *i != sep && *i != escape )
curr += ch;
curr += *i;
}
else // normal character
{
curr += ch;
}
}
// add the last token, which we always have unless the string is empty
if ( !str.empty() )
ret.Add(curr);
return ret;
}
namespace // helpers needed by wxCmpNaturalGeneric()
{
// Used for comparison of string parts
struct wxStringFragment
{
// Fragment types are generally sorted like this:
// Empty < SpaceOrPunct < Digit < LetterOrSymbol
// Fragments of the same type are compared as follows:
// SpaceOrPunct - collated, Digit - as numbers using value
// LetterOrSymbol - lower-cased and then collated
enum Type
{
Empty,
SpaceOrPunct, // whitespace or punctuation
Digit, // a sequence of decimal digits
LetterOrSymbol // letters and symbols, i.e., anything not covered by the above types
};
wxStringFragment() : type(Empty), value(0) {}
Type type;
wxString text;
wxUint64 value; // used only for Digit type
};
wxStringFragment GetFragment(wxString& text)
{
if ( text.empty() )
return wxStringFragment();
// the maximum length of a sequence of digits that
// can fit into wxUint64 when converted to a number
static const ptrdiff_t maxDigitSequenceLength = 19;
wxStringFragment fragment;
wxString::const_iterator it;
for ( it = text.cbegin(); it != text.cend(); ++it )
{
const wxUniChar& ch = *it;
wxStringFragment::Type chType = wxStringFragment::Empty;
if ( wxIsspace(ch) || wxIspunct(ch) )
chType = wxStringFragment::SpaceOrPunct;
else if ( wxIsdigit(ch) )
chType = wxStringFragment::Digit;
else
chType = wxStringFragment::LetterOrSymbol;
// check if evaluating the first character
if ( fragment.type == wxStringFragment::Empty )
{
fragment.type = chType;
continue;
}
// stop processing when the current character has a different
// string fragment type than the previously processed characters had
// or a sequence of digits is too long
if ( fragment.type != chType
|| (fragment.type == wxStringFragment::Digit
&& it - text.cbegin() > maxDigitSequenceLength) )
{
break;
}
}
fragment.text.assign(text.cbegin(), it);
if ( fragment.type == wxStringFragment::Digit )
fragment.text.ToULongLong(&fragment.value);
text.erase(0, it - text.cbegin());
return fragment;
}
int CompareFragmentNatural(const wxStringFragment& lhs, const wxStringFragment& rhs)
{
switch ( lhs.type )
{
case wxStringFragment::Empty:
switch ( rhs.type )
{
case wxStringFragment::Empty:
return 0;
case wxStringFragment::SpaceOrPunct:
case wxStringFragment::Digit:
case wxStringFragment::LetterOrSymbol:
return -1;
}
break;
case wxStringFragment::SpaceOrPunct:
switch ( rhs.type )
{
case wxStringFragment::Empty:
return 1;
case wxStringFragment::SpaceOrPunct:
return wxStrcoll_String(lhs.text, rhs.text);
case wxStringFragment::Digit:
case wxStringFragment::LetterOrSymbol:
return -1;
}
break;
case wxStringFragment::Digit:
switch ( rhs.type )
{
case wxStringFragment::Empty:
case wxStringFragment::SpaceOrPunct:
return 1;
case wxStringFragment::Digit:
if ( lhs.value > rhs.value )
return 1;
else if ( lhs.value < rhs.value )
return -1;
else
return 0;
case wxStringFragment::LetterOrSymbol:
return -1;
}
break;
case wxStringFragment::LetterOrSymbol:
switch ( rhs.type )
{
case wxStringFragment::Empty:
case wxStringFragment::SpaceOrPunct:
case wxStringFragment::Digit:
return 1;
case wxStringFragment::LetterOrSymbol:
return wxStrcoll_String(lhs.text.Lower(), rhs.text.Lower());
}
break;
}
// all possible cases should be covered by the switch above
// but return also from here to prevent the compiler warning
return 1;
}
} // unnamed namespace
// ----------------------------------------------------------------------------
// wxCmpNaturalGeneric
// ----------------------------------------------------------------------------
//
int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2)
{
wxString lhs(s1);
wxString rhs(s2);
int comparison = 0;
while ( (comparison == 0) && (!lhs.empty() || !rhs.empty()) )
{
const wxStringFragment fragmentLHS = GetFragment(lhs);
const wxStringFragment fragmentRHS = GetFragment(rhs);
comparison = CompareFragmentNatural(fragmentLHS, fragmentRHS);
}
return comparison;
}
// ----------------------------------------------------------------------------
// wxCmpNatural
// ----------------------------------------------------------------------------
//
// If a native version of Natural sort is available, then use that, otherwise
// use the generic version.
int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2)
{
#if defined( __WINDOWS__ )
return StrCmpLogicalW(s1.wc_str(), s2.wc_str());
#else
return wxCmpNaturalGeneric(s1, s2);
#endif // #if defined( __WINDOWS__ )
}