Make wxSplit(wxJoin()) idempotent for string ending with escape
Previously, splitting a string obtained by joining together an array with (any but the last) elements ending in the escape character (normally the backslash) didn't recover the original array, because the separator character following it in the resulting string was considered to be escaped by wxSplit(). Fix this by escaping the trailing escape character itself. Add a test confirming that this works as expected now, document this behaviour and also slightly simplify wxSplit() logic. See https://github.com/wxWidgets/wxWidgets/pull/2311 Closes #19131.
This commit is contained in:
@@ -523,7 +523,18 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep,
|
|||||||
If the @a escape character is non-@NULL, then it's used as prefix for each
|
If the @a escape character is non-@NULL, then it's used as prefix for each
|
||||||
occurrence of @a sep in the strings contained in @a arr before joining them
|
occurrence of @a sep in the strings contained in @a arr before joining them
|
||||||
which is necessary in order to be able to recover the original array
|
which is necessary in order to be able to recover the original array
|
||||||
contents from the string later using wxSplit().
|
contents from the string later using wxSplit(). The @a escape characters
|
||||||
|
themselves are @e not escaped when they occur in the middle of the @a arr
|
||||||
|
elements, but @e are escaped when they occur at the end, i.e.
|
||||||
|
@code
|
||||||
|
wxArrayString arr;
|
||||||
|
arr.push_back("foo^");
|
||||||
|
arr.push_back("bar^baz");
|
||||||
|
wxPuts(wxJoin(arr, ':', '^')); // prints "foo^^:bar^baz"
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
In any case, applying wxSplit() to the result of wxJoin() is guaranteed to
|
||||||
|
recover the original array.
|
||||||
|
|
||||||
@see wxSplit()
|
@see wxSplit()
|
||||||
|
|
||||||
|
@@ -655,7 +655,17 @@ wxString wxJoin(const wxArrayString& arr, const wxChar sep, const wxChar escape)
|
|||||||
for ( size_t n = 0; n < count; n++ )
|
for ( size_t n = 0; n < count; n++ )
|
||||||
{
|
{
|
||||||
if ( n )
|
if ( n )
|
||||||
|
{
|
||||||
|
// We don't escape the escape characters in the middle of the
|
||||||
|
// string because this is not needed, strictly speaking, but we
|
||||||
|
// must do it if they occur at the end because otherwise we
|
||||||
|
// wouldn't split the string back correctly as the separator
|
||||||
|
// would appear to be escaped.
|
||||||
|
if ( !str.empty() && *str.rbegin() == escape )
|
||||||
|
str += escape;
|
||||||
|
|
||||||
str += sep;
|
str += sep;
|
||||||
|
}
|
||||||
|
|
||||||
for ( wxString::const_iterator i = arr[n].begin(),
|
for ( wxString::const_iterator i = arr[n].begin(),
|
||||||
end = arr[n].end();
|
end = arr[n].end();
|
||||||
@@ -684,7 +694,6 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape
|
|||||||
|
|
||||||
wxArrayString ret;
|
wxArrayString ret;
|
||||||
wxString curr;
|
wxString curr;
|
||||||
wxChar prev = wxT('\0');
|
|
||||||
|
|
||||||
for ( wxString::const_iterator i = str.begin(),
|
for ( wxString::const_iterator i = str.begin(),
|
||||||
end = str.end();
|
end = str.end();
|
||||||
@@ -693,30 +702,41 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape
|
|||||||
{
|
{
|
||||||
const wxChar ch = *i;
|
const wxChar ch = *i;
|
||||||
|
|
||||||
|
// Order of tests matters here in the uncommon, but possible, case when
|
||||||
|
// the separator is the same as the escape character: it has to be
|
||||||
|
// recognized as a separator in this case (escaping doesn't work at all
|
||||||
|
// in this case).
|
||||||
if ( ch == sep )
|
if ( ch == sep )
|
||||||
{
|
{
|
||||||
if ( prev == escape )
|
ret.push_back(curr);
|
||||||
|
curr.clear();
|
||||||
|
}
|
||||||
|
else if ( ch == escape )
|
||||||
|
{
|
||||||
|
++i;
|
||||||
|
if ( i == end )
|
||||||
{
|
{
|
||||||
// remove the escape character and don't consider this
|
// Escape at the end of the string is not handled specially.
|
||||||
// occurrence of 'sep' as a real separator
|
curr += ch;
|
||||||
*curr.rbegin() = sep;
|
break;
|
||||||
}
|
|
||||||
else // real separator
|
|
||||||
{
|
|
||||||
ret.push_back(curr);
|
|
||||||
curr.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Separator or the escape character itself may be escaped,
|
||||||
|
// cancelling their special meaning, but escape character followed
|
||||||
|
// by anything else is not handled specially.
|
||||||
|
if ( *i != sep && *i != escape )
|
||||||
|
curr += ch;
|
||||||
|
|
||||||
|
curr += *i;
|
||||||
}
|
}
|
||||||
else // normal character
|
else // normal character
|
||||||
{
|
{
|
||||||
curr += ch;
|
curr += ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
prev = ch;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// add the last token
|
// add the last token, which we always have unless the string is empty
|
||||||
if ( !curr.empty() || prev == sep )
|
if ( !str.empty() )
|
||||||
ret.Add(curr);
|
ret.Add(curr);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
@@ -155,6 +155,19 @@ struct Item
|
|||||||
|
|
||||||
WX_DEFINE_ARRAY_PTR(Item *, ItemPtrArray);
|
WX_DEFINE_ARRAY_PTR(Item *, ItemPtrArray);
|
||||||
|
|
||||||
|
std::ostream& operator<<(std::ostream& os, const wxArrayString& arr)
|
||||||
|
{
|
||||||
|
os << "[ ";
|
||||||
|
for ( size_t n = 0; n < arr.size(); ++n )
|
||||||
|
{
|
||||||
|
if ( n )
|
||||||
|
os << ", ";
|
||||||
|
os << '"' << arr[n] << '"';
|
||||||
|
}
|
||||||
|
os << " ] (size=" << arr.size() << ")";
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
// the tests
|
// the tests
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
@@ -481,6 +494,10 @@ TEST_CASE("Arrays::SplitJoin", "[dynarray]")
|
|||||||
for (i = 0; i < WXSIZEOF(separators); i++)
|
for (i = 0; i < WXSIZEOF(separators); i++)
|
||||||
{
|
{
|
||||||
wxArrayString arr = wxSplit(str, separators[i]);
|
wxArrayString arr = wxSplit(str, separators[i]);
|
||||||
|
|
||||||
|
INFO("Using separator '" << static_cast<char>(separators[i]) << "' "
|
||||||
|
"and split array \"" << arr << "\"");
|
||||||
|
|
||||||
CHECK( str == wxJoin(arr, separators[i]) );
|
CHECK( str == wxJoin(arr, separators[i]) );
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -498,6 +515,10 @@ TEST_CASE("Arrays::SplitJoin", "[dynarray]")
|
|||||||
for (i = 0; i < WXSIZEOF(separators); i++)
|
for (i = 0; i < WXSIZEOF(separators); i++)
|
||||||
{
|
{
|
||||||
wxString string = wxJoin(theArr, separators[i]);
|
wxString string = wxJoin(theArr, separators[i]);
|
||||||
|
|
||||||
|
INFO("Using separator '" << static_cast<char>(separators[i]) << "' "
|
||||||
|
"and joined string \"" << string << "\"");
|
||||||
|
|
||||||
CHECK( theArr == wxSplit(string, separators[i]) );
|
CHECK( theArr == wxSplit(string, separators[i]) );
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -508,6 +529,20 @@ TEST_CASE("Arrays::SplitJoin", "[dynarray]")
|
|||||||
CHECK( wxSplit(string, wxT(';')).empty() );
|
CHECK( wxSplit(string, wxT(';')).empty() );
|
||||||
|
|
||||||
CHECK( wxSplit(wxT(";"), wxT(';')).size() == 2 );
|
CHECK( wxSplit(wxT(";"), wxT(';')).size() == 2 );
|
||||||
|
|
||||||
|
// Check for bug with escaping the escape character at the end (but not in
|
||||||
|
// the middle).
|
||||||
|
wxArrayString withBackslashes;
|
||||||
|
withBackslashes.push_back("foo\\");
|
||||||
|
withBackslashes.push_back("bar\\baz");
|
||||||
|
|
||||||
|
string = wxJoin(withBackslashes, ':');
|
||||||
|
CHECK( string == "foo\\\\:bar\\baz" );
|
||||||
|
|
||||||
|
const wxArrayString withBackslashes2 = wxSplit(string, ':');
|
||||||
|
REQUIRE( withBackslashes2.size() == 2 );
|
||||||
|
CHECK( withBackslashes2[0] == withBackslashes[0] );
|
||||||
|
CHECK( withBackslashes2[1] == withBackslashes[1] );
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("wxObjArray", "[dynarray]")
|
TEST_CASE("wxObjArray", "[dynarray]")
|
||||||
|
Reference in New Issue
Block a user