Make wxSplit(wxJoin()) idempotent for string ending with escape

Previously, splitting a string obtained by joining together array
with (any but last) elements ending in the escape character (normally
the backslash), didn't recover the original array because the separator
character following it in the resulting string was considered to be
escaped by wxSplit().

Fix this by escaping the trailing escape character itself.

Add a test confirming that this works as expected now, document this
behaviour and also slightly simplify wxSPlit() logic.

See https://github.com/wxWidgets/wxWidgets/pull/2311

Closes #19131.
This commit is contained in:
Vadim Zeitlin
2021-04-06 11:31:12 +02:00
parent 6810fafa31
commit 1035ae27a7
3 changed files with 81 additions and 15 deletions

View File

@@ -523,7 +523,18 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep,
If the @a escape character is non-@NULL, then it's used as prefix for each If the @a escape character is non-@NULL, then it's used as prefix for each
occurrence of @a sep in the strings contained in @a arr before joining them occurrence of @a sep in the strings contained in @a arr before joining them
which is necessary in order to be able to recover the original array which is necessary in order to be able to recover the original array
contents from the string later using wxSplit(). contents from the string later using wxSplit(). The @a escape characters
themselves are @e not escaped when they occur in the middle of the @a arr
elements, but @e are escaped when they occur at the end, i.e.
@code
wxArrayString arr;
arr.push_back("foo^");
arr.push_back("bar^baz");
wxPuts(wxJoin(arr, ':', '^')); // prints "foo^^:bar^baz"
@endcode
In any case, applying wxSplit() to the result of wxJoin() is guaranteed to
recover the original array.
@see wxSplit() @see wxSplit()

View File

@@ -655,7 +655,17 @@ wxString wxJoin(const wxArrayString& arr, const wxChar sep, const wxChar escape)
for ( size_t n = 0; n < count; n++ ) for ( size_t n = 0; n < count; n++ )
{ {
if ( n ) if ( n )
{
// We don't escape the escape characters in the middle of the
// string because this is not needed, strictly speaking, but we
// must do it if they occur at the end because otherwise we
// wouldn't split the string back correctly as the separator
// would appear to be escaped.
if ( !str.empty() && *str.rbegin() == escape )
str += escape;
str += sep; str += sep;
}
for ( wxString::const_iterator i = arr[n].begin(), for ( wxString::const_iterator i = arr[n].begin(),
end = arr[n].end(); end = arr[n].end();
@@ -684,7 +694,6 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape
wxArrayString ret; wxArrayString ret;
wxString curr; wxString curr;
wxChar prev = wxT('\0');
for ( wxString::const_iterator i = str.begin(), for ( wxString::const_iterator i = str.begin(),
end = str.end(); end = str.end();
@@ -693,30 +702,41 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape
{ {
const wxChar ch = *i; const wxChar ch = *i;
// Order of tests matters here in the uncommon, but possible, case when
// the separator is the same as the escape character: it has to be
// recognized as a separator in this case (escaping doesn't work at all
// in this case).
if ( ch == sep ) if ( ch == sep )
{ {
if ( prev == escape ) ret.push_back(curr);
curr.clear();
}
else if ( ch == escape )
{
++i;
if ( i == end )
{ {
// remove the escape character and don't consider this // Escape at the end of the string is not handled specially.
// occurrence of 'sep' as a real separator curr += ch;
*curr.rbegin() = sep; break;
}
else // real separator
{
ret.push_back(curr);
curr.clear();
} }
// Separator or the escape character itself may be escaped,
// cancelling their special meaning, but escape character followed
// by anything else is not handled specially.
if ( *i != sep && *i != escape )
curr += ch;
curr += *i;
} }
else // normal character else // normal character
{ {
curr += ch; curr += ch;
} }
prev = ch;
} }
// add the last token // add the last token, which we always have unless the string is empty
if ( !curr.empty() || prev == sep ) if ( !str.empty() )
ret.Add(curr); ret.Add(curr);
return ret; return ret;

View File

@@ -155,6 +155,19 @@ struct Item
WX_DEFINE_ARRAY_PTR(Item *, ItemPtrArray); WX_DEFINE_ARRAY_PTR(Item *, ItemPtrArray);
std::ostream& operator<<(std::ostream& os, const wxArrayString& arr)
{
os << "[ ";
for ( size_t n = 0; n < arr.size(); ++n )
{
if ( n )
os << ", ";
os << '"' << arr[n] << '"';
}
os << " ] (size=" << arr.size() << ")";
return os;
}
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// the tests // the tests
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
@@ -481,6 +494,10 @@ TEST_CASE("Arrays::SplitJoin", "[dynarray]")
for (i = 0; i < WXSIZEOF(separators); i++) for (i = 0; i < WXSIZEOF(separators); i++)
{ {
wxArrayString arr = wxSplit(str, separators[i]); wxArrayString arr = wxSplit(str, separators[i]);
INFO("Using separator '" << static_cast<char>(separators[i]) << "' "
"and split array \"" << arr << "\"");
CHECK( str == wxJoin(arr, separators[i]) ); CHECK( str == wxJoin(arr, separators[i]) );
} }
@@ -498,6 +515,10 @@ TEST_CASE("Arrays::SplitJoin", "[dynarray]")
for (i = 0; i < WXSIZEOF(separators); i++) for (i = 0; i < WXSIZEOF(separators); i++)
{ {
wxString string = wxJoin(theArr, separators[i]); wxString string = wxJoin(theArr, separators[i]);
INFO("Using separator '" << static_cast<char>(separators[i]) << "' "
"and joined string \"" << string << "\"");
CHECK( theArr == wxSplit(string, separators[i]) ); CHECK( theArr == wxSplit(string, separators[i]) );
} }
@@ -508,6 +529,20 @@ TEST_CASE("Arrays::SplitJoin", "[dynarray]")
CHECK( wxSplit(string, wxT(';')).empty() ); CHECK( wxSplit(string, wxT(';')).empty() );
CHECK( wxSplit(wxT(";"), wxT(';')).size() == 2 ); CHECK( wxSplit(wxT(";"), wxT(';')).size() == 2 );
// Check for bug with escaping the escape character at the end (but not in
// the middle).
wxArrayString withBackslashes;
withBackslashes.push_back("foo\\");
withBackslashes.push_back("bar\\baz");
string = wxJoin(withBackslashes, ':');
CHECK( string == "foo\\\\:bar\\baz" );
const wxArrayString withBackslashes2 = wxSplit(string, ':');
REQUIRE( withBackslashes2.size() == 2 );
CHECK( withBackslashes2[0] == withBackslashes[0] );
CHECK( withBackslashes2[1] == withBackslashes[1] );
} }
TEST_CASE("wxObjArray", "[dynarray]") TEST_CASE("wxObjArray", "[dynarray]")