From 1035ae27a7a5d323963cf5249d29fe3112c5ee13 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Tue, 6 Apr 2021 11:31:12 +0200 Subject: [PATCH] Make wxSplit(wxJoin()) idempotent for string ending with escape Previously, splitting a string obtained by joining together array with (any but last) elements ending in the escape character (normally the backslash), didn't recover the original array because the separator character following it in the resulting string was considered to be escaped by wxSplit(). Fix this by escaping the trailing escape character itself. Add a test confirming that this works as expected now, document this behaviour and also slightly simplify wxSPlit() logic. See https://github.com/wxWidgets/wxWidgets/pull/2311 Closes #19131. --- interface/wx/arrstr.h | 13 ++++++++++- src/common/arrstr.cpp | 48 +++++++++++++++++++++++++++++------------ tests/arrays/arrays.cpp | 35 ++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 15 deletions(-) diff --git a/interface/wx/arrstr.h b/interface/wx/arrstr.h index 8e5f393e89..aecaf027c5 100644 --- a/interface/wx/arrstr.h +++ b/interface/wx/arrstr.h @@ -523,7 +523,18 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, If the @a escape character is non-@NULL, then it's used as prefix for each occurrence of @a sep in the strings contained in @a arr before joining them which is necessary in order to be able to recover the original array - contents from the string later using wxSplit(). + contents from the string later using wxSplit(). The @a escape characters + themselves are @e not escaped when they occur in the middle of the @a arr + elements, but @e are escaped when they occur at the end, i.e. + @code + wxArrayString arr; + arr.push_back("foo^"); + arr.push_back("bar^baz"); + wxPuts(wxJoin(arr, ':', '^')); // prints "foo^^:bar^baz" + @endcode + + In any case, applying wxSplit() to the result of wxJoin() is guaranteed to + recover the original array. @see wxSplit() diff --git a/src/common/arrstr.cpp b/src/common/arrstr.cpp index 65317bb1e8..6c830ef631 100644 --- a/src/common/arrstr.cpp +++ b/src/common/arrstr.cpp @@ -655,7 +655,17 @@ wxString wxJoin(const wxArrayString& arr, const wxChar sep, const wxChar escape) for ( size_t n = 0; n < count; n++ ) { if ( n ) + { + // We don't escape the escape characters in the middle of the + // string because this is not needed, strictly speaking, but we + // must do it if they occur at the end because otherwise we + // wouldn't split the string back correctly as the separator + // would appear to be escaped. + if ( !str.empty() && *str.rbegin() == escape ) + str += escape; + str += sep; + } for ( wxString::const_iterator i = arr[n].begin(), end = arr[n].end(); @@ -684,7 +694,6 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape wxArrayString ret; wxString curr; - wxChar prev = wxT('\0'); for ( wxString::const_iterator i = str.begin(), end = str.end(); @@ -693,30 +702,41 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape { const wxChar ch = *i; + // Order of tests matters here in the uncommon, but possible, case when + // the separator is the same as the escape character: it has to be + // recognized as a separator in this case (escaping doesn't work at all + // in this case). if ( ch == sep ) { - if ( prev == escape ) + ret.push_back(curr); + curr.clear(); + } + else if ( ch == escape ) + { + ++i; + if ( i == end ) { - // remove the escape character and don't consider this - // occurrence of 'sep' as a real separator - *curr.rbegin() = sep; - } - else // real separator - { - ret.push_back(curr); - curr.clear(); + // Escape at the end of the string is not handled specially. + curr += ch; + break; } + + // Separator or the escape character itself may be escaped, + // cancelling their special meaning, but escape character followed + // by anything else is not handled specially. + if ( *i != sep && *i != escape ) + curr += ch; + + curr += *i; } else // normal character { curr += ch; } - - prev = ch; } - // add the last token - if ( !curr.empty() || prev == sep ) + // add the last token, which we always have unless the string is empty + if ( !str.empty() ) ret.Add(curr); return ret; diff --git a/tests/arrays/arrays.cpp b/tests/arrays/arrays.cpp index 3476bfb62f..4d03aa1751 100644 --- a/tests/arrays/arrays.cpp +++ b/tests/arrays/arrays.cpp @@ -155,6 +155,19 @@ struct Item WX_DEFINE_ARRAY_PTR(Item *, ItemPtrArray); +std::ostream& operator<<(std::ostream& os, const wxArrayString& arr) +{ + os << "[ "; + for ( size_t n = 0; n < arr.size(); ++n ) + { + if ( n ) + os << ", "; + os << '"' << arr[n] << '"'; + } + os << " ] (size=" << arr.size() << ")"; + return os; +} + // ---------------------------------------------------------------------------- // the tests // ---------------------------------------------------------------------------- @@ -481,6 +494,10 @@ TEST_CASE("Arrays::SplitJoin", "[dynarray]") for (i = 0; i < WXSIZEOF(separators); i++) { wxArrayString arr = wxSplit(str, separators[i]); + + INFO("Using separator '" << static_cast(separators[i]) << "' " + "and split array \"" << arr << "\""); + CHECK( str == wxJoin(arr, separators[i]) ); } @@ -498,6 +515,10 @@ TEST_CASE("Arrays::SplitJoin", "[dynarray]") for (i = 0; i < WXSIZEOF(separators); i++) { wxString string = wxJoin(theArr, separators[i]); + + INFO("Using separator '" << static_cast(separators[i]) << "' " + "and joined string \"" << string << "\""); + CHECK( theArr == wxSplit(string, separators[i]) ); } @@ -508,6 +529,20 @@ TEST_CASE("Arrays::SplitJoin", "[dynarray]") CHECK( wxSplit(string, wxT(';')).empty() ); CHECK( wxSplit(wxT(";"), wxT(';')).size() == 2 ); + + // Check for bug with escaping the escape character at the end (but not in + // the middle). + wxArrayString withBackslashes; + withBackslashes.push_back("foo\\"); + withBackslashes.push_back("bar\\baz"); + + string = wxJoin(withBackslashes, ':'); + CHECK( string == "foo\\\\:bar\\baz" ); + + const wxArrayString withBackslashes2 = wxSplit(string, ':'); + REQUIRE( withBackslashes2.size() == 2 ); + CHECK( withBackslashes2[0] == withBackslashes[0] ); + CHECK( withBackslashes2[1] == withBackslashes[1] ); } TEST_CASE("wxObjArray", "[dynarray]")