Do not use wxRegEx in wxCmpNaturalGeneric()

Using wxRegEx in wxCmpNaturalGeneric() introduced a dependency of the
base library on the regex library.

Replace wxRegEx with character classification functions wxIsspace(),
wxIspunct(), and wxIsdigit() to remove this rather unnecessary
dependency.

Closes https://github.com/wxWidgets/wxWidgets/pull/2014
This commit is contained in:
PB
2020-08-06 15:42:23 +02:00
committed by Vadim Zeitlin
parent 8e2aad2621
commit a4647825cb
3 changed files with 35 additions and 49 deletions

View File

@@ -486,9 +486,6 @@ int wxCmpNatural(const wxString& s1, const wxString& s2);
/** /**
This is wxWidgets' own implementation of the natural sort comparison function. This is wxWidgets' own implementation of the natural sort comparison function.
Requires wxRegEx, if it is unavailable numbers within strings are not
recognised and only case-insensitive collation is performed.
@see wxCmpNatural() @see wxCmpNatural()
@since 3.1.4 @since 3.1.4

View File

@@ -20,7 +20,6 @@
#endif #endif
#include "wx/arrstr.h" #include "wx/arrstr.h"
#include "wx/regex.h"
#include "wx/scopedarray.h" #include "wx/scopedarray.h"
#include "wx/wxcrt.h" #include "wx/wxcrt.h"
@@ -729,8 +728,6 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape
return ret; return ret;
} }
#if wxUSE_REGEX
namespace // helpers needed by wxCmpNaturalGeneric() namespace // helpers needed by wxCmpNaturalGeneric()
{ {
// Used for comparison of string parts // Used for comparison of string parts
@@ -759,47 +756,52 @@ struct wxStringFragment
// Splits off the leading fragment of the given string.
//
// A fragment is the longest run of characters at the start of @a text that
// all fall into the same class:
//  - SpaceOrPunct:   wxIsspace() or wxIspunct() is true for the character,
//  - Digit:          wxIsdigit() is true for the character,
//  - LetterOrSymbol: everything else.
//
// A run of digits is additionally cut off after maxDigitSequenceLength
// characters, so that its numeric value can always be stored in
// fragment.value; any remaining digits are left in @a text and will be
// returned as the next fragment.
//
// The extracted characters are removed from the beginning of @a text.
// If @a text is empty, it is left untouched and a default-constructed
// fragment is returned.
wxStringFragment GetFragment(wxString& text)
{
    if ( text.empty() )
        return wxStringFragment();

    // the maximum length of a sequence of digits that is guaranteed to
    // fit into wxUint64 when converted to a number: any 19-digit number
    // fits, while a 20-digit one may be out of range
    static const ptrdiff_t maxDigitSequenceLength = 19;

    wxStringFragment         fragment;
    wxString::const_iterator it;

    for ( it = text.cbegin(); it != text.cend(); ++it )
    {
        const wxUniChar&       ch = *it;
        wxStringFragment::Type chType = wxStringFragment::Empty;

        if ( wxIsspace(ch) || wxIspunct(ch) )
            chType = wxStringFragment::SpaceOrPunct;
        else if ( wxIsdigit(ch) )
            chType = wxStringFragment::Digit;
        else
            chType = wxStringFragment::LetterOrSymbol;

        // check if evaluating the first character: it alone determines
        // the type of the whole fragment
        if ( fragment.type == wxStringFragment::Empty )
        {
            fragment.type = chType;
            continue;
        }

        // stop processing when the current character has a different
        // string fragment type than the previously processed characters had
        // or a sequence of digits is too long
        //
        // NB: (it - text.cbegin()) is the number of characters accepted so
        //     far, hence ">=" and not ">": with ">" a 20th digit would
        //     still be accepted and the conversion below could fail
        if ( fragment.type != chType
             || (fragment.type == wxStringFragment::Digit
                 && it - text.cbegin() >= maxDigitSequenceLength) )
        {
            break;
        }
    }

    // [begin, it) is the fragment; "it" is either the first character
    // which does not belong to it or the end of the string
    fragment.text.assign(text.cbegin(), it);

    if ( fragment.type == wxStringFragment::Digit )
        fragment.text.ToULongLong(&fragment.value);

    text.erase(0, it - text.cbegin());

    return fragment;
}
@@ -893,15 +895,6 @@ int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2)
return comparison; return comparison;
} }
#else
int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2)
{
return wxStrcoll_String(s1.Lower(), s2.Lower());
}
#endif // #if wxUSE_REGEX
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Declaration of StrCmpLogicalW() // Declaration of StrCmpLogicalW()
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------

View File

@@ -784,9 +784,6 @@ void ArraysTestCase::IndexFromEnd()
TEST_CASE("wxNaturalStringComparisonGeneric()", "[wxString][compare]") TEST_CASE("wxNaturalStringComparisonGeneric()", "[wxString][compare]")
{ {
#if !wxUSE_REGEX
WARN("Skipping wxCmpNaturalGeneric() tests: wxRegEx not available");
#else
// simple string comparison // simple string comparison
CHECK(wxCmpNaturalGeneric("a", "a") == 0); CHECK(wxCmpNaturalGeneric("a", "a") == 0);
CHECK(wxCmpNaturalGeneric("a", "z") < 0); CHECK(wxCmpNaturalGeneric("a", "z") < 0);
@@ -858,6 +855,5 @@ TEST_CASE("wxNaturalStringComparisonGeneric()", "[wxString][compare]")
CHECK(wxCmpNaturalGeneric("a5th5", "a10th10") < 0); CHECK(wxCmpNaturalGeneric("a5th5", "a10th10") < 0);
CHECK(wxCmpNaturalGeneric("a 10th 10", "a5th 5") < 0); CHECK(wxCmpNaturalGeneric("a 10th 10", "a5th 5") < 0);
CHECK(wxCmpNaturalGeneric("a5th 5", "a 10th 10") > 0); CHECK(wxCmpNaturalGeneric("a5th 5", "a 10th 10") > 0);
#endif // #if !wxUSE_REGEX
} }