diff --git a/include/wx/arrstr.h b/include/wx/arrstr.h
index 3194a0053f..e21dc78761 100644
--- a/include/wx/arrstr.h
+++ b/include/wx/arrstr.h
@@ -42,12 +42,30 @@ wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2)
     return cmp ? cmp : s1.Cmp(s2);
 }
 
+
 inline int wxCMPFUNC_CONV
 wxDictionaryStringSortDescending(const wxString& s1, const wxString& s2)
 {
     return wxDictionaryStringSortAscending(s2, s1);
 }
 
+WXDLLIMPEXP_BASE
+int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2);
+
+WXDLLIMPEXP_BASE
+int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2);
+
+inline int wxCMPFUNC_CONV wxNaturalStringSortAscending(const wxString& s1, const wxString& s2)
+{
+    return wxCmpNatural(s1, s2);
+}
+
+inline int wxCMPFUNC_CONV wxNaturalStringSortDescending(const wxString& s1, const wxString& s2)
+{
+    return wxCmpNatural(s2, s1);
+}
+
+
 #if wxUSE_STD_CONTAINERS
 
 typedef int (wxCMPFUNC_CONV *CMPFUNCwxString)(wxString*, wxString*);
diff --git a/interface/wx/arrstr.h b/interface/wx/arrstr.h
index 8e0f98089d..8c00e5b143 100644
--- a/interface/wx/arrstr.h
+++ b/interface/wx/arrstr.h
@@ -363,7 +363,8 @@ public:
     This function can be used with wxSortedArrayString::Sort()
     or passed as an argument to wxSortedArrayString constructor.
 
-    @see wxStringSortDescending(), wxDictionaryStringSortAscending()
+    @see wxStringSortDescending(), wxDictionaryStringSortAscending(),
+         wxNaturalStringSortAscending()
 
     @since 3.1.0
 */
@@ -375,7 +376,8 @@ int wxStringSortAscending(const wxString& s1, const wxString& s2);
     This function can be used with wxSortedArrayString::Sort()
     or passed as an argument to wxSortedArrayString constructor.
 
-    @see wxStringSortAscending(), wxDictionaryStringSortAscending()
+    @see wxStringSortAscending(), wxDictionaryStringSortDescending(),
+         wxNaturalStringSortDescending()
 
     @since 3.1.0
 */
@@ -392,7 +394,9 @@ int wxStringSortDescending(const wxString& s1, const wxString& s2);
     This function can be used with wxSortedArrayString::Sort()
     or passed as an argument to wxSortedArrayString constructor.
 
-    @see wxStringSortAscending(), wxDictionaryStringSortDescending()
+    @see wxDictionaryStringSortDescending(),
+         wxStringSortAscending(),
+         wxNaturalStringSortAscending()
 
     @since 3.1.0
 */
@@ -403,11 +407,94 @@ int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2);
 
     See wxDictionaryStringSortAscending() for the dictionary sort description.
 
-    @see wxStringSortDescending()
+    @see wxDictionaryStringSortAscending(),
+         wxStringSortDescending(),
+         wxNaturalStringSortDescending()
 
     @since 3.1.0
 */
-int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2);
+int wxDictionaryStringSortDescending(const wxString& s1, const wxString& s2);
+
+
+/**
+    Comparison function comparing strings in natural order.
+
+    This function can be used with wxSortedArrayString::Sort()
+    or passed as an argument to wxSortedArrayString constructor.
+
+    See wxCmpNatural() for more information about how natural
+    sort order is implemented.
+
+    @see wxNaturalStringSortDescending(),
+         wxStringSortAscending(), wxDictionaryStringSortAscending()
+
+    @since 3.1.4
+*/
+int wxNaturalStringSortAscending(const wxString& s1, const wxString& s2);
+
+/**
+    Comparison function comparing strings in reverse natural order.
+
+    This function can be used with wxSortedArrayString::Sort()
+    or passed as an argument to wxSortedArrayString constructor.
+
+    See wxCmpNatural() for more information about how natural
+    sort order is implemented.
+
+    @see wxNaturalStringSortAscending(),
+         wxStringSortDescending(), wxDictionaryStringSortDescending()
+
+    @since 3.1.4
+*/
+int wxNaturalStringSortDescending(const wxString& s1, const wxString& s2);
+
+/**
+    This function compares strings using case-insensitive collation and,
+    additionally, numbers within strings are recognised and compared
+    numerically, rather than alphabetically. When used for sorting,
+    the result is that e.g. file names containing numbers are sorted
+    in a natural way.
+
+    For example, sorting with a simple string comparison results in:
+    - file1.txt
+    - file10.txt
+    - file100.txt
+    - file2.txt
+    - file20.txt
+    - file3.txt
+
+    But sorting the same strings in natural sort order results in:
+    - file1.txt
+    - file2.txt
+    - file3.txt
+    - file10.txt
+    - file20.txt
+    - file100.txt
+
+    wxCmpNatural() uses an OS native natural sort function when available
+    (currently only under Microsoft Windows), wxCmpNaturalGeneric() otherwise.
+
+    Be aware that OS native implementations might differ from each other,
+    and might change behaviour from release to release.
+
+    @see wxNaturalStringSortAscending(), wxNaturalStringSortDescending()
+
+    @since 3.1.4
+*/
+int wxCmpNatural(const wxString& s1, const wxString& s2);
+
+/**
+    This is wxWidgets' own implementation of the natural sort comparison function.
+
+    Requires wxRegEx; if it is unavailable, numbers within strings are not
+    recognised and only case-insensitive collation is performed.
+
+    @see wxCmpNatural()
+
+    @since 3.1.4
+*/
+int wxCmpNaturalGeneric(const wxString& s1, const wxString& s2);
+
 
 // ============================================================================
 // Global functions/macros
diff --git a/src/common/arrstr.cpp b/src/common/arrstr.cpp
index 1b05c1d556..176bd6e72d 100644
--- a/src/common/arrstr.cpp
+++ b/src/common/arrstr.cpp
@@ -20,13 +20,20 @@
 #endif
 
 #include "wx/arrstr.h"
+#include "wx/regex.h"
 #include "wx/scopedarray.h"
+#include "wx/wxcrt.h"
 
 #include "wx/beforestd.h"
 #include <algorithm>
 #include <functional>
 #include "wx/afterstd.h"
 
+#if defined( __WINDOWS__ )
+    #include <shlwapi.h>
+#endif
+
+
 // ============================================================================
 // ArrayString
 // ============================================================================
@@ -721,3 +728,218 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape
 
     return ret;
 }
+
+#if wxUSE_REGEX
+
+namespace // helpers needed by wxCmpNaturalGeneric()
+{
+// Used for comparison of string parts
+struct wxStringFragment
+{
+    // Fragment types are generally sorted like this:
+    //     Empty < SpaceOrPunct < Digit < LetterOrSymbol
+    // Fragments of the same type are compared as follows:
+    //     SpaceOrPunct - collated, Digit - as numbers using value
+    //     LetterOrSymbol - lower-cased and then collated
+    enum Type
+    {
+        Empty,
+        SpaceOrPunct,  // whitespace or punctuation
+        Digit,         // a sequence of decimal digits
+        LetterOrSymbol // letters and symbols, i.e., anything not covered by the above types
+    };
+
+    wxStringFragment() : type(Empty), value(0) {}
+
+    Type     type;
+    wxString text;
+    wxUint64 value; // used only for Digit type
+};
+
+
+// Splits off and returns the leading fragment of text; the characters making
+// up the fragment are removed from text. An Empty fragment is returned only
+// if text is empty.
+wxStringFragment GetFragment(wxString& text)
+{
+    // NOTE(review): Matches() and GetMatch() are separate calls on these shared
+    // static objects, so the match state lives in them between the two calls;
+    // confirm this function is never called from several threads at once.
+    static const wxRegEx reSpaceOrPunct(wxS("^([[:space:]]|[[:punct:]])+"));
+    // Limit the length to make sure the value will fit into a wxUint64
+    static const wxRegEx reDigit(wxS("^[[:digit:]]{1,19}"));
+    // Everything which is neither whitespace, punctuation nor a digit
+    static const wxRegEx reLetterOrSymbol(wxS("^[^[:space:][:punct:][:digit:]]+"));
+
+    if ( text.empty() )
+        return wxStringFragment();
+
+    wxStringFragment fragment;
+    size_t length = 0;
+
+    // In attempt to minimize the number of wxRegEx.Matches() calls,
+    // try to do them from the most expected to the least expected
+    // string fragment type.
+    if ( reLetterOrSymbol.Matches(text) )
+    {
+        if ( reLetterOrSymbol.GetMatch(NULL, &length) )
+        {
+            fragment.type = wxStringFragment::LetterOrSymbol;
+            fragment.text = text.Left(length);
+        }
+    }
+    else if ( reDigit.Matches(text) )
+    {
+        if ( reDigit.GetMatch(NULL, &length) )
+        {
+            fragment.type = wxStringFragment::Digit;
+            fragment.text = text.Left(length);
+            fragment.text.ToULongLong(&fragment.value);
+        }
+    }
+    else if ( reSpaceOrPunct.Matches(text) )
+    {
+        if ( reSpaceOrPunct.GetMatch(NULL, &length) )
+        {
+            fragment.type = wxStringFragment::SpaceOrPunct;
+            fragment.text = text.Left(length);
+        }
+    }
+
+    if ( length == 0 )
+    {
+        // No expression matched, which is not expected as together they are
+        // meant to cover every character: take the remainder as plain text
+        // so that callers looping until text is empty always make progress.
+        fragment.type = wxStringFragment::LetterOrSymbol;
+        fragment.text = text;
+        length = text.length();
+    }
+
+    text.erase(0, length);
+    return fragment;
+}
+
+// Compares two fragments, returning a negative, zero or positive value if
+// lhs sorts before, the same as or after rhs (see wxStringFragment::Type).
+int CompareFragmentNatural(const wxStringFragment& lhs, const wxStringFragment& rhs)
+{
+    switch ( lhs.type )
+    {
+        case wxStringFragment::Empty:
+            switch ( rhs.type )
+            {
+                case wxStringFragment::Empty:
+                    return 0;
+                case wxStringFragment::SpaceOrPunct:
+                case wxStringFragment::Digit:
+                case wxStringFragment::LetterOrSymbol:
+                    return -1;
+            }
+
+        case wxStringFragment::SpaceOrPunct:
+            switch ( rhs.type )
+            {
+                case wxStringFragment::Empty:
+                    return 1;
+                case wxStringFragment::SpaceOrPunct:
+                    return wxStrcoll_String(lhs.text, rhs.text);
+                case wxStringFragment::Digit:
+                case wxStringFragment::LetterOrSymbol:
+                    return -1;
+            }
+
+        case wxStringFragment::Digit:
+            switch ( rhs.type )
+            {
+                case wxStringFragment::Empty:
+                case wxStringFragment::SpaceOrPunct:
+                    return 1;
+                case wxStringFragment::Digit:
+                    if ( lhs.value > rhs.value )
+                        return 1;
+                    else if ( lhs.value < rhs.value )
+                        return -1;
+                    else
+                        return 0;
+                case wxStringFragment::LetterOrSymbol:
+                    return -1;
+            }
+
+        case wxStringFragment::LetterOrSymbol:
+            switch ( rhs.type )
+            {
+                case wxStringFragment::Empty:
+                case wxStringFragment::SpaceOrPunct:
+                case wxStringFragment::Digit:
+                    return 1;
+                case wxStringFragment::LetterOrSymbol:
+                    return wxStrcoll_String(lhs.text.Lower(), rhs.text.Lower());
+            }
+    }
+
+    // all possible cases should be covered by the switch above
+    // but return also from here to prevent the compiler warning
+    return 1;
+}
+
+} // unnamed namespace
+
+
+// ----------------------------------------------------------------------------
+// wxCmpNaturalGeneric
+// ----------------------------------------------------------------------------
+//
+int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2)
+{
+    wxString lhs(s1);
+    wxString rhs(s2);
+
+    int comparison = 0;
+
+    while ( (comparison == 0) && (!lhs.empty() || !rhs.empty()) )
+    {
+        const wxStringFragment fragmentLHS = GetFragment(lhs);
+        const wxStringFragment fragmentRHS = GetFragment(rhs);
+
+        comparison = CompareFragmentNatural(fragmentLHS, fragmentRHS);
+    }
+
+    return comparison;
+}
+
+#else
+
+int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2)
+{
+    return wxStrcoll_String(s1.Lower(), s2.Lower());
+}
+
+#endif // #if wxUSE_REGEX
+
+// ----------------------------------------------------------------------------
+// Declaration of StrCmpLogicalW()
+// ----------------------------------------------------------------------------
+//
+// In some distributions of MinGW32, this function is exported in the library,
+// but not declared in shlwapi.h. Therefore we declare it here.
+#if defined( __MINGW32_TOOLCHAIN__ )
+    extern "C" __declspec(dllimport) int WINAPI StrCmpLogicalW(LPCWSTR psz1, LPCWSTR psz2);
+#endif
+
+
+// ----------------------------------------------------------------------------
+// wxCmpNatural
+// ----------------------------------------------------------------------------
+//
+// If a native version of Natural sort is available, then use that, otherwise
+// use the generic version.
+int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2)
+{   // deliberately not inline: wx/arrstr.h declares this as an exported function
+#if defined( __WINDOWS__ )
+    return StrCmpLogicalW(s1.wc_str(), s2.wc_str());
+#else
+    return wxCmpNaturalGeneric(s1, s2);
+#endif // #if defined( __WINDOWS__ )
+}
+
diff --git a/tests/arrays/arrays.cpp b/tests/arrays/arrays.cpp
index 89f307cf43..0266915cfd 100644
--- a/tests/arrays/arrays.cpp
+++ b/tests/arrays/arrays.cpp
@@ -780,3 +780,84 @@ void ArraysTestCase::IndexFromEnd()
     CPPUNIT_ASSERT_EQUAL( 1, a.Index(1, /*bFromEnd=*/true) );
     CPPUNIT_ASSERT_EQUAL( 2, a.Index(42, /*bFromEnd=*/true) );
 }
+
+
+TEST_CASE("wxNaturalStringComparisonGeneric()", "[wxString][compare]")
+{
+#if !wxUSE_REGEX
+    WARN("Skipping wxCmpNaturalGeneric() tests: wxRegEx not available");
+#else
+    // simple string comparison
+    CHECK(wxCmpNaturalGeneric("a", "a") == 0);
+    CHECK(wxCmpNaturalGeneric("a", "z") < 0);
+    CHECK(wxCmpNaturalGeneric("z", "a") > 0);
+
+    // case insensitivity
+    CHECK(wxCmpNaturalGeneric("a", "A") == 0);
+    CHECK(wxCmpNaturalGeneric("A", "a") == 0);
+    CHECK(wxCmpNaturalGeneric("AB", "a") > 0);
+    CHECK(wxCmpNaturalGeneric("a", "AB") < 0);
+
+    // empty strings sort before whitespace and punctuation
+    CHECK(wxCmpNaturalGeneric("", " ") < 0);
+    CHECK(wxCmpNaturalGeneric(" ", "") > 0);
+    CHECK(wxCmpNaturalGeneric("", ",") < 0);
+    CHECK(wxCmpNaturalGeneric(",", "") > 0);
+
+    // empty strings sort before numbers
+    CHECK(wxCmpNaturalGeneric("", "0") < 0);
+    CHECK(wxCmpNaturalGeneric("0", "") > 0);
+
+    // empty strings sort before letters and symbols
+    CHECK(wxCmpNaturalGeneric("", "abc") < 0);
+    CHECK(wxCmpNaturalGeneric("abc", "") > 0);
+
+    // whitespace and punctuation sort before numbers
+    CHECK(wxCmpNaturalGeneric(" ", "1") < 0);
+    CHECK(wxCmpNaturalGeneric("1", " ") > 0);
+    CHECK(wxCmpNaturalGeneric(",", "1") < 0);
+    CHECK(wxCmpNaturalGeneric("1", ",") > 0);
+
+    // strings containing numbers sort before letters and symbols
+    CHECK(wxCmpNaturalGeneric("00", "a") < 0);
+    CHECK(wxCmpNaturalGeneric("a", "00") > 0);
+
+    // strings containing numbers are compared by their value
+    CHECK(wxCmpNaturalGeneric("01", "1") == 0);
+    CHECK(wxCmpNaturalGeneric("1", "01") == 0);
+    CHECK(wxCmpNaturalGeneric("1", "05") < 0);
+    CHECK(wxCmpNaturalGeneric("05", "1") > 0);
+    CHECK(wxCmpNaturalGeneric("10", "5") > 0);
+    CHECK(wxCmpNaturalGeneric("5", "10") < 0);
+    CHECK(wxCmpNaturalGeneric("1", "9999999999999999999") < 0);
+    CHECK(wxCmpNaturalGeneric("9999999999999999999", "1") > 0);
+
+    // comparing strings composed from whitespace,
+    // punctuation, numbers, letters, and symbols
+    CHECK(wxCmpNaturalGeneric("1st", " 1st") > 0);
+    CHECK(wxCmpNaturalGeneric(" 1st", "1st") < 0);
+
+    CHECK(wxCmpNaturalGeneric("1st", ",1st") > 0);
+    CHECK(wxCmpNaturalGeneric(",1st", "1st") < 0);
+
+    CHECK(wxCmpNaturalGeneric("1st", "01st") == 0);
+    CHECK(wxCmpNaturalGeneric("01st", "1st") == 0);
+    CHECK(wxCmpNaturalGeneric("10th", "5th") > 0);
+    CHECK(wxCmpNaturalGeneric("5th", "10th") < 0);
+
+    CHECK(wxCmpNaturalGeneric("a1st", "a01st") == 0);
+    CHECK(wxCmpNaturalGeneric("a01st", "a1st") == 0);
+    CHECK(wxCmpNaturalGeneric("a10th", "a5th") > 0);
+    CHECK(wxCmpNaturalGeneric("a5th", "a10th") < 0);
+    CHECK(wxCmpNaturalGeneric("a 10th", "a5th") < 0);
+    CHECK(wxCmpNaturalGeneric("a5th", "a 10th") > 0);
+
+    CHECK(wxCmpNaturalGeneric("a1st1", "a01st01") == 0);
+    CHECK(wxCmpNaturalGeneric("a01st01", "a1st1") == 0);
+    CHECK(wxCmpNaturalGeneric("a10th10", "a5th5") > 0);
+    CHECK(wxCmpNaturalGeneric("a5th5", "a10th10") < 0);
+    CHECK(wxCmpNaturalGeneric("a 10th 10", "a5th 5") < 0);
+    CHECK(wxCmpNaturalGeneric("a5th 5", "a 10th 10") > 0);
+#endif // #if !wxUSE_REGEX
+}
+