diff --git a/include/wx/arrstr.h b/include/wx/arrstr.h index 27ff30dffd..e21dc78761 100644 --- a/include/wx/arrstr.h +++ b/include/wx/arrstr.h @@ -53,7 +53,7 @@ WXDLLIMPEXP_BASE int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2); WXDLLIMPEXP_BASE -int wxCMPFUNC_CONV wxCmpNaturalNative(const wxString& s1, const wxString& s2); +int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2); inline int wxCMPFUNC_CONV wxNaturalStringSortAscending(const wxString& s1, const wxString& s2) { diff --git a/interface/wx/arrstr.h b/interface/wx/arrstr.h index 4d2ca36877..8c00e5b143 100644 --- a/interface/wx/arrstr.h +++ b/interface/wx/arrstr.h @@ -397,7 +397,7 @@ int wxStringSortDescending(const wxString& s1, const wxString& s2); @see wxDictionaryStringSortDescending(), wxStringSortAscending(), wxNaturalStringSortAscending() - + @since 3.1.0 */ int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2); @@ -416,88 +416,84 @@ int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2); int wxDictionaryStringSortDescending(const wxString& s1, const wxString& s2); - /** - Comparison function used for Natural Sort. - - Functions in the same way as wxDictionaryStringSortAscending(), with - the exception that numbers within the string are recognised, and - compared numerically, rather than alphabetically. When used for - sorting, the result is that e.g. file names containing numbers are - sorted in a natural way. +/** + Comparison function comparing strings in natural order. - This function will use an OS native function if one is available, - to ensure that the sort order is the same as the OS uses. - - Comparison is case insensitive. - - e.g. Sorting using wxDictionaryStringSortAscending() results in: - - file1.txt - - file10.txt - - file100.txt - - file2.txt - - file20.txt - - file3.txt - - e.g. 
Sorting using wxNaturalStringSortAscending() results in: - - file1.txt - - file2.txt - - file3.txt - - file11.txt - - file20.txt - - file100.txt - - @see wxNaturalStringSortDescending(), - wxStringSortAscending(), - wxDictionaryStringSortAscending() - - @since 3.1.2 - */ + This function can be used with wxSortedArrayString::Sort() + or passed as an argument to wxSortedArrayString constructor. + + See wxCmpNatural() for more information about how natural + sort order is implemented. + + @see wxNaturalStringSortDescending(), + wxStringSortAscending(), wxDictionaryStringSortAscending() + + @since 3.1.4 +*/ int wxNaturalStringSortAscending(const wxString& s1, const wxString& s2); +/** + Comparison function comparing strings in reverse natural order. - /** - Comparison function comparing strings in reverse natural order. - - See wxNaturalStringSortAscending() for the natural sort description. - - @see wxNaturalStringSortAscending(), - wxStringSortDescending(), - wxDictionaryStringSortDescending() - - @since 3.1.2 - */ + This function can be used with wxSortedArrayString::Sort() + or passed as an argument to wxSortedArrayString constructor. + + See wxCmpNatural() for more information about how natural + sort order is implemented. + + @see wxNaturalStringSortAscending(), + wxStringSortDescending(), wxDictionaryStringSortDescending() + + @since 3.1.4 +*/ int wxNaturalStringSortDescending(const wxString& s1, const wxString& s2); +/** + This function compares strings using case-insensitive collation and + additionally, numbers within strings are recognised and compared + numerically, rather than alphabetically. When used for sorting, + the result is that e.g. file names containing numbers are sorted + in a natural way. - /** - This is wxWidgets' own implementation of the natural sort comparison - function. This will be used whenever an OS native function is not available. 
- - Since OS native implementations might differ from each other, the user might - wish to use this function which behaves in the same way across all platforms. - - @since 3.1.2 - */ -int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2); + For example, sorting with a simple string comparison results in: + - file1.txt + - file10.txt + - file100.txt + - file2.txt + - file20.txt + - file3.txt + But sorting the same strings in natural sort order results in: + - file1.txt + - file2.txt + - file3.txt + - file10.txt + - file20.txt + - file100.txt - /** - Comparison function, identical to wxNaturalStringSortAscending(). - In fact, wxNaturalStringSortAscending() and wxNaturalStringSortDescending() - are both implemented using this function. - - When an OS native natural sort function is available, that will be used, - otherwise wxCmpNatural() will be used. + wxCmpNatural() uses an OS native natural sort function when available + (currently only under Microsoft Windows), wxCmpNaturalGeneric() otherwise. - Be aware that OS native implementations might differ from each other, and - might change behaviour from release to release. - - @see wxNaturalStringSortAscending(), - wxNaturalStringSortDescending() - - @since 3.1.2 - */ -int wxCMPFUNC_CONV wxCmpNaturalNative(const wxString& s1, const wxString& s2); + Be aware that OS native implementations might differ from each other, + and might change behaviour from release to release. + + @see wxNaturalStringSortAscending(), wxNaturalStringSortDescending() + + @since 3.1.4 +*/ +int wxCmpNatural(const wxString& s1, const wxString& s2); + +/** + This is wxWidgets' own implementation of the natural sort comparison function. + + Requires wxRegEx; if it is unavailable, numbers within strings are not + recognised and only case-insensitive collation is performed. 
+ + @see wxCmpNatural() + + @since 3.1.4 +*/ +int wxCmpNaturalGeneric(const wxString& s1, const wxString& s2); // ============================================================================ diff --git a/src/common/arrstr.cpp b/src/common/arrstr.cpp index c123b402f8..176bd6e72d 100644 --- a/src/common/arrstr.cpp +++ b/src/common/arrstr.cpp @@ -20,13 +20,14 @@ #endif #include "wx/arrstr.h" +#include "wx/regex.h" #include "wx/scopedarray.h" +#include "wx/wxcrt.h" #include "wx/beforestd.h" #include #include #include "wx/afterstd.h" -#include "wx/regex.h" #if defined( __WINDOWS__ ) #include @@ -728,143 +729,149 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape return ret; } +#if wxUSE_REGEX -namespace // enum, class and functions needed by wxCmpNatural(). +namespace // helpers needed by wxCmpNaturalGeneric() { - enum wxStringFragmentType +// Used for comparison of string parts +struct wxStringFragment +{ + // Fragment types are generally sorted like this: + // Empty < SpaceOrPunct < Digit < LetterOrSymbol + // Fragments of the same type are compared as follows: + // SpaceOrPunct - collated, Digit - as numbers using value + // LetterOrSymbol - lower-cased and then collated + enum Type { - wxFRAGMENT_TYPE_EMPTY = 0, - wxFRAGMENT_TYPE_ALPHA = 1, - wxFRAGMENT_TYPE_DIGIT = 2 + Empty, + SpaceOrPunct, // whitespace or punctuation + Digit, // a sequence of decimal digits + LetterOrSymbol // letters and symbols, i.e., anything not covered by the above types }; + wxStringFragment() : type(Empty), value(0) {} - // ---------------------------------------------------------------------------- - // wxStringFragment - // ---------------------------------------------------------------------------- - // - // Lightweight object returned by GetNaturalFragment(). - // Represents either a number, or a string which contains no numerical digits. 
- class wxStringFragment + Type type; + wxString text; + wxUint64 value; // used only for Digit type +}; + + +wxStringFragment GetFragment(wxString& text) +{ + static const wxRegEx reSpaceOrPunct(wxS("^([[:space:]]|[[:punct:]])+")); + // Limit the length to make sure the value will fit into a wxUint64 + static const wxRegEx reDigit(wxS("^[[:digit:]]{1,19}")); + static const wxRegEx reLetterOrSymbol(wxS("^[^[:space:]|[:punct:]|[:digit:]]+")); + + if ( text.empty() ) + return wxStringFragment(); + + wxStringFragment fragment; + size_t length = 0; + + // In attempt to minimize the number of wxRegEx.Matches() calls, + // try to do them from the most expected to the least expected + // string fragment type. + if ( reLetterOrSymbol.Matches(text) ) { - public: - wxStringFragment() - : type(wxFRAGMENT_TYPE_EMPTY) - {} - - wxString text; - long value; - wxStringFragmentType type; - }; - - - wxStringFragment GetFragment(wxString& text) + if ( reLetterOrSymbol.GetMatch(NULL, &length) ) + { + fragment.type = wxStringFragment::LetterOrSymbol; + fragment.text = text.Left(length); + } + } + else if ( reDigit.Matches(text) ) { - static const wxRegEx naturalNumeric(wxS("[0-9]+")); - static const wxRegEx naturalAlpha(wxS("[^0-9]+")); - - size_t digitStart = 0; - size_t digitLength = 0; - size_t alphaStart = 0; - size_t alphaLength = 0; - wxStringFragment fragment; - - if ( text.empty() ) - return fragment; - - if ( naturalNumeric.Matches(text) ) + if ( reDigit.GetMatch(NULL, &length) ) { - naturalNumeric.GetMatch(&digitStart, &digitLength, 0); + fragment.type = wxStringFragment::Digit; + fragment.text = text.Left(length); + fragment.text.ToULongLong(&fragment.value); } - - if ( naturalAlpha.Matches(text) ) + } + else if ( reSpaceOrPunct.Matches(text) ) + { + if ( reSpaceOrPunct.GetMatch(NULL, &length) ) { - naturalAlpha.GetMatch(&alphaStart, &alphaLength, 0); + fragment.type = wxStringFragment::SpaceOrPunct; + fragment.text = text.Left(length); } - - - if ( alphaStart == 0 ) - { - 
fragment.text = text.Mid(0, alphaLength); - fragment.value = 0; - fragment.type = wxFRAGMENT_TYPE_ALPHA; - - text.erase(0, alphaLength); - } - - if ( digitStart == 0 ) - { - fragment.text = text.Mid(0, digitLength); - fragment.text.ToLong(&fragment.value); - fragment.type = wxFRAGMENT_TYPE_DIGIT; - - text.erase(0, digitLength); - } - - return fragment; } - int CompareFragmentNatural(const wxStringFragment& lhs, const wxStringFragment& rhs) + text.erase(0, length); + return fragment; +} + +int CompareFragmentNatural(const wxStringFragment& lhs, const wxStringFragment& rhs) +{ + switch ( lhs.type ) { - if ( (lhs.type == wxFRAGMENT_TYPE_ALPHA) && - (rhs.type == wxFRAGMENT_TYPE_ALPHA) ) - { - return lhs.text.CmpNoCase(rhs.text); - } - - if ( (lhs.type == wxFRAGMENT_TYPE_DIGIT) && - (rhs.type == wxFRAGMENT_TYPE_DIGIT) ) - { - if ( lhs.value == rhs.value ) + case wxStringFragment::Empty: + switch ( rhs.type ) { - return 0; + case wxStringFragment::Empty: + return 0; + case wxStringFragment::SpaceOrPunct: + case wxStringFragment::Digit: + case wxStringFragment::LetterOrSymbol: + return -1; } - if ( lhs.value < rhs.value ) + case wxStringFragment::SpaceOrPunct: + switch ( rhs.type ) { - return -1; + case wxStringFragment::Empty: + return 1; + case wxStringFragment::SpaceOrPunct: + return wxStrcoll_String(lhs.text, rhs.text); + case wxStringFragment::Digit: + case wxStringFragment::LetterOrSymbol: + return -1; } - if ( lhs.value > rhs.value ) + case wxStringFragment::Digit: + switch ( rhs.type ) { - return 1; + case wxStringFragment::Empty: + case wxStringFragment::SpaceOrPunct: + return 1; + case wxStringFragment::Digit: + if ( lhs.value > rhs.value ) + return 1; + else if ( lhs.value < rhs.value ) + return -1; + else + return 0; + case wxStringFragment::LetterOrSymbol: + return -1; } - } - if ( (lhs.type == wxFRAGMENT_TYPE_DIGIT) && - (rhs.type == wxFRAGMENT_TYPE_ALPHA) ) - { - return -1; - } - - if ( (lhs.type == wxFRAGMENT_TYPE_ALPHA) && - (rhs.type == 
wxFRAGMENT_TYPE_DIGIT) ) - { - return 1; - } - - if ( lhs.type == wxFRAGMENT_TYPE_EMPTY ) - { - return -1; - } - - if ( rhs.type == wxFRAGMENT_TYPE_EMPTY ) - { - return 1; - } - - return 0; + case wxStringFragment::LetterOrSymbol: + switch ( rhs.type ) + { + case wxStringFragment::Empty: + case wxStringFragment::SpaceOrPunct: + case wxStringFragment::Digit: + return 1; + case wxStringFragment::LetterOrSymbol: + return wxStrcoll_String(lhs.text.Lower(), rhs.text.Lower()); + } } + // all possible cases should be covered by the switch above + // but return also from here to prevent the compiler warning + return 1; +} + } // unnamed namespace - // ---------------------------------------------------------------------------- -// wxCmpNaturalNative +// wxCmpNaturalGeneric // ---------------------------------------------------------------------------- -// -int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2) +// +int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2) { wxString lhs(s1); wxString rhs(s2); @@ -873,19 +880,28 @@ int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2) while ( (comparison == 0) && (!lhs.empty() || !rhs.empty()) ) { - wxStringFragment fragmentL = GetFragment(lhs); - wxStringFragment fragmentR = GetFragment(rhs); - comparison = CompareFragmentNatural(fragmentL, fragmentR); + const wxStringFragment fragmentLHS = GetFragment(lhs); + const wxStringFragment fragmentRHS = GetFragment(rhs); + + comparison = CompareFragmentNatural(fragmentLHS, fragmentRHS); } return comparison; } +#else + +int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2) +{ + return wxStrcoll_String(s1.Lower(), s2.Lower()); +} + +#endif // #if wxUSE_REGEX // ---------------------------------------------------------------------------- // Declaration of StrCmpLogicalW() // ---------------------------------------------------------------------------- -// +// // In some distributions of MinGW32, this 
function is exported in the library, // but not declared in shlwapi.h. Therefore we declare it here. #if defined( __MINGW32_TOOLCHAIN__ ) @@ -894,19 +910,17 @@ int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2) // ---------------------------------------------------------------------------- -// wxCmpNaturalNative +// wxCmpNatural // ---------------------------------------------------------------------------- -// +// // If a native version of Natural sort is available, then use that, otherwise -// use the wxWidgets version, wxCmpNatural(). -int wxCMPFUNC_CONV wxCmpNaturalNative(const wxString& s1, const wxString& s2) +// use the generic version. Must not be inline: it is declared non-inline in wx/arrstr.h. +int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2) { - #if defined( __WINDOWS__ ) - return StrCmpLogicalW( s1.wc_str(), s2.wc_str() ); - - #else - return wxCmpNatural( s1, s2 ); - - #endif +#if defined( __WINDOWS__ ) + return StrCmpLogicalW(s1.wc_str(), s2.wc_str()); +#else + return wxCmpNaturalGeneric(s1, s2); +#endif // #if defined( __WINDOWS__ ) } diff --git a/tests/arrays/arrays.cpp b/tests/arrays/arrays.cpp index 630f12d06c..0266915cfd 100644 --- a/tests/arrays/arrays.cpp +++ b/tests/arrays/arrays.cpp @@ -782,47 +782,82 @@ void ArraysTestCase::IndexFromEnd() } -TEST_CASE("wxNaturalStringSortAscending()", "[array][sort][string]") +TEST_CASE("wxNaturalStringComparisonGeneric()", "[wxString][compare]") { - wxString s01("3String"); - wxString s02("21String"); - - wxString s03("100string"); - wxString s04("100String"); - - wxString s05("10String"); - wxString s06("Str3ing"); - wxString s07("Str20ing"); - wxString s08("Str200ing"); - wxString s09("String8"); - wxString s10("String90"); - - wxString s11("7String3"); - wxString s12("07String20"); - wxString s13("007String100"); - - CHECK(wxCmpNatural(s01, s02) < 0); - CHECK(wxCmpNatural(s02, s03) < 0); - CHECK(wxCmpNatural(s03, s04) == 0); // Check that case is ignored - CHECK(wxCmpNatural(s05, s06) < 0); - 
CHECK(wxCmpNatural(s06, s07) < 0); - CHECK(wxCmpNatural(s07, s08) < 0); - CHECK(wxCmpNatural(s08, s09) < 0); - CHECK(wxCmpNatural(s09, s10) < 0); - CHECK(wxCmpNatural(s11, s12) < 0); - CHECK(wxCmpNatural(s12, s13) < 0); - CHECK(wxCmpNatural(s01, s01) == 0); // Check that equality works in all cases - CHECK(wxCmpNatural(s02, s02) == 0); - CHECK(wxCmpNatural(s03, s03) == 0); - CHECK(wxCmpNatural(s04, s04) == 0); - CHECK(wxCmpNatural(s05, s05) == 0); - CHECK(wxCmpNatural(s06, s06) == 0); - CHECK(wxCmpNatural(s07, s07) == 0); - CHECK(wxCmpNatural(s08, s08) == 0); - CHECK(wxCmpNatural(s09, s09) == 0); - CHECK(wxCmpNatural(s10, s10) == 0); - CHECK(wxCmpNatural(s11, s11) == 0); - CHECK(wxCmpNatural(s12, s12) == 0); - CHECK(wxCmpNatural(s13, s13) == 0); +#if !wxUSE_REGEX + WARN("Skipping wxCmpNaturalGeneric() tests: wxRegEx not available"); +#else + // simple string comparison + CHECK(wxCmpNaturalGeneric("a", "a") == 0); + CHECK(wxCmpNaturalGeneric("a", "z") < 0); + CHECK(wxCmpNaturalGeneric("z", "a") > 0); + + // case insensitivity + CHECK(wxCmpNaturalGeneric("a", "A") == 0); + CHECK(wxCmpNaturalGeneric("A", "a") == 0); + CHECK(wxCmpNaturalGeneric("AB", "a") > 0); + CHECK(wxCmpNaturalGeneric("a", "AB") < 0); + + // empty strings sort before whitespace and punctuation + CHECK(wxCmpNaturalGeneric("", " ") < 0); + CHECK(wxCmpNaturalGeneric(" ", "") > 0); + CHECK(wxCmpNaturalGeneric("", ",") < 0); + CHECK(wxCmpNaturalGeneric(",", "") > 0); + + // empty strings sort before numbers + CHECK(wxCmpNaturalGeneric("", "0") < 0); + CHECK(wxCmpNaturalGeneric("0", "") > 0); + + // empty strings sort before letters and symbols + CHECK(wxCmpNaturalGeneric("", "abc") < 0); + CHECK(wxCmpNaturalGeneric("abc", "") > 0); + + // whitespace and punctuation sort before numbers + CHECK(wxCmpNaturalGeneric(" ", "1") < 0); + CHECK(wxCmpNaturalGeneric("1", " ") > 0); + CHECK(wxCmpNaturalGeneric(",", "1") < 0); + CHECK(wxCmpNaturalGeneric("1", ",") > 0); + + // strings containing numbers sort before 
letters and symbols + CHECK(wxCmpNaturalGeneric("00", "a") < 0); + CHECK(wxCmpNaturalGeneric("a", "00") > 0); + + // strings containing numbers are compared by their value + CHECK(wxCmpNaturalGeneric("01", "1") == 0); + CHECK(wxCmpNaturalGeneric("1", "01") == 0); + CHECK(wxCmpNaturalGeneric("1", "05") < 0); + CHECK(wxCmpNaturalGeneric("05", "1") > 0); + CHECK(wxCmpNaturalGeneric("10", "5") > 0); + CHECK(wxCmpNaturalGeneric("5", "10") < 0); + CHECK(wxCmpNaturalGeneric("1", "9999999999999999999") < 0); + CHECK(wxCmpNaturalGeneric("9999999999999999999", "1") > 0); + + // comparing strings composed from whitespace, + // punctuation, numbers, letters, and symbols + CHECK(wxCmpNaturalGeneric("1st", " 1st") > 0); + CHECK(wxCmpNaturalGeneric(" 1st", "1st") < 0); + + CHECK(wxCmpNaturalGeneric("1st", ",1st") > 0); + CHECK(wxCmpNaturalGeneric(",1st", "1st") < 0); + + CHECK(wxCmpNaturalGeneric("1st", "01st") == 0); + CHECK(wxCmpNaturalGeneric("01st", "1st") == 0); + CHECK(wxCmpNaturalGeneric("10th", "5th") > 0); + CHECK(wxCmpNaturalGeneric("5th", "10th") < 0); + + CHECK(wxCmpNaturalGeneric("a1st", "a01st") == 0); + CHECK(wxCmpNaturalGeneric("a01st", "a1st") == 0); + CHECK(wxCmpNaturalGeneric("a10th", "a5th") > 0); + CHECK(wxCmpNaturalGeneric("a5th", "a10th") < 0); + CHECK(wxCmpNaturalGeneric("a 10th", "a5th") < 0); + CHECK(wxCmpNaturalGeneric("a5th", "a 10th") > 0); + + CHECK(wxCmpNaturalGeneric("a1st1", "a01st01") == 0); + CHECK(wxCmpNaturalGeneric("a01st01", "a1st1") == 0); + CHECK(wxCmpNaturalGeneric("a10th10", "a5th5") > 0); + CHECK(wxCmpNaturalGeneric("a5th5", "a10th10") < 0); + CHECK(wxCmpNaturalGeneric("a 10th 10", "a5th 5") < 0); + CHECK(wxCmpNaturalGeneric("a5th 5", "a 10th 10") > 0); +#endif // #if !wxUSE_REGEX }