From 371c4b13661641161f2c71ac2bc1044e48681705 Mon Sep 17 00:00:00 2001 From: Hugo Elias Date: Mon, 9 Apr 2018 22:35:00 +0100 Subject: [PATCH 1/2] Add functions for sorting strings in natural sort order Use StrCmpLogicalW() under MSW and generic implementation under the other platforms. See https://github.com/wxWidgets/wxWidgets/pull/780 --- include/wx/arrstr.h | 18 ++++ interface/wx/arrstr.h | 103 ++++++++++++++++++++-- src/common/arrstr.cpp | 189 ++++++++++++++++++++++++++++++++++++++++ tests/arrays/arrays.cpp | 46 ++++++++++ 4 files changed, 350 insertions(+), 6 deletions(-) diff --git a/include/wx/arrstr.h b/include/wx/arrstr.h index 3194a0053f..27ff30dffd 100644 --- a/include/wx/arrstr.h +++ b/include/wx/arrstr.h @@ -42,12 +42,30 @@ wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2) return cmp ? cmp : s1.Cmp(s2); } + inline int wxCMPFUNC_CONV wxDictionaryStringSortDescending(const wxString& s1, const wxString& s2) { return wxDictionaryStringSortAscending(s2, s1); } +WXDLLIMPEXP_BASE +int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2); + +WXDLLIMPEXP_BASE +int wxCMPFUNC_CONV wxCmpNaturalNative(const wxString& s1, const wxString& s2); + +inline int wxCMPFUNC_CONV wxNaturalStringSortAscending(const wxString& s1, const wxString& s2) +{ + return wxCmpNatural(s1, s2); +} + +inline int wxCMPFUNC_CONV wxNaturalStringSortDescending(const wxString& s1, const wxString& s2) +{ + return wxCmpNatural(s2, s1); +} + + #if wxUSE_STD_CONTAINERS typedef int (wxCMPFUNC_CONV *CMPFUNCwxString)(wxString*, wxString*); diff --git a/interface/wx/arrstr.h b/interface/wx/arrstr.h index 8e0f98089d..4d2ca36877 100644 --- a/interface/wx/arrstr.h +++ b/interface/wx/arrstr.h @@ -363,7 +363,8 @@ public: This function can be used with wxSortedArrayString::Sort() or passed as an argument to wxSortedArrayString constructor. 
- @see wxStringSortDescending(), wxDictionaryStringSortAscending() + @see wxStringSortDescending(), wxDictionaryStringSortAscending(), + wxNaturalStringSortAscending() @since 3.1.0 */ @@ -375,7 +376,8 @@ int wxStringSortAscending(const wxString& s1, const wxString& s2); This function can be used with wxSortedArrayString::Sort() or passed as an argument to wxSortedArrayString constructor. - @see wxStringSortAscending(), wxDictionaryStringSortAscending() + @see wxStringSortAscending(), wxDictionaryStringSortDescending(), + wxNaturalStringSortDescending() @since 3.1.0 */ @@ -392,8 +394,10 @@ int wxStringSortDescending(const wxString& s1, const wxString& s2); This function can be used with wxSortedArrayString::Sort() or passed as an argument to wxSortedArrayString constructor. - @see wxStringSortAscending(), wxDictionaryStringSortDescending() - + @see wxDictionaryStringSortDescending(), + wxStringSortAscending(), + wxNaturalStringSortAscending() + @since 3.1.0 */ int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2); @@ -403,11 +407,98 @@ int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2); See wxDictionaryStringSortAscending() for the dictionary sort description. - @see wxStringSortDescending() + @see wxDictionaryStringSortAscending(), + wxStringSortDescending(), + wxNaturalStringSortDescending() @since 3.1.0 */ -int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2); +int wxDictionaryStringSortDescending(const wxString& s1, const wxString& s2); + + + /** + Comparison function used for Natural Sort. + + Functions in the same way as wxDictionaryStringSortAscending(), with + the exception that numbers within the string are recognised, and + compared numerically, rather than alphabetically. When used for + sorting, the result is that e.g. file names containing numbers are + sorted in a natural way. 
+ + This function will use an OS native function if one is available, + to ensure that the sort order is the same as the OS uses. + + Comparison is case insensitive. + + e.g. Sorting using wxDictionaryStringSortAscending() results in: + - file1.txt + - file10.txt + - file100.txt + - file2.txt + - file20.txt + - file3.txt + + e.g. Sorting using wxNaturalStringSortAscending() results in: + - file1.txt + - file2.txt + - file3.txt + - file11.txt + - file20.txt + - file100.txt + + @see wxNaturalStringSortDescending(), + wxStringSortAscending(), + wxDictionaryStringSortAscending() + + @since 3.1.2 + */ +int wxNaturalStringSortAscending(const wxString& s1, const wxString& s2); + + + /** + Comparison function comparing strings in reverse natural order. + + See wxNaturalStringSortAscending() for the natural sort description. + + @see wxNaturalStringSortAscending(), + wxStringSortDescending(), + wxDictionaryStringSortDescending() + + @since 3.1.2 + */ +int wxNaturalStringSortDescending(const wxString& s1, const wxString& s2); + + + /** + This is wxWidgets' own implementation of the natural sort comparison + function. This will be used whenever an OS native function is not available. + + Since OS native implementations might differ from each other, the user might + wish to use this function which behaves in the same way across all platforms. + + @since 3.1.2 + */ +int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2); + + + /** + Comparison function, identical to wxNaturalStringSortAscending(). + In fact, wxNaturalStringSortAscending() and wxNaturalStringSortDescending() + are both implemented using this function. + + When an OS native natural sort function is available, that will be used, + otherwise wxCmpNatural() will be used. + + Be aware that OS native implementations might differ from each other, and + might change behaviour from release to release. 
+ + @see wxNaturalStringSortAscending(), + wxNaturalStringSortDescending() + + @since 3.1.2 + */ +int wxCMPFUNC_CONV wxCmpNaturalNative(const wxString& s1, const wxString& s2); + // ============================================================================ // Global functions/macros diff --git a/src/common/arrstr.cpp b/src/common/arrstr.cpp index 1b05c1d556..c123b402f8 100644 --- a/src/common/arrstr.cpp +++ b/src/common/arrstr.cpp @@ -26,6 +26,12 @@ #include #include #include "wx/afterstd.h" +#include "wx/regex.h" + +#if defined( __WINDOWS__ ) + #include +#endif + // ============================================================================ // ArrayString @@ -721,3 +727,186 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape return ret; } + + +namespace // enum, class and functions needed by wxCmpNatural(). +{ + enum wxStringFragmentType + { + wxFRAGMENT_TYPE_EMPTY = 0, + wxFRAGMENT_TYPE_ALPHA = 1, + wxFRAGMENT_TYPE_DIGIT = 2 + }; + + + // ---------------------------------------------------------------------------- + // wxStringFragment + // ---------------------------------------------------------------------------- + // + // Lightweight object returned by GetNaturalFragment(). + // Represents either a number, or a string which contains no numerical digits. 
+ class wxStringFragment + { + public: + wxStringFragment() + : type(wxFRAGMENT_TYPE_EMPTY) + {} + + wxString text; + long value; + wxStringFragmentType type; + }; + + + wxStringFragment GetFragment(wxString& text) + { + static const wxRegEx naturalNumeric(wxS("[0-9]+")); + static const wxRegEx naturalAlpha(wxS("[^0-9]+")); + + size_t digitStart = 0; + size_t digitLength = 0; + size_t alphaStart = 0; + size_t alphaLength = 0; + wxStringFragment fragment; + + if ( text.empty() ) + return fragment; + + if ( naturalNumeric.Matches(text) ) + { + naturalNumeric.GetMatch(&digitStart, &digitLength, 0); + } + + if ( naturalAlpha.Matches(text) ) + { + naturalAlpha.GetMatch(&alphaStart, &alphaLength, 0); + } + + + if ( alphaStart == 0 ) + { + fragment.text = text.Mid(0, alphaLength); + fragment.value = 0; + fragment.type = wxFRAGMENT_TYPE_ALPHA; + + text.erase(0, alphaLength); + } + + if ( digitStart == 0 ) + { + fragment.text = text.Mid(0, digitLength); + fragment.text.ToLong(&fragment.value); + fragment.type = wxFRAGMENT_TYPE_DIGIT; + + text.erase(0, digitLength); + } + + return fragment; + } + + int CompareFragmentNatural(const wxStringFragment& lhs, const wxStringFragment& rhs) + { + if ( (lhs.type == wxFRAGMENT_TYPE_ALPHA) && + (rhs.type == wxFRAGMENT_TYPE_ALPHA) ) + { + return lhs.text.CmpNoCase(rhs.text); + } + + if ( (lhs.type == wxFRAGMENT_TYPE_DIGIT) && + (rhs.type == wxFRAGMENT_TYPE_DIGIT) ) + { + if ( lhs.value == rhs.value ) + { + return 0; + } + + if ( lhs.value < rhs.value ) + { + return -1; + } + + if ( lhs.value > rhs.value ) + { + return 1; + } + } + + if ( (lhs.type == wxFRAGMENT_TYPE_DIGIT) && + (rhs.type == wxFRAGMENT_TYPE_ALPHA) ) + { + return -1; + } + + if ( (lhs.type == wxFRAGMENT_TYPE_ALPHA) && + (rhs.type == wxFRAGMENT_TYPE_DIGIT) ) + { + return 1; + } + + if ( lhs.type == wxFRAGMENT_TYPE_EMPTY ) + { + return -1; + } + + if ( rhs.type == wxFRAGMENT_TYPE_EMPTY ) + { + return 1; + } + + return 0; + } + +} // unnamed namespace + + + +// 
---------------------------------------------------------------------------- +// wxCmpNaturalNative +// ---------------------------------------------------------------------------- +// +int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2) +{ + wxString lhs(s1); + wxString rhs(s2); + + int comparison = 0; + + while ( (comparison == 0) && (!lhs.empty() || !rhs.empty()) ) + { + wxStringFragment fragmentL = GetFragment(lhs); + wxStringFragment fragmentR = GetFragment(rhs); + comparison = CompareFragmentNatural(fragmentL, fragmentR); + } + + return comparison; +} + + +// ---------------------------------------------------------------------------- +// Declaration of StrCmpLogicalW() +// ---------------------------------------------------------------------------- +// +// In some distributions of MinGW32, this function is exported in the library, +// but not declared in shlwapi.h. Therefore we declare it here. +#if defined( __MINGW32_TOOLCHAIN__ ) + extern "C" __declspec(dllimport) int WINAPI StrCmpLogicalW(LPCWSTR psz1, LPCWSTR psz2); +#endif + + +// ---------------------------------------------------------------------------- +// wxCmpNaturalNative +// ---------------------------------------------------------------------------- +// +// If a native version of Natural sort is available, then use that, otherwise +// use the wxWidgets version, wxCmpNatural(). 
+int wxCMPFUNC_CONV wxCmpNaturalNative(const wxString& s1, const wxString& s2) +{ + #if defined( __WINDOWS__ ) + return StrCmpLogicalW( s1.wc_str(), s2.wc_str() ); + + #else + return wxCmpNatural( s1, s2 ); + + #endif +} + diff --git a/tests/arrays/arrays.cpp b/tests/arrays/arrays.cpp index 89f307cf43..630f12d06c 100644 --- a/tests/arrays/arrays.cpp +++ b/tests/arrays/arrays.cpp @@ -780,3 +780,49 @@ void ArraysTestCase::IndexFromEnd() CPPUNIT_ASSERT_EQUAL( 1, a.Index(1, /*bFromEnd=*/true) ); CPPUNIT_ASSERT_EQUAL( 2, a.Index(42, /*bFromEnd=*/true) ); } + + +TEST_CASE("wxNaturalStringSortAscending()", "[array][sort][string]") +{ + wxString s01("3String"); + wxString s02("21String"); + + wxString s03("100string"); + wxString s04("100String"); + + wxString s05("10String"); + wxString s06("Str3ing"); + wxString s07("Str20ing"); + wxString s08("Str200ing"); + wxString s09("String8"); + wxString s10("String90"); + + wxString s11("7String3"); + wxString s12("07String20"); + wxString s13("007String100"); + + CHECK(wxCmpNatural(s01, s02) < 0); + CHECK(wxCmpNatural(s02, s03) < 0); + CHECK(wxCmpNatural(s03, s04) == 0); // Check that case is ignored + CHECK(wxCmpNatural(s05, s06) < 0); + CHECK(wxCmpNatural(s06, s07) < 0); + CHECK(wxCmpNatural(s07, s08) < 0); + CHECK(wxCmpNatural(s08, s09) < 0); + CHECK(wxCmpNatural(s09, s10) < 0); + CHECK(wxCmpNatural(s11, s12) < 0); + CHECK(wxCmpNatural(s12, s13) < 0); + CHECK(wxCmpNatural(s01, s01) == 0); // Check that equality works in all cases + CHECK(wxCmpNatural(s02, s02) == 0); + CHECK(wxCmpNatural(s03, s03) == 0); + CHECK(wxCmpNatural(s04, s04) == 0); + CHECK(wxCmpNatural(s05, s05) == 0); + CHECK(wxCmpNatural(s06, s06) == 0); + CHECK(wxCmpNatural(s07, s07) == 0); + CHECK(wxCmpNatural(s08, s08) == 0); + CHECK(wxCmpNatural(s09, s09) == 0); + CHECK(wxCmpNatural(s10, s10) == 0); + CHECK(wxCmpNatural(s11, s11) == 0); + CHECK(wxCmpNatural(s12, s12) == 0); + CHECK(wxCmpNatural(s13, s13) == 0); +} + From 
83a2a1e5057d8db3be18b9000b16b0635c788c5d Mon Sep 17 00:00:00 2001 From: PB Date: Thu, 2 Jul 2020 18:15:25 +0200 Subject: [PATCH 2/2] Refactor the natural string compare and sort algorithm Add a new string fragment type for whitespace and punctuation which needs to be assessed separately from letters and symbols. Use wxUint64 instead of long for storing the value for numeric fragment. Use collate instead of compare for non-numeric fragments. Change names for the public comparison functions: wxWidgets provided function is now named wxCmpNaturalGeneric() and for common public use is wxCmpNatural() which calls a native function in wxMSW and wxCmpNaturalGeneric() elsewhere. Try harder in wxCmpNaturalGeneric() if wxRegEx is unavailable: do not just make a simple string comparison, but perform a case-insensitive collation. Make some other changes to simplify and possibly speed up the code. --- include/wx/arrstr.h | 2 +- interface/wx/arrstr.h | 142 +++++++++++----------- src/common/arrstr.cpp | 258 +++++++++++++++++++++------------------- tests/arrays/arrays.cpp | 117 +++++++++++------- 4 files changed, 282 insertions(+), 237 deletions(-) diff --git a/include/wx/arrstr.h b/include/wx/arrstr.h index 27ff30dffd..e21dc78761 100644 --- a/include/wx/arrstr.h +++ b/include/wx/arrstr.h @@ -53,7 +53,7 @@ WXDLLIMPEXP_BASE int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2); WXDLLIMPEXP_BASE -int wxCMPFUNC_CONV wxCmpNaturalNative(const wxString& s1, const wxString& s2); +int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2); inline int wxCMPFUNC_CONV wxNaturalStringSortAscending(const wxString& s1, const wxString& s2) { diff --git a/interface/wx/arrstr.h b/interface/wx/arrstr.h index 4d2ca36877..8c00e5b143 100644 --- a/interface/wx/arrstr.h +++ b/interface/wx/arrstr.h @@ -397,7 +397,7 @@ int wxStringSortDescending(const wxString& s1, const wxString& s2); @see wxDictionaryStringSortDescending(), wxStringSortAscending(),
wxNaturalStringSortAscending() - + @since 3.1.0 */ int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2); @@ -416,88 +416,84 @@ int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2); int wxDictionaryStringSortDescending(const wxString& s1, const wxString& s2); - /** - Comparison function used for Natural Sort. - - Functions in the same way as wxDictionaryStringSortAscending(), with - the exception that numbers within the string are recognised, and - compared numerically, rather than alphabetically. When used for - sorting, the result is that e.g. file names containing numbers are - sorted in a natural way. +/** + Comparison function comparing strings in natural order. - This function will use an OS native function if one is available, - to ensure that the sort order is the same as the OS uses. - - Comparison is case insensitive. - - e.g. Sorting using wxDictionaryStringSortAscending() results in: - - file1.txt - - file10.txt - - file100.txt - - file2.txt - - file20.txt - - file3.txt - - e.g. Sorting using wxNaturalStringSortAscending() results in: - - file1.txt - - file2.txt - - file3.txt - - file11.txt - - file20.txt - - file100.txt - - @see wxNaturalStringSortDescending(), - wxStringSortAscending(), - wxDictionaryStringSortAscending() - - @since 3.1.2 - */ + This function can be used with wxSortedArrayString::Sort() + or passed as an argument to wxSortedArrayString constructor. + + See wxCmpNatural() for more information about how natural + sort order is implemented. + + @see wxNaturalStringSortDescending(), + wxStringSortAscending(), wxDictionaryStringSortAscending() + + @since 3.1.4 +*/ int wxNaturalStringSortAscending(const wxString& s1, const wxString& s2); +/** + Comparison function comparing strings in reverse natural order. - /** - Comparison function comparing strings in reverse natural order. - - See wxNaturalStringSortAscending() for the natural sort description. 
- - @see wxNaturalStringSortAscending(), - wxStringSortDescending(), - wxDictionaryStringSortDescending() - - @since 3.1.2 - */ + This function can be used with wxSortedArrayString::Sort() + or passed as an argument to wxSortedArrayString constructor. + + See wxCmpNatural() for more information about how natural + sort order is implemented. + + @see wxNaturalStringSortAscending(), + wxStringSortDescending(), wxDictionaryStringSortDescending() + + @since 3.1.4 +*/ int wxNaturalStringSortDescending(const wxString& s1, const wxString& s2); +/** + This function compares strings using case-insensitive collation and + additionally, numbers within strings are recognised and compared + numerically, rather than alphabetically. When used for sorting, + the result is that e.g. file names containing numbers are sorted + in a natural way. - /** - This is wxWidgets' own implementation of the natural sort comparison - function. This will be used whenever an OS native function is not available. - - Since OS native implementations might differ from each other, the user might - wish to use this function which behaves in the same way across all platforms. - - @since 3.1.2 - */ -int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2); + For example, sorting with a simple string comparison results in: + - file1.txt + - file10.txt + - file100.txt + - file2.txt + - file20.txt + - file3.txt + But sorting the same strings in natural sort order results in: + - file1.txt + - file2.txt + - file3.txt + - file10.txt + - file20.txt + - file100.txt - /** - Comparison function, identical to wxNaturalStringSortAscending(). - In fact, wxNaturalStringSortAscending() and wxNaturalStringSortDescending() - are both implemented using this function. - - When an OS native natural sort function is available, that will be used, - otherwise wxCmpNatural() will be used. 
+ wxCmpNatural() uses an OS native natural sort function when available + (currently only under Microsoft Windows), wxCmpNaturalGeneric() otherwise. - Be aware that OS native implementations might differ from each other, and - might change behaviour from release to release. - - @see wxNaturalStringSortAscending(), - wxNaturalStringSortDescending() - - @since 3.1.2 - */ -int wxCMPFUNC_CONV wxCmpNaturalNative(const wxString& s1, const wxString& s2); + Be aware that OS native implementations might differ from each other, + and might change behaviour from release to release. + + @see wxNaturalStringSortAscending(), wxNaturalStringSortDescending() + + @since 3.1.4 +*/ +int wxCmpNatural(const wxString& s1, const wxString& s2); + +/** + This is wxWidgets' own implementation of the natural sort comparison function. + + Requires wxRegEx, if it is unavailable numbers within strings are not + recognised and only case-insensitive collation is performed. + + @see wxCmpNatural() + + @since 3.1.4 +*/ +int wxCmpNaturalGeneric(const wxString& s1, const wxString& s2); // ============================================================================ diff --git a/src/common/arrstr.cpp b/src/common/arrstr.cpp index c123b402f8..176bd6e72d 100644 --- a/src/common/arrstr.cpp +++ b/src/common/arrstr.cpp @@ -20,13 +20,14 @@ #endif #include "wx/arrstr.h" +#include "wx/regex.h" #include "wx/scopedarray.h" +#include "wx/wxcrt.h" #include "wx/beforestd.h" #include #include #include "wx/afterstd.h" -#include "wx/regex.h" #if defined( __WINDOWS__ ) #include @@ -728,143 +729,149 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape return ret; } +#if wxUSE_REGEX -namespace // enum, class and functions needed by wxCmpNatural(). 
+namespace // helpers needed by wxCmpNaturalGeneric() { - enum wxStringFragmentType +// Used for comparison of string parts +struct wxStringFragment +{ + // Fragment types are generally sorted like this: + // Empty < SpaceOrPunct < Digit < LetterOrSymbol + // Fragments of the same type are compared as follows: + // SpaceOrPunct - collated, Digit - as numbers using value + // LetterOrSymbol - lower-cased and then collated + enum Type { - wxFRAGMENT_TYPE_EMPTY = 0, - wxFRAGMENT_TYPE_ALPHA = 1, - wxFRAGMENT_TYPE_DIGIT = 2 + Empty, + SpaceOrPunct, // whitespace or punctuation + Digit, // a sequence of decimal digits + LetterOrSymbol // letters and symbols, i.e., anything not covered by the above types }; + wxStringFragment() : type(Empty), value(0) {} - // ---------------------------------------------------------------------------- - // wxStringFragment - // ---------------------------------------------------------------------------- - // - // Lightweight object returned by GetNaturalFragment(). - // Represents either a number, or a string which contains no numerical digits. - class wxStringFragment + Type type; + wxString text; + wxUint64 value; // used only for Digit type +}; + + +wxStringFragment GetFragment(wxString& text) +{ + static const wxRegEx reSpaceOrPunct(wxS("^([[:space:]]|[[:punct:]])+")); + // Limit the length to make sure the value will fit into a wxUint64 + static const wxRegEx reDigit(wxS("^[[:digit:]]{1,19}")); + static const wxRegEx reLetterOrSymbol("^[^[:space:]|[:punct:]|[:digit:]]+"); + + if ( text.empty() ) + return wxStringFragment(); + + wxStringFragment fragment; + size_t length = 0; + + // In attempt to minimize the number of wxRegEx.Matches() calls, + // try to do them from the most expected to the least expected + // string fragment type. 
+ if ( reLetterOrSymbol.Matches(text) ) { - public: - wxStringFragment() - : type(wxFRAGMENT_TYPE_EMPTY) - {} - - wxString text; - long value; - wxStringFragmentType type; - }; - - - wxStringFragment GetFragment(wxString& text) + if ( reLetterOrSymbol.GetMatch(NULL, &length) ) + { + fragment.type = wxStringFragment::LetterOrSymbol; + fragment.text = text.Left(length); + } + } + else if ( reDigit.Matches(text) ) { - static const wxRegEx naturalNumeric(wxS("[0-9]+")); - static const wxRegEx naturalAlpha(wxS("[^0-9]+")); - - size_t digitStart = 0; - size_t digitLength = 0; - size_t alphaStart = 0; - size_t alphaLength = 0; - wxStringFragment fragment; - - if ( text.empty() ) - return fragment; - - if ( naturalNumeric.Matches(text) ) + if ( reDigit.GetMatch(NULL, &length) ) { - naturalNumeric.GetMatch(&digitStart, &digitLength, 0); + fragment.type = wxStringFragment::Digit; + fragment.text = text.Left(length); + fragment.text.ToULongLong(&fragment.value); } - - if ( naturalAlpha.Matches(text) ) + } + else if ( reSpaceOrPunct.Matches(text) ) + { + if ( reSpaceOrPunct.GetMatch(NULL, &length) ) { - naturalAlpha.GetMatch(&alphaStart, &alphaLength, 0); + fragment.type = wxStringFragment::SpaceOrPunct; + fragment.text = text.Left(length); } - - - if ( alphaStart == 0 ) - { - fragment.text = text.Mid(0, alphaLength); - fragment.value = 0; - fragment.type = wxFRAGMENT_TYPE_ALPHA; - - text.erase(0, alphaLength); - } - - if ( digitStart == 0 ) - { - fragment.text = text.Mid(0, digitLength); - fragment.text.ToLong(&fragment.value); - fragment.type = wxFRAGMENT_TYPE_DIGIT; - - text.erase(0, digitLength); - } - - return fragment; } - int CompareFragmentNatural(const wxStringFragment& lhs, const wxStringFragment& rhs) + text.erase(0, length); + return fragment; +} + +int CompareFragmentNatural(const wxStringFragment& lhs, const wxStringFragment& rhs) +{ + switch ( lhs.type ) { - if ( (lhs.type == wxFRAGMENT_TYPE_ALPHA) && - (rhs.type == wxFRAGMENT_TYPE_ALPHA) ) - { - return 
lhs.text.CmpNoCase(rhs.text); - } - - if ( (lhs.type == wxFRAGMENT_TYPE_DIGIT) && - (rhs.type == wxFRAGMENT_TYPE_DIGIT) ) - { - if ( lhs.value == rhs.value ) + case wxStringFragment::Empty: + switch ( rhs.type ) { - return 0; + case wxStringFragment::Empty: + return 0; + case wxStringFragment::SpaceOrPunct: + case wxStringFragment::Digit: + case wxStringFragment::LetterOrSymbol: + return -1; } - if ( lhs.value < rhs.value ) + case wxStringFragment::SpaceOrPunct: + switch ( rhs.type ) { - return -1; + case wxStringFragment::Empty: + return 1; + case wxStringFragment::SpaceOrPunct: + return wxStrcoll_String(lhs.text, rhs.text); + case wxStringFragment::Digit: + case wxStringFragment::LetterOrSymbol: + return -1; } - if ( lhs.value > rhs.value ) + case wxStringFragment::Digit: + switch ( rhs.type ) { - return 1; + case wxStringFragment::Empty: + case wxStringFragment::SpaceOrPunct: + return 1; + case wxStringFragment::Digit: + if ( lhs.value > rhs.value ) + return 1; + else if ( lhs.value < rhs.value ) + return -1; + else + return 0; + case wxStringFragment::LetterOrSymbol: + return -1; } - } - if ( (lhs.type == wxFRAGMENT_TYPE_DIGIT) && - (rhs.type == wxFRAGMENT_TYPE_ALPHA) ) - { - return -1; - } - - if ( (lhs.type == wxFRAGMENT_TYPE_ALPHA) && - (rhs.type == wxFRAGMENT_TYPE_DIGIT) ) - { - return 1; - } - - if ( lhs.type == wxFRAGMENT_TYPE_EMPTY ) - { - return -1; - } - - if ( rhs.type == wxFRAGMENT_TYPE_EMPTY ) - { - return 1; - } - - return 0; + case wxStringFragment::LetterOrSymbol: + switch ( rhs.type ) + { + case wxStringFragment::Empty: + case wxStringFragment::SpaceOrPunct: + case wxStringFragment::Digit: + return 1; + case wxStringFragment::LetterOrSymbol: + return wxStrcoll_String(lhs.text.Lower(), rhs.text.Lower()); + } } + // all possible cases should be covered by the switch above + // but return also from here to prevent the compiler warning + return 1; +} + } // unnamed namespace - // 
---------------------------------------------------------------------------- -// wxCmpNaturalNative +// wxCmpNaturalGeneric // ---------------------------------------------------------------------------- -// -int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2) +// +int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2) { wxString lhs(s1); wxString rhs(s2); @@ -873,19 +880,28 @@ int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2) while ( (comparison == 0) && (!lhs.empty() || !rhs.empty()) ) { - wxStringFragment fragmentL = GetFragment(lhs); - wxStringFragment fragmentR = GetFragment(rhs); - comparison = CompareFragmentNatural(fragmentL, fragmentR); + const wxStringFragment fragmentLHS = GetFragment(lhs); + const wxStringFragment fragmentRHS = GetFragment(rhs); + + comparison = CompareFragmentNatural(fragmentLHS, fragmentRHS); } return comparison; } +#else + +int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2) +{ + return wxStrcoll_String(s1.Lower(), s2.Lower()); +} + +#endif // #if wxUSE_REGEX // ---------------------------------------------------------------------------- // Declaration of StrCmpLogicalW() // ---------------------------------------------------------------------------- -// +// // In some distributions of MinGW32, this function is exported in the library, // but not declared in shlwapi.h. Therefore we declare it here. #if defined( __MINGW32_TOOLCHAIN__ ) @@ -894,19 +910,17 @@ int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2) // ---------------------------------------------------------------------------- -// wxCmpNaturalNative +// wxCmpNatural // ---------------------------------------------------------------------------- -// +// // If a native version of Natural sort is available, then use that, otherwise -// use the wxWidgets version, wxCmpNatural(). 
-int wxCMPFUNC_CONV wxCmpNaturalNative(const wxString& s1, const wxString& s2) +// use the generic version. +inline int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2) { - #if defined( __WINDOWS__ ) - return StrCmpLogicalW( s1.wc_str(), s2.wc_str() ); - - #else - return wxCmpNatural( s1, s2 ); - - #endif +#if defined( __WINDOWS__ ) + return StrCmpLogicalW(s1.wc_str(), s2.wc_str()); +#else + return wxCmpNaturalGeneric(s1, s2); +#endif // #if defined( __WINDOWS__ ) } diff --git a/tests/arrays/arrays.cpp b/tests/arrays/arrays.cpp index 630f12d06c..0266915cfd 100644 --- a/tests/arrays/arrays.cpp +++ b/tests/arrays/arrays.cpp @@ -782,47 +782,82 @@ void ArraysTestCase::IndexFromEnd() } -TEST_CASE("wxNaturalStringSortAscending()", "[array][sort][string]") +TEST_CASE("wxNaturalStringComparisonGeneric()", "[wxString][compare]") { - wxString s01("3String"); - wxString s02("21String"); - - wxString s03("100string"); - wxString s04("100String"); - - wxString s05("10String"); - wxString s06("Str3ing"); - wxString s07("Str20ing"); - wxString s08("Str200ing"); - wxString s09("String8"); - wxString s10("String90"); - - wxString s11("7String3"); - wxString s12("07String20"); - wxString s13("007String100"); - - CHECK(wxCmpNatural(s01, s02) < 0); - CHECK(wxCmpNatural(s02, s03) < 0); - CHECK(wxCmpNatural(s03, s04) == 0); // Check that case is ignored - CHECK(wxCmpNatural(s05, s06) < 0); - CHECK(wxCmpNatural(s06, s07) < 0); - CHECK(wxCmpNatural(s07, s08) < 0); - CHECK(wxCmpNatural(s08, s09) < 0); - CHECK(wxCmpNatural(s09, s10) < 0); - CHECK(wxCmpNatural(s11, s12) < 0); - CHECK(wxCmpNatural(s12, s13) < 0); - CHECK(wxCmpNatural(s01, s01) == 0); // Check that equality works in all cases - CHECK(wxCmpNatural(s02, s02) == 0); - CHECK(wxCmpNatural(s03, s03) == 0); - CHECK(wxCmpNatural(s04, s04) == 0); - CHECK(wxCmpNatural(s05, s05) == 0); - CHECK(wxCmpNatural(s06, s06) == 0); - CHECK(wxCmpNatural(s07, s07) == 0); - CHECK(wxCmpNatural(s08, s08) == 0); - 
CHECK(wxCmpNatural(s09, s09) == 0); - CHECK(wxCmpNatural(s10, s10) == 0); - CHECK(wxCmpNatural(s11, s11) == 0); - CHECK(wxCmpNatural(s12, s12) == 0); - CHECK(wxCmpNatural(s13, s13) == 0); +#if !wxUSE_REGEX + WARN("Skipping wxCmpNaturalGeneric() tests: wxRegEx not available"); +#else + // simple string comparison + CHECK(wxCmpNaturalGeneric("a", "a") == 0); + CHECK(wxCmpNaturalGeneric("a", "z") < 0); + CHECK(wxCmpNaturalGeneric("z", "a") > 0); + + // case insensitivity + CHECK(wxCmpNaturalGeneric("a", "A") == 0); + CHECK(wxCmpNaturalGeneric("A", "a") == 0); + CHECK(wxCmpNaturalGeneric("AB", "a") > 0); + CHECK(wxCmpNaturalGeneric("a", "AB") < 0); + + // empty strings sort before whitespace and punctuation + CHECK(wxCmpNaturalGeneric("", " ") < 0); + CHECK(wxCmpNaturalGeneric(" ", "") > 0); + CHECK(wxCmpNaturalGeneric("", ",") < 0); + CHECK(wxCmpNaturalGeneric(",", "") > 0); + + // empty strings sort before numbers + CHECK(wxCmpNaturalGeneric("", "0") < 0); + CHECK(wxCmpNaturalGeneric("0", "") > 0); + + // empty strings sort before letters and symbols + CHECK(wxCmpNaturalGeneric("", "abc") < 0); + CHECK(wxCmpNaturalGeneric("abc", "") > 0); + + // whitespace and punctuation sort before numbers + CHECK(wxCmpNaturalGeneric(" ", "1") < 0); + CHECK(wxCmpNaturalGeneric("1", " ") > 0); + CHECK(wxCmpNaturalGeneric(",", "1") < 0); + CHECK(wxCmpNaturalGeneric("1", ",") > 0); + + // strings containing numbers sort before letters and symbols + CHECK(wxCmpNaturalGeneric("00", "a") < 0); + CHECK(wxCmpNaturalGeneric("a", "00") > 0); + + // strings containing numbers are compared by their value + CHECK(wxCmpNaturalGeneric("01", "1") == 0); + CHECK(wxCmpNaturalGeneric("1", "01") == 0); + CHECK(wxCmpNaturalGeneric("1", "05") < 0); + CHECK(wxCmpNaturalGeneric("05", "1") > 0); + CHECK(wxCmpNaturalGeneric("10", "5") > 0); + CHECK(wxCmpNaturalGeneric("5", "10") < 0); + CHECK(wxCmpNaturalGeneric("1", "9999999999999999999") < 0); + CHECK(wxCmpNaturalGeneric("9999999999999999999", "1") > 0);
+ + // comparing strings composed from whitespace, + // punctuation, numbers, letters, and symbols + CHECK(wxCmpNaturalGeneric("1st", " 1st") > 0); + CHECK(wxCmpNaturalGeneric(" 1st", "1st") < 0); + + CHECK(wxCmpNaturalGeneric("1st", ",1st") > 0); + CHECK(wxCmpNaturalGeneric(",1st", "1st") < 0); + + CHECK(wxCmpNaturalGeneric("1st", "01st") == 0); + CHECK(wxCmpNaturalGeneric("01st", "1st") == 0); + CHECK(wxCmpNaturalGeneric("10th", "5th") > 0); + CHECK(wxCmpNaturalGeneric("5th", "10th") < 0); + + CHECK(wxCmpNaturalGeneric("a1st", "a01st") == 0); + CHECK(wxCmpNaturalGeneric("a01st", "a1st") == 0); + CHECK(wxCmpNaturalGeneric("a10th", "a5th") > 0); + CHECK(wxCmpNaturalGeneric("a5th", "a10th") < 0); + CHECK(wxCmpNaturalGeneric("a 10th", "a5th") < 0); + CHECK(wxCmpNaturalGeneric("a5th", "a 10th") > 0); + + CHECK(wxCmpNaturalGeneric("a1st1", "a01st01") == 0); + CHECK(wxCmpNaturalGeneric("a01st01", "a1st1") == 0); + CHECK(wxCmpNaturalGeneric("a10th10", "a5th5") > 0); + CHECK(wxCmpNaturalGeneric("a5th5", "a10th10") < 0); + CHECK(wxCmpNaturalGeneric("a 10th 10", "a5th 5") < 0); + CHECK(wxCmpNaturalGeneric("a5th 5", "a 10th 10") > 0); +#endif // #if !wxUSE_REGEX }