Refactor the natural string compare and sort algorithm

Add a new string fragment type for whitespace and punctuation which needs
to be assessed separately from letters and symbols.

Use wxUint64 instead of long to store the value of a numeric fragment.

Use collate instead of compare for non-numeric fragments.

Change the names of the public comparison functions: the function provided by
wxWidgets is now named wxCmpNaturalGeneric(), and the one for common public use
is wxCmpNatural(), which calls a native function in wxMSW and
wxCmpNaturalGeneric() elsewhere.

Try harder in wxCmpNaturalGeneric() if wxRegEx is unavailable: do not
just make a simple string comparison, but perform a case-insensitive
collation.

Make some other changes to simplify and possibly speed up the code.
This commit is contained in:
PB
2020-07-02 18:15:25 +02:00
committed by Vadim Zeitlin
parent 371c4b1366
commit 83a2a1e505
4 changed files with 282 additions and 237 deletions

View File

@@ -782,47 +782,82 @@ void ArraysTestCase::IndexFromEnd()
}
// Natural-order string comparison checks.
// NOTE(review): this span reads like a diff rendered without its +/- markers:
// two TEST_CASE headers appear back to back, and the wxCmpNatural() checks on
// s01..s13 are followed by a separate set of wxCmpNaturalGeneric() checks.
// Confirm which lines belong to the current revision before building.
TEST_CASE("wxNaturalStringSortAscending()", "[array][sort][string]")
TEST_CASE("wxNaturalStringComparisonGeneric()", "[wxString][compare]")
{
// Sample strings with numbers at the start, in the middle and at the end,
// some of them written with leading zeros.
wxString s01("3String");
wxString s02("21String");
wxString s03("100string");
wxString s04("100String");
wxString s05("10String");
wxString s06("Str3ing");
wxString s07("Str20ing");
wxString s08("Str200ing");
wxString s09("String8");
wxString s10("String90");
wxString s11("7String3");
wxString s12("07String20");
wxString s13("007String100");
// Ordering checks between selected pairs of the strings above.
CHECK(wxCmpNatural(s01, s02) < 0);
CHECK(wxCmpNatural(s02, s03) < 0);
CHECK(wxCmpNatural(s03, s04) == 0); // Check that case is ignored
CHECK(wxCmpNatural(s05, s06) < 0);
CHECK(wxCmpNatural(s06, s07) < 0);
CHECK(wxCmpNatural(s07, s08) < 0);
CHECK(wxCmpNatural(s08, s09) < 0);
CHECK(wxCmpNatural(s09, s10) < 0);
CHECK(wxCmpNatural(s11, s12) < 0);
CHECK(wxCmpNatural(s12, s13) < 0);
CHECK(wxCmpNatural(s01, s01) == 0); // Check that equality works in all cases
CHECK(wxCmpNatural(s02, s02) == 0);
CHECK(wxCmpNatural(s03, s03) == 0);
CHECK(wxCmpNatural(s04, s04) == 0);
CHECK(wxCmpNatural(s05, s05) == 0);
CHECK(wxCmpNatural(s06, s06) == 0);
CHECK(wxCmpNatural(s07, s07) == 0);
CHECK(wxCmpNatural(s08, s08) == 0);
CHECK(wxCmpNatural(s09, s09) == 0);
CHECK(wxCmpNatural(s10, s10) == 0);
CHECK(wxCmpNatural(s11, s11) == 0);
CHECK(wxCmpNatural(s12, s12) == 0);
CHECK(wxCmpNatural(s13, s13) == 0);
// When wxUSE_REGEX is 0 only a warning is emitted and every
// wxCmpNaturalGeneric() check below is compiled out.
#if !wxUSE_REGEX
WARN("Skipping wxCmpNaturalGeneric() tests: wxRegEx not available");
#else
// simple string comparison
CHECK(wxCmpNaturalGeneric("a", "a") == 0);
CHECK(wxCmpNaturalGeneric("a", "z") < 0);
CHECK(wxCmpNaturalGeneric("z", "a") > 0);
// case insensitivity
CHECK(wxCmpNaturalGeneric("a", "A") == 0);
CHECK(wxCmpNaturalGeneric("A", "a") == 0);
CHECK(wxCmpNaturalGeneric("AB", "a") > 0);
CHECK(wxCmpNaturalGeneric("a", "AB") < 0);
// empty strings sort before whitespace and punctuation
CHECK(wxCmpNaturalGeneric("", " ") < 0);
CHECK(wxCmpNaturalGeneric(" ", "") > 0);
CHECK(wxCmpNaturalGeneric("", ",") < 0);
CHECK(wxCmpNaturalGeneric(",", "") > 0);
// empty strings sort before numbers
CHECK(wxCmpNaturalGeneric("", "0") < 0);
CHECK(wxCmpNaturalGeneric("0", "") > 0);
// empty strings sort before letters and symbols
CHECK(wxCmpNaturalGeneric("", "abc") < 0);
CHECK(wxCmpNaturalGeneric("abc", "") > 0);
// whitespace and punctuation sort before numbers
CHECK(wxCmpNaturalGeneric(" ", "1") < 0);
CHECK(wxCmpNaturalGeneric("1", " ") > 0);
CHECK(wxCmpNaturalGeneric(",", "1") < 0);
CHECK(wxCmpNaturalGeneric("1", ",") > 0);
// strings containing numbers sort before letters and symbols
CHECK(wxCmpNaturalGeneric("00", "a") < 0);
CHECK(wxCmpNaturalGeneric("a", "00") > 0);
// strings containing numbers are compared by their value
// (leading zeros do not affect the result)
CHECK(wxCmpNaturalGeneric("01", "1") == 0);
CHECK(wxCmpNaturalGeneric("1", "01") == 0);
CHECK(wxCmpNaturalGeneric("1", "05") < 0);
CHECK(wxCmpNaturalGeneric("05", "1") > 0);
CHECK(wxCmpNaturalGeneric("10", "5") > 0);
CHECK(wxCmpNaturalGeneric("5", "10") < 0);
// 9999999999999999999 is larger than the maximum signed 64-bit value
// (9223372036854775807) but still fits in an unsigned 64-bit integer
CHECK(wxCmpNaturalGeneric("1", "9999999999999999999") < 0);
CHECK(wxCmpNaturalGeneric("9999999999999999999", "1") > 0);
// comparing strings composed from whitespace,
// punctuation, numbers, letters, and symbols
CHECK(wxCmpNaturalGeneric("1st", " 1st") > 0);
CHECK(wxCmpNaturalGeneric(" 1st", "1st") < 0);
CHECK(wxCmpNaturalGeneric("1st", ",1st") > 0);
CHECK(wxCmpNaturalGeneric(",1st", "1st") < 0);
CHECK(wxCmpNaturalGeneric("1st", "01st") == 0);
CHECK(wxCmpNaturalGeneric("01st", "1st") == 0);
CHECK(wxCmpNaturalGeneric("10th", "5th") > 0);
CHECK(wxCmpNaturalGeneric("5th", "10th") < 0);
CHECK(wxCmpNaturalGeneric("a1st", "a01st") == 0);
CHECK(wxCmpNaturalGeneric("a01st", "a1st") == 0);
CHECK(wxCmpNaturalGeneric("a10th", "a5th") > 0);
CHECK(wxCmpNaturalGeneric("a5th", "a10th") < 0);
// an embedded space is expected to sort before a digit, so "a 10th"
// precedes "a5th" even though 10 > 5
CHECK(wxCmpNaturalGeneric("a 10th", "a5th") < 0);
CHECK(wxCmpNaturalGeneric("a5th", "a 10th") > 0);
CHECK(wxCmpNaturalGeneric("a1st1", "a01st01") == 0);
CHECK(wxCmpNaturalGeneric("a01st01", "a1st1") == 0);
CHECK(wxCmpNaturalGeneric("a10th10", "a5th5") > 0);
CHECK(wxCmpNaturalGeneric("a5th5", "a10th10") < 0);
CHECK(wxCmpNaturalGeneric("a 10th 10", "a5th 5") < 0);
CHECK(wxCmpNaturalGeneric("a5th 5", "a 10th 10") > 0);
#endif // #if !wxUSE_REGEX
}