Refactor the natural string compare and sort algorithm
Add a new string fragment type for whitespace and punctuation, which needs to be assessed separately from letters and symbols. Use wxUint64 instead of long for storing the value of a numeric fragment. Use collate instead of compare for non-numeric fragments. Change the names of the public comparison functions: the function provided by wxWidgets is now named wxCmpNaturalGeneric(), and the function for common public use is wxCmpNatural(), which calls a native function in wxMSW and wxCmpNaturalGeneric() elsewhere. Try harder in wxCmpNaturalGeneric() if wxRegEx is unavailable: do not just make a simple string comparison, but perform a case-insensitive collation. Make some other changes to simplify and possibly speed up the code.
This commit is contained in:
@@ -782,47 +782,82 @@ void ArraysTestCase::IndexFromEnd()
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("wxNaturalStringSortAscending()", "[array][sort][string]")
|
||||
TEST_CASE("wxNaturalStringComparisonGeneric()", "[wxString][compare]")
|
||||
{
|
||||
wxString s01("3String");
|
||||
wxString s02("21String");
|
||||
|
||||
wxString s03("100string");
|
||||
wxString s04("100String");
|
||||
|
||||
wxString s05("10String");
|
||||
wxString s06("Str3ing");
|
||||
wxString s07("Str20ing");
|
||||
wxString s08("Str200ing");
|
||||
wxString s09("String8");
|
||||
wxString s10("String90");
|
||||
|
||||
wxString s11("7String3");
|
||||
wxString s12("07String20");
|
||||
wxString s13("007String100");
|
||||
|
||||
CHECK(wxCmpNatural(s01, s02) < 0);
|
||||
CHECK(wxCmpNatural(s02, s03) < 0);
|
||||
CHECK(wxCmpNatural(s03, s04) == 0); // Check that case is ignored
|
||||
CHECK(wxCmpNatural(s05, s06) < 0);
|
||||
CHECK(wxCmpNatural(s06, s07) < 0);
|
||||
CHECK(wxCmpNatural(s07, s08) < 0);
|
||||
CHECK(wxCmpNatural(s08, s09) < 0);
|
||||
CHECK(wxCmpNatural(s09, s10) < 0);
|
||||
CHECK(wxCmpNatural(s11, s12) < 0);
|
||||
CHECK(wxCmpNatural(s12, s13) < 0);
|
||||
CHECK(wxCmpNatural(s01, s01) == 0); // Check that equality works in all cases
|
||||
CHECK(wxCmpNatural(s02, s02) == 0);
|
||||
CHECK(wxCmpNatural(s03, s03) == 0);
|
||||
CHECK(wxCmpNatural(s04, s04) == 0);
|
||||
CHECK(wxCmpNatural(s05, s05) == 0);
|
||||
CHECK(wxCmpNatural(s06, s06) == 0);
|
||||
CHECK(wxCmpNatural(s07, s07) == 0);
|
||||
CHECK(wxCmpNatural(s08, s08) == 0);
|
||||
CHECK(wxCmpNatural(s09, s09) == 0);
|
||||
CHECK(wxCmpNatural(s10, s10) == 0);
|
||||
CHECK(wxCmpNatural(s11, s11) == 0);
|
||||
CHECK(wxCmpNatural(s12, s12) == 0);
|
||||
CHECK(wxCmpNatural(s13, s13) == 0);
|
||||
#if !wxUSE_REGEX
|
||||
WARN("Skipping wxCmpNaturalGeneric() tests: wxRegEx not available");
|
||||
#else
|
||||
// simple string comparison
|
||||
CHECK(wxCmpNaturalGeneric("a", "a") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("a", "z") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("z", "a") > 0);
|
||||
|
||||
// case insensitivity
|
||||
CHECK(wxCmpNaturalGeneric("a", "A") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("A", "a") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("AB", "a") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("a", "AB") < 0);
|
||||
|
||||
// empty strings sort before whitespace and punctuation
|
||||
CHECK(wxCmpNaturalGeneric("", " ") < 0);
|
||||
CHECK(wxCmpNaturalGeneric(" ", "") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("", ",") < 0);
|
||||
CHECK(wxCmpNaturalGeneric(",", "") > 0);
|
||||
|
||||
// empty strings sort before numbers
|
||||
CHECK(wxCmpNaturalGeneric("", "0") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("0", "") > 0);
|
||||
|
||||
// empty strings sort before letters and symbols
|
||||
CHECK(wxCmpNaturalGeneric("", "abc") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("abc", "") > 0);
|
||||
|
||||
// whitespace and punctuation sort before numbers
|
||||
CHECK(wxCmpNaturalGeneric(" ", "1") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("1", " ") > 0);
|
||||
CHECK(wxCmpNaturalGeneric(",", "1") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("1", ",") > 0);
|
||||
|
||||
// strings containing numbers sort before letters and symbols
|
||||
CHECK(wxCmpNaturalGeneric("00", "a") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("a", "00") > 0);
|
||||
|
||||
// strings containing numbers are compared by their value
|
||||
CHECK(wxCmpNaturalGeneric("01", "1") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("1", "01") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("1", "05") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("05", "1") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("10", "5") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("5", "10") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("1", "9999999999999999999") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("9999999999999999999", "1") > 0);
|
||||
|
||||
// comparing strings composed from whitespace,
|
||||
// punctuation, numbers, letters, and symbols
|
||||
CHECK(wxCmpNaturalGeneric("1st", " 1st") > 0);
|
||||
CHECK(wxCmpNaturalGeneric(" 1st", "1st") < 0);
|
||||
|
||||
CHECK(wxCmpNaturalGeneric("1st", ",1st") > 0);
|
||||
CHECK(wxCmpNaturalGeneric(",1st", "1st") < 0);
|
||||
|
||||
CHECK(wxCmpNaturalGeneric("1st", "01st") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("01st", "1st") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("10th", "5th") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("5th", "10th") < 0);
|
||||
|
||||
CHECK(wxCmpNaturalGeneric("a1st", "a01st") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("a01st", "a1st") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("a10th", "a5th") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("a5th", "a10th") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("a 10th", "a5th") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("a5th", "a 10th") > 0);
|
||||
|
||||
CHECK(wxCmpNaturalGeneric("a1st1", "a01st01") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("a01st01", "a1st1") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("a10th10", "a5th5") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("a5th5", "a10th10") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("a 10th 10", "a5th 5") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("a5th 5", "a 10th 10") > 0);
|
||||
#endif // #if !wxUSE_REGEX
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user