Don't use native MSW functions in wxString::CmpNoCase().

While the native CompareString() is much more efficient than the MSVC CRT version of _wcsicmp(), it gives unexpected results for non-letter characters, so use the slower but correct wxStricmp() instead. At least we no longer use char-by-char comparison (in the non-UTF-8 case), which is the slowest possible implementation of this function: the new version using wxStricmp() is 3 times faster (by comparison, using CompareString() is 16 times faster still -- but wrong). Closes #10375. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@65572 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2010-09-20 12:52:26 +00:00
parent 05f68f2f84
commit 5858fe6806
2 changed files with 59 additions and 27 deletions
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -1083,40 +1083,63 @@ size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
 int wxString::CmpNoCase(const wxString& s) const
 {
-#if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
+#if !wxUSE_UNICODE_UTF8
-    // Prefer to use CompareString() if available as it's more efficient than
+    // We compare NUL-delimited chunks of the strings inside the loop. We will
-    // doing it manually or even using wxStricmp() (see #10375)
+    // do as many iterations as there are embedded NULs in the string, i.e.
-    //
+    // usually we will run it just once.
-    // Also note that not using NORM_STRINGSORT may result in not having a
+
-    // strict weak ordering (e.g. s1 < s2 and s2 < s3 but s3 < s1) and so break
+    typedef const wxStringImpl::value_type *pchar_type;
-    // algorithms such as std::sort that rely on it. It's also more consistent
+    const pchar_type thisBegin = m_impl.c_str();
-    // with the fall back version below.
+    const pchar_type thatBegin = s.m_impl.c_str();
-    switch ( ::CompareString(LOCALE_USER_DEFAULT,
+
-                             NORM_IGNORECASE | SORT_STRINGSORT,
+    const pchar_type thisEnd = thisBegin + m_impl.length();
-                             m_impl.c_str(), m_impl.length(),
+    const pchar_type thatEnd = thatBegin + s.m_impl.length();
-                             s.m_impl.c_str(), s.m_impl.length()) )
+
    pchar_type thisCur = thisBegin;
    pchar_type thatCur = thatBegin;
    int rc;
    for ( ;; )
    {
-        case CSTR_LESS_THAN:
+        // Compare until the next NUL, if the strings differ this is the final
-            return -1;
+        // result.
        rc = wxStricmp(thisCur, thatCur);
        if ( rc )
            break;
-        case CSTR_EQUAL:
+        const size_t lenChunk = wxStrlen(thisCur);
-            return 0;
+        thisCur += lenChunk;
        thatCur += lenChunk;
-        case CSTR_GREATER_THAN:
+        // Skip all the NULs as wxStricmp() doesn't handle them.
-            return 1;
+        for ( ; !*thisCur; thisCur++, thatCur++ )
        {
            // Check if we exhausted either of the strings.
            if ( thisCur == thisEnd )
            {
                // This one is exhausted, is the other one too?
                return thatCur == thatEnd ? 0 : -1;
            }
-        default:
+            if ( thatCur == thatEnd )
-            wxFAIL_MSG( "unexpected CompareString() return value" );
+            {
-            // fall through
+                // Because of the test above we know that this one is not
                // exhausted yet so it's greater than the other one that is.
                return 1;
            }
-        case 0:
+            if ( *thatCur )
-            wxLogLastError("CompareString");
+            {
-            // use generic code below
+                // Anything non-NUL is greater than NUL.
                return -1;
            }
        }
    }
 #endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
-    // do the comparison manually: notice that we can't use wxStricmp() as it
+    return rc;
-    // doesn't handle embedded NULs
+#else // wxUSE_UNICODE_UTF8
    // CRT functions can't be used for case-insensitive comparison of UTF-8
    // strings so do it in the naive, simple and inefficient way.
    // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
    const_iterator i1 = begin();
@@ -1140,6 +1163,7 @@ int wxString::CmpNoCase(const wxString& s) const
    else if ( len1 > len2 )
        return 1;
    return 0;
 #endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
 }
--- a/tests/strings/strings.cpp
+++ b/tests/strings/strings.cpp
@@ -455,6 +455,10 @@ void StringTestCase::Compare()
    CPPUNIT_ASSERT( s1 != neq2 );
    CPPUNIT_ASSERT( s1 != neq3 );
    CPPUNIT_ASSERT( s1 != neq4 );
    CPPUNIT_ASSERT( wxString("\n").Cmp(" ") < 0 );
    CPPUNIT_ASSERT( wxString("'").Cmp("!") > 0 );
    CPPUNIT_ASSERT( wxString("!").Cmp("z") < 0 );
 }
 void StringTestCase::CompareNoCase()
@@ -502,6 +506,10 @@ void StringTestCase::CompareNoCase()
    CPPUNIT_CNCNEQ_ASSERT( s1, neq );
    CPPUNIT_CNCNEQ_ASSERT( s1, neq2 );
    CPPUNIT_CNCNEQ_ASSERT( s1, neq3 );
    CPPUNIT_ASSERT( wxString("\n").CmpNoCase(" ") < 0 );
    CPPUNIT_ASSERT( wxString("'").CmpNoCase("!") > 0);
    CPPUNIT_ASSERT( wxString("!").Cmp("Z") < 0 );
 }
 void StringTestCase::Contains()