big wxURI cleanup; it now handles Unicode characters correctly (#3874)

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54723 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2008-07-19 23:14:51 +00:00
parent ece97e2882
commit 2186321ff5
4 changed files with 544 additions and 739 deletions
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -249,6 +249,7 @@ Major new features in this release
 All:
 - Added (experimental) IPv6 support to wxSocket (Arcen)
 - Cleaned up wxURI and made it Unicode-friendly.
 - Add support for wxExecute(wxEXEC_ASYNC) in wxBase (Lukasz Michalski)
 - Added wxXLocale class and xlocale-like functions using it
 - Allow loading message catalogs from wxFileSystem (Axel Gembe)
--- a/include/wx/uri.h
+++ b/include/wx/uri.h
@@ -1,11 +1,12 @@
 /////////////////////////////////////////////////////////////////////////////
-// Name:        uri.h
+// Name:        wx/uri.h
 // Purpose:     wxURI - Class for parsing URIs
 // Author:      Ryan Norton
-// Modified By:
+//              Vadim Zeitlin (UTF-8 URI support, many other changes)
 // Created:     07/01/2004
 // RCS-ID:      $Id$
-// Copyright:   (c) Ryan Norton
+// Copyright:   (c) 2004 Ryan Norton
 //                  2008 Vadim Zeitlin
 // Licence:     wxWindows Licence
 /////////////////////////////////////////////////////////////////////////////
@@ -52,86 +53,119 @@ class WXDLLIMPEXP_BASE wxURI : public wxObject
 public:
    wxURI();
    wxURI(const wxString& uri);
    wxURI(const wxURI& uri);
-    virtual ~wxURI();
+    // default copy ctor, assignment operator and dtor are ok
-    const wxChar* Create(const wxString& uri);
+    bool Create(const wxString& uri);
-    bool HasScheme() const      {   return (m_fields & wxURI_SCHEME) == wxURI_SCHEME;       }
+    wxURI& operator=(const wxString& string)
-    bool HasUserInfo() const    {   return (m_fields & wxURI_USERINFO) == wxURI_USERINFO;   }
+    {
-    bool HasServer() const      {   return (m_fields & wxURI_SERVER) == wxURI_SERVER;       }
+        Create(string);
-    bool HasPort() const        {   return (m_fields & wxURI_PORT) == wxURI_PORT;           }
+        return *this;
-    bool HasPath() const        {   return (m_fields & wxURI_PATH) == wxURI_PATH;           }
+    }
    bool HasQuery() const       {   return (m_fields & wxURI_QUERY) == wxURI_QUERY;         }
    bool HasFragment() const    {   return (m_fields & wxURI_FRAGMENT) == wxURI_FRAGMENT;   }
-    const wxString& GetScheme() const           {   return m_scheme;    }
+    bool operator==(const wxURI& uri) const;
    const wxString& GetPath() const             {   return m_path;      }
    const wxString& GetQuery() const            {   return m_query;     }
    const wxString& GetFragment() const         {   return m_fragment;  }
    const wxString& GetPort() const             {   return m_port;      }
    const wxString& GetUserInfo() const         {   return m_userinfo;  }
    const wxString& GetServer() const           {   return m_server;    }
    const wxURIHostType& GetHostType() const    {   return m_hostType;  }
-    //Note that the following two get functions are explicitly depreciated by RFC 2396
+    // various accessors
    bool HasScheme() const      { return (m_fields & wxURI_SCHEME) != 0;   }
    bool HasUserInfo() const    { return (m_fields & wxURI_USERINFO) != 0; }
    bool HasServer() const      { return (m_fields & wxURI_SERVER) != 0;   }
    bool HasPort() const        { return (m_fields & wxURI_PORT) != 0;     }
    bool HasPath() const        { return (m_fields & wxURI_PATH) != 0;     }
    bool HasQuery() const       { return (m_fields & wxURI_QUERY) != 0;    }
    bool HasFragment() const    { return (m_fields & wxURI_FRAGMENT) != 0; }
    const wxString& GetScheme() const    { return m_scheme;   }
    const wxString& GetPath() const      { return m_path;     }
    const wxString& GetQuery() const     { return m_query;    }
    const wxString& GetFragment() const  { return m_fragment; }
    const wxString& GetPort() const      { return m_port;     }
    const wxString& GetUserInfo() const  { return m_userinfo; }
    const wxString& GetServer() const    { return m_server;   }
    wxURIHostType GetHostType() const    { return m_hostType; }
    // these functions only work if the user information part of the URI is in
    // the usual (but insecure and hence explicitly recommended against by the
    // RFC) "user:password" form
    wxString GetUser() const;
    wxString GetPassword() const;
-    wxString BuildURI() const;
+
-    wxString BuildUnescapedURI() const;
+    // combine all URI components into a single string
    //
    // BuildURI() returns the real URI suitable for use with network libraries,
    // for example, while BuildUnescapedURI() returns a string suitable to be
    // shown to the user.
    wxString BuildURI() const { return DoBuildURI(&wxURI::Nothing); }
    wxString BuildUnescapedURI() const { return DoBuildURI(&wxURI::Unescape); }
    // the escaped URI should contain only ASCII characters, including possible
    // escape sequences
    static wxString Unescape(const wxString& escapedURI);
    void Resolve(const wxURI& base, int flags = wxURI_STRICT);
    bool IsReference() const;
    wxURI& operator = (const wxURI& uri);
    wxURI& operator = (const wxString& string);
    bool operator == (const wxURI& uri) const;
    static wxString Unescape (const wxString& szEscapedURI);
 protected:
    wxURI& Assign(const wxURI& uri);
    void Clear();
-    const wxChar* Parse          (const wxChar* uri);
+    // common part of BuildURI() and BuildUnescapedURI()
-    const wxChar* ParseAuthority (const wxChar* uri);
+    wxString DoBuildURI(wxString (*funcDecode)(const wxString&)) const;
-    const wxChar* ParseScheme    (const wxChar* uri);
+
-    const wxChar* ParseUserInfo  (const wxChar* uri);
+    // function which returns its argument unmodified, this is used by
-    const wxChar* ParseServer    (const wxChar* uri);
+    // BuildURI() to tell DoBuildURI() that nothing needs to be done with the
-    const wxChar* ParsePort      (const wxChar* uri);
+    // URI components
-    const wxChar* ParsePath      (const wxChar* uri,
+    static wxString Nothing(const wxString& value) { return value; }
-                                  bool bReference = false,
+
-                                  bool bNormalize = true);
+    bool Parse(const char* uri);
-    const wxChar* ParseQuery     (const wxChar* uri);
+
-    const wxChar* ParseFragment  (const wxChar* uri);
+    const char* ParseAuthority (const char* uri);
    const char* ParseScheme    (const char* uri);
    const char* ParseUserInfo  (const char* uri);
    const char* ParseServer    (const char* uri);
    const char* ParsePort      (const char* uri);
    const char* ParsePath      (const char* uri);
    const char* ParseQuery     (const char* uri);
    const char* ParseFragment  (const char* uri);
-    static bool ParseH16(const wxChar*& uri);
+    static bool ParseH16(const char*& uri);
-    static bool ParseIPv4address(const wxChar*& uri);
+    static bool ParseIPv4address(const char*& uri);
-    static bool ParseIPv6address(const wxChar*& uri);
+    static bool ParseIPv6address(const char*& uri);
-    static bool ParseIPvFuture(const wxChar*& uri);
+    static bool ParseIPvFuture(const char*& uri);
-    static void Normalize(wxChar* uri, bool bIgnoreLeads = false);
+    // should be called with i pointing to '%', returns the encoded character
-    static void UpTree(const wxChar* uristart, const wxChar*& uri);
+    // following it or -1 if invalid and advances i past it (so that it points
-    static void UpTree(wxString::const_iterator uristart,
+    // to the last character consumed on return)
-                       wxString::const_iterator& uri);
+    static int DecodeEscape(wxString::const_iterator& i);
-    static wxUniChar TranslateEscape(const wxString::const_iterator& s);
+    // append next character pointed to by p to the string in an escaped form
-    static void Escape(wxString& s, const wxChar& c);
+    // and advance p past it
-    static bool IsEscape(const wxChar*& uri);
+    //
    // if the next character is '%' and it's followed by 2 hex digits, they are
    // not escaped (again) by this function; this preserves the (backwards-
    // compatible) ambiguity about the input format to wxURI::Create(): it can
    // be either already escaped or not
    void AppendNextEscaped(wxString& s, const char *& p);
-    static wxChar CharToHex(const wxChar& c);
+    // convert hexadecimal digit to its value; return -1 if c isn't valid
    static int CharToHex(char c);
-    static bool IsUnreserved (const wxChar& c);
+    // split a URI path string into its component segments (including empty and
-    static bool IsReserved (const wxChar& c);
+    // "." ones, no post-processing is done)
-    static bool IsGenDelim (const wxChar& c);
+    static wxArrayString SplitInSegments(const wxString& path);
-    static bool IsSubDelim (const wxChar& c);
+
-    static bool IsHex(const wxChar& c);
+    // various URI grammar helpers
-    static bool IsAlpha(const wxChar& c);
+    static bool IsUnreserved(char c);
-    static bool IsDigit(const wxChar& c);
+    static bool IsReserved(char c);
    static bool IsGenDelim(char c);
    static bool IsSubDelim(char c);
    static bool IsHex(char c);
    static bool IsAlpha(char c);
    static bool IsDigit(char c);
    static bool IsEndPath(char c);
    wxString m_scheme;
    wxString m_path;
--- a/src/common/uri.cpp
+++ b/src/common/uri.cpp
--- a/tests/uris/uris.cpp
+++ b/tests/uris/uris.cpp
@@ -158,10 +158,11 @@ void URITestCase::Paths()
 }
 #define URI_TEST_RESOLVE_IMPL(string, eq, strict) \
-        uri = new wxURI(wxT(string));\
+    { \
-        uri->Resolve(masteruri, strict);\
+        wxURI uri(string); \
-        CPPUNIT_ASSERT(uri->BuildURI() == wxT(eq));\
+        uri.Resolve(masteruri, strict); \
-        delete uri;
+        CPPUNIT_ASSERT_EQUAL(eq, uri.BuildURI()); \
    }
 #define URI_TEST_RESOLVE(string, eq) \
        URI_TEST_RESOLVE_IMPL(string, eq, true);
@@ -174,8 +175,7 @@ void URITestCase::Paths()
 void URITestCase::NormalResolving()
 {
-    wxURI masteruri(wxT("http://a/b/c/d;p?q"));
+    wxURI masteruri("http://a/b/c/d;p?q");
    wxURI* uri;
    URI_TEST_RESOLVE("g:h"  ,"g:h")
    URI_TEST_RESOLVE("g"    ,"http://a/b/c/g")
@@ -205,10 +205,12 @@ void URITestCase::NormalResolving()
 void URITestCase::ComplexResolving()
 {
-    wxURI masteruri(wxT("http://a/b/c/d;p?q"));
+    wxURI masteruri("http://a/b/c/d;p?q");
    wxURI* uri;
    //odd path examples
    URI_TEST_RESOLVE("../../../g"   , "http://a/g")
    URI_TEST_RESOLVE("../../../../g", "http://a/g")
    URI_TEST_RESOLVE("/./g"   ,"http://a/g")
    URI_TEST_RESOLVE("/../g"  ,"http://a/g")
    URI_TEST_RESOLVE("g."     ,"http://a/b/c/g.")
@@ -216,14 +218,10 @@ void URITestCase::ComplexResolving()
    URI_TEST_RESOLVE("g.."    ,"http://a/b/c/g..")
    URI_TEST_RESOLVE("..g"    ,"http://a/b/c/..g")
 }
   //Should Fail
   //"../../../g"    =  "http://a/g"
   //"../../../../g" =  "http://a/g"
 void URITestCase::ReallyComplexResolving()
 {
-    wxURI masteruri(wxT("http://a/b/c/d;p?q"));
+    wxURI masteruri("http://a/b/c/d;p?q");
    wxURI* uri;
    //even odder path examples
    URI_TEST_RESOLVE("./../g" ,"http://a/b/g")
@@ -236,8 +234,7 @@ void URITestCase::ReallyComplexResolving()
 void URITestCase::QueryFragmentResolving()
 {
-    wxURI masteruri(wxT("http://a/b/c/d;p?q"));
+    wxURI masteruri("http://a/b/c/d;p?q");
    wxURI* uri;
    //query/fragment ambiguity
    URI_TEST_RESOLVE("g?y/./x","http://a/b/c/g?y/./x")
@@ -248,8 +245,7 @@ void URITestCase::QueryFragmentResolving()
 void URITestCase::BackwardsResolving()
 {
-    wxURI masteruri(wxT("http://a/b/c/d;p?q"));
+    wxURI masteruri("http://a/b/c/d;p?q");
    wxURI* uri;
    //"NEW"
    URI_TEST_RESOLVE("http:g" ,  "http:g")         //strict
@@ -259,70 +255,76 @@ void URITestCase::BackwardsResolving()
 void URITestCase::Assignment()
 {
-    wxURI uri1(wxT("http://mysite.com")),
+    wxURI uri1("http://mysite.com"),
-          uri2(wxT("http://mysite2.com"));
+          uri2("http://mysite2.com");
    uri2 = uri1;
-    CPPUNIT_ASSERT(uri1.BuildURI() == uri2.BuildURI());
+    CPPUNIT_ASSERT_EQUAL(uri1.BuildURI(), uri2.BuildURI());
 }
 void URITestCase::Comparison()
 {
-    CPPUNIT_ASSERT(wxURI(wxT("http://mysite.com")) == wxURI(wxT("http://mysite.com")));
+    CPPUNIT_ASSERT(wxURI("http://mysite.com") == wxURI("http://mysite.com"));
 }
 void URITestCase::Unescaping()
 {
-    wxString orig = wxT("http://test.com/of/file%3A%2F%2FC%3A%5Curi%5C")
+    wxString escaped,
-                    wxT("escaping%5Cthat%5Cseems%5Cbroken%5Csadly%5B1%5D.rss");
+             unescaped;
-    wxString works= wxURI(orig).BuildUnescapedURI();
+    escaped = "http://test.com/of/file%3A%2F%2FC%3A%5Curi%5C"
              "escaping%5Cthat%5Cseems%5Cbroken%5Csadly%5B1%5D.rss";
-    CPPUNIT_ASSERT(orig.IsSameAs(works) == false);
+    unescaped = wxURI(escaped).BuildUnescapedURI();
-    wxString orig2 = wxT("http://test.com/of/file%3A%2F%")
+    CPPUNIT_ASSERT_EQUAL( "http://test.com/of/file://C:\\uri\\"
-                     wxT("2FC%3A%5Curi%5Cescaping%5Cthat%5Cseems%")
+                          "escaping\\that\\seems\\broken\\sadly[1].rss",
-                     wxT("5Cbroken%5Csadly%5B1%5D.rss");
+                          unescaped );
-    wxString works2 = wxURI::Unescape(orig2);
+    CPPUNIT_ASSERT_EQUAL( unescaped, wxURI::Unescape(escaped) );
    wxString broken2 = wxURI(orig2).BuildUnescapedURI();
    CPPUNIT_ASSERT(works2.IsSameAs(broken2));
    escaped = "http://ru.wikipedia.org/wiki/"
              "%D0%A6%D0%B5%D0%BB%D0%BE%D0%B5_%D1%87%D0%B8%D1%81%D0%BB%D0%BE";
    unescaped = wxURI::Unescape(escaped);
    CPPUNIT_ASSERT_EQUAL( wxString::FromUTF8(
                            "http://ru.wikipedia.org/wiki/"
                            "\xD0\xA6\xD0\xB5\xD0\xBB\xD0\xBE\xD0\xB5_"
                            "\xD1\x87\xD0\xB8\xD1\x81\xD0\xBB\xD0\xBE"
                          ),
                          unescaped );
 }
 void URITestCase::FileScheme()
 {
    //file:// variety (NOT CONFORMANT TO THE RFC)
-    CPPUNIT_ASSERT(wxURI(wxString(wxT("file://e:/wxcode/script1.xml"))).GetPath()
+    URI_TEST_EQUAL( "file://e:/wxcode/script1.xml",
-                    == wxT("e:/wxcode/script1.xml") );
+                    "e:/wxcode/script1.xml", GetPath() );
    //file:/// variety
-    CPPUNIT_ASSERT(wxURI(wxString(wxT("file:///e:/wxcode/script1.xml"))).GetPath()
+    URI_TEST_EQUAL( "file:///e:/wxcode/script1.xml",
-                    == wxT("/e:/wxcode/script1.xml") );
+                    "/e:/wxcode/script1.xml", GetPath() );
    //file:/ variety
-    CPPUNIT_ASSERT(wxURI(wxString(wxT("file:/e:/wxcode/script1.xml"))).GetPath()
+    URI_TEST_EQUAL( "file:/e:/wxcode/script1.xml",
-                    == wxT("/e:/wxcode/script1.xml") );
+                    "/e:/wxcode/script1.xml", GetPath() );
    //file: variety
-    CPPUNIT_ASSERT(wxURI(wxString(wxT("file:e:/wxcode/script1.xml"))).GetPath()
+    URI_TEST_EQUAL( "file:e:/wxcode/script1.xml",
-                    == wxT("e:/wxcode/script1.xml") );
+                    "e:/wxcode/script1.xml", GetPath() );
 }
 #if TEST_URL
 const wxChar* pszProblemUrls[] = { wxT("http://www.csdn.net"),
                                   wxT("http://www.163.com"),
                                   wxT("http://www.sina.com.cn") };
 #include "wx/url.h"
 #include "wx/file.h"
 void URITestCase::URLCompat()
 {
-    wxURL url(wxT("http://user:password@wxwidgets.org"));
+    wxURL url("http://user:password@wxwidgets.org");
    CPPUNIT_ASSERT(url.GetError() == wxURL_NOERR);
@@ -332,9 +334,9 @@ void URITestCase::URLCompat()
    CPPUNIT_ASSERT( pInput != NULL );
 #endif
-    CPPUNIT_ASSERT( url == wxURL(wxT("http://user:password@wxwidgets.org")) );
+    CPPUNIT_ASSERT( url == wxURL("http://user:password@wxwidgets.org") );
-    wxURI uri(wxT("http://user:password@wxwidgets.org"));
+    wxURI uri("http://user:password@wxwidgets.org");
    CPPUNIT_ASSERT( url == uri );
@@ -348,18 +350,22 @@ void URITestCase::URLCompat()
    CPPUNIT_ASSERT( uricopy == url );
    CPPUNIT_ASSERT( uricopy == urlcopy );
    CPPUNIT_ASSERT( uricopy == uri );
-    CPPUNIT_ASSERT( wxURI::Unescape(wxT("%20%41%20")) == wxT(" A ") );
+    CPPUNIT_ASSERT_EQUAL( " A ", wxURI::Unescape("%20%41%20") );
-    wxURI test(wxT("file:\"myf\"ile.txt"));
+    wxURI test("file:\"myf\"ile.txt");
-    CPPUNIT_ASSERT( test.BuildURI() == wxT("file:%22myf%22ile.txt") );
+    CPPUNIT_ASSERT_EQUAL( "file:%22myf%22ile.txt" , test.BuildURI() );
-    CPPUNIT_ASSERT( test.GetScheme() == wxT("file") );
+    CPPUNIT_ASSERT_EQUAL( "file", test.GetScheme() );
-    CPPUNIT_ASSERT( test.GetPath() == wxT("%22myf%22ile.txt") );
+    CPPUNIT_ASSERT_EQUAL( "%22myf%22ile.txt", test.GetPath() );
    // these could be put under a named registry since they take some
    // time to complete
 #if 0
    // Test problem urls (reported not to work some time ago by a user...)
    const wxChar* pszProblemUrls[] = { "http://www.csdn.net",
                                       "http://www.163.com",
                                       "http://www.sina.com.cn" };
    for ( size_t i = 0; i < WXSIZEOF(pszProblemUrls); ++i )
    {
        wxURL urlProblem(pszProblemUrls[i]);