big wxURI cleanup; it now handles Unicode characters correctly (#3874)

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54723 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2008-07-19 23:14:51 +00:00
parent ece97e2882
commit 2186321ff5
4 changed files with 544 additions and 739 deletions

View File

@@ -249,6 +249,7 @@ Major new features in this release
All: All:
- Added (experimental) IPv6 support to wxSocket (Arcen) - Added (experimental) IPv6 support to wxSocket (Arcen)
- Cleaned up wxURI and made it Unicode-friendly.
- Add support for wxExecute(wxEXEC_ASYNC) in wxBase (Lukasz Michalski) - Add support for wxExecute(wxEXEC_ASYNC) in wxBase (Lukasz Michalski)
- Added wxXLocale class and xlocale-like functions using it - Added wxXLocale class and xlocale-like functions using it
- Allow loading message catalogs from wxFileSystem (Axel Gembe) - Allow loading message catalogs from wxFileSystem (Axel Gembe)

View File

@@ -1,11 +1,12 @@
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
// Name: uri.h // Name: wx/uri.h
// Purpose: wxURI - Class for parsing URIs // Purpose: wxURI - Class for parsing URIs
// Author: Ryan Norton // Author: Ryan Norton
// Modified By: // Vadim Zeitlin (UTF-8 URI support, many other changes)
// Created: 07/01/2004 // Created: 07/01/2004
// RCS-ID: $Id$ // RCS-ID: $Id$
// Copyright: (c) Ryan Norton // Copyright: (c) 2004 Ryan Norton
// 2008 Vadim Zeitlin
// Licence: wxWindows Licence // Licence: wxWindows Licence
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
@@ -52,86 +53,119 @@ class WXDLLIMPEXP_BASE wxURI : public wxObject
public: public:
wxURI(); wxURI();
wxURI(const wxString& uri); wxURI(const wxString& uri);
wxURI(const wxURI& uri);
virtual ~wxURI(); // default copy ctor, assignment operator and dtor are ok
const wxChar* Create(const wxString& uri); bool Create(const wxString& uri);
bool HasScheme() const { return (m_fields & wxURI_SCHEME) == wxURI_SCHEME; } wxURI& operator=(const wxString& string)
bool HasUserInfo() const { return (m_fields & wxURI_USERINFO) == wxURI_USERINFO; } {
bool HasServer() const { return (m_fields & wxURI_SERVER) == wxURI_SERVER; } Create(string);
bool HasPort() const { return (m_fields & wxURI_PORT) == wxURI_PORT; } return *this;
bool HasPath() const { return (m_fields & wxURI_PATH) == wxURI_PATH; } }
bool HasQuery() const { return (m_fields & wxURI_QUERY) == wxURI_QUERY; }
bool HasFragment() const { return (m_fields & wxURI_FRAGMENT) == wxURI_FRAGMENT; }
const wxString& GetScheme() const { return m_scheme; } bool operator==(const wxURI& uri) const;
const wxString& GetPath() const { return m_path; }
const wxString& GetQuery() const { return m_query; }
const wxString& GetFragment() const { return m_fragment; }
const wxString& GetPort() const { return m_port; }
const wxString& GetUserInfo() const { return m_userinfo; }
const wxString& GetServer() const { return m_server; }
const wxURIHostType& GetHostType() const { return m_hostType; }
//Note that the following two get functions are explicitly deprecated by RFC 2396 // various accessors
bool HasScheme() const { return (m_fields & wxURI_SCHEME) != 0; }
bool HasUserInfo() const { return (m_fields & wxURI_USERINFO) != 0; }
bool HasServer() const { return (m_fields & wxURI_SERVER) != 0; }
bool HasPort() const { return (m_fields & wxURI_PORT) != 0; }
bool HasPath() const { return (m_fields & wxURI_PATH) != 0; }
bool HasQuery() const { return (m_fields & wxURI_QUERY) != 0; }
bool HasFragment() const { return (m_fields & wxURI_FRAGMENT) != 0; }
const wxString& GetScheme() const { return m_scheme; }
const wxString& GetPath() const { return m_path; }
const wxString& GetQuery() const { return m_query; }
const wxString& GetFragment() const { return m_fragment; }
const wxString& GetPort() const { return m_port; }
const wxString& GetUserInfo() const { return m_userinfo; }
const wxString& GetServer() const { return m_server; }
wxURIHostType GetHostType() const { return m_hostType; }
// these functions only work if the user information part of the URI is in
// the usual (but insecure and hence explicitly recommended against by the
// RFC) "user:password" form
wxString GetUser() const; wxString GetUser() const;
wxString GetPassword() const; wxString GetPassword() const;
wxString BuildURI() const;
wxString BuildUnescapedURI() const; // combine all URI components into a single string
//
// BuildURI() returns the real URI suitable for use with network libraries,
// for example, while BuildUnescapedURI() returns a string suitable to be
// shown to the user.
wxString BuildURI() const { return DoBuildURI(&wxURI::Nothing); }
wxString BuildUnescapedURI() const { return DoBuildURI(&wxURI::Unescape); }
// the escaped URI should contain only ASCII characters, including possible
// escape sequences
static wxString Unescape(const wxString& escapedURI);
void Resolve(const wxURI& base, int flags = wxURI_STRICT); void Resolve(const wxURI& base, int flags = wxURI_STRICT);
bool IsReference() const; bool IsReference() const;
wxURI& operator = (const wxURI& uri);
wxURI& operator = (const wxString& string);
bool operator == (const wxURI& uri) const;
static wxString Unescape (const wxString& szEscapedURI);
protected: protected:
wxURI& Assign(const wxURI& uri);
void Clear(); void Clear();
const wxChar* Parse (const wxChar* uri); // common part of BuildURI() and BuildUnescapedURI()
const wxChar* ParseAuthority (const wxChar* uri); wxString DoBuildURI(wxString (*funcDecode)(const wxString&)) const;
const wxChar* ParseScheme (const wxChar* uri);
const wxChar* ParseUserInfo (const wxChar* uri); // function which returns its argument unmodified, this is used by
const wxChar* ParseServer (const wxChar* uri); // BuildURI() to tell DoBuildURI() that nothing needs to be done with the
const wxChar* ParsePort (const wxChar* uri); // URI components
const wxChar* ParsePath (const wxChar* uri, static wxString Nothing(const wxString& value) { return value; }
bool bReference = false,
bool bNormalize = true); bool Parse(const char* uri);
const wxChar* ParseQuery (const wxChar* uri);
const wxChar* ParseFragment (const wxChar* uri); const char* ParseAuthority (const char* uri);
const char* ParseScheme (const char* uri);
const char* ParseUserInfo (const char* uri);
const char* ParseServer (const char* uri);
const char* ParsePort (const char* uri);
const char* ParsePath (const char* uri);
const char* ParseQuery (const char* uri);
const char* ParseFragment (const char* uri);
static bool ParseH16(const wxChar*& uri); static bool ParseH16(const char*& uri);
static bool ParseIPv4address(const wxChar*& uri); static bool ParseIPv4address(const char*& uri);
static bool ParseIPv6address(const wxChar*& uri); static bool ParseIPv6address(const char*& uri);
static bool ParseIPvFuture(const wxChar*& uri); static bool ParseIPvFuture(const char*& uri);
static void Normalize(wxChar* uri, bool bIgnoreLeads = false); // should be called with i pointing to '%', returns the encoded character
static void UpTree(const wxChar* uristart, const wxChar*& uri); // following it or -1 if invalid and advances i past it (so that it points
static void UpTree(wxString::const_iterator uristart, // to the last character consumed on return)
wxString::const_iterator& uri); static int DecodeEscape(wxString::const_iterator& i);
static wxUniChar TranslateEscape(const wxString::const_iterator& s); // append next character pointed to by p to the string in an escaped form
static void Escape(wxString& s, const wxChar& c); // and advance p past it
static bool IsEscape(const wxChar*& uri); //
// if the next character is '%' and it's followed by 2 hex digits, they are
// not escaped (again) by this function, this allows to keep (backwards-
// compatible) ambiguity about the input format to wxURI::Create(): it can
// be either already escaped or not
void AppendNextEscaped(wxString& s, const char *& p);
static wxChar CharToHex(const wxChar& c); // convert hexadecimal digit to its value; return -1 if c isn't valid
static int CharToHex(char c);
static bool IsUnreserved (const wxChar& c); // split a URI path string into its component segments (including empty and
static bool IsReserved (const wxChar& c); // "." ones, no post-processing is done)
static bool IsGenDelim (const wxChar& c); static wxArrayString SplitInSegments(const wxString& path);
static bool IsSubDelim (const wxChar& c);
static bool IsHex(const wxChar& c); // various URI grammar helpers
static bool IsAlpha(const wxChar& c); static bool IsUnreserved(char c);
static bool IsDigit(const wxChar& c); static bool IsReserved(char c);
static bool IsGenDelim(char c);
static bool IsSubDelim(char c);
static bool IsHex(char c);
static bool IsAlpha(char c);
static bool IsDigit(char c);
static bool IsEndPath(char c);
wxString m_scheme; wxString m_scheme;
wxString m_path; wxString m_path;

File diff suppressed because it is too large Load Diff

View File

@@ -158,10 +158,11 @@ void URITestCase::Paths()
} }
#define URI_TEST_RESOLVE_IMPL(string, eq, strict) \ #define URI_TEST_RESOLVE_IMPL(string, eq, strict) \
uri = new wxURI(wxT(string));\ { \
uri->Resolve(masteruri, strict);\ wxURI uri(string); \
CPPUNIT_ASSERT(uri->BuildURI() == wxT(eq));\ uri.Resolve(masteruri, strict); \
delete uri; CPPUNIT_ASSERT_EQUAL(eq, uri.BuildURI()); \
}
#define URI_TEST_RESOLVE(string, eq) \ #define URI_TEST_RESOLVE(string, eq) \
URI_TEST_RESOLVE_IMPL(string, eq, true); URI_TEST_RESOLVE_IMPL(string, eq, true);
@@ -174,8 +175,7 @@ void URITestCase::Paths()
void URITestCase::NormalResolving() void URITestCase::NormalResolving()
{ {
wxURI masteruri(wxT("http://a/b/c/d;p?q")); wxURI masteruri("http://a/b/c/d;p?q");
wxURI* uri;
URI_TEST_RESOLVE("g:h" ,"g:h") URI_TEST_RESOLVE("g:h" ,"g:h")
URI_TEST_RESOLVE("g" ,"http://a/b/c/g") URI_TEST_RESOLVE("g" ,"http://a/b/c/g")
@@ -205,10 +205,12 @@ void URITestCase::NormalResolving()
void URITestCase::ComplexResolving() void URITestCase::ComplexResolving()
{ {
wxURI masteruri(wxT("http://a/b/c/d;p?q")); wxURI masteruri("http://a/b/c/d;p?q");
wxURI* uri;
//odd path examples //odd path examples
URI_TEST_RESOLVE("../../../g" , "http://a/g")
URI_TEST_RESOLVE("../../../../g", "http://a/g")
URI_TEST_RESOLVE("/./g" ,"http://a/g") URI_TEST_RESOLVE("/./g" ,"http://a/g")
URI_TEST_RESOLVE("/../g" ,"http://a/g") URI_TEST_RESOLVE("/../g" ,"http://a/g")
URI_TEST_RESOLVE("g." ,"http://a/b/c/g.") URI_TEST_RESOLVE("g." ,"http://a/b/c/g.")
@@ -216,14 +218,10 @@ void URITestCase::ComplexResolving()
URI_TEST_RESOLVE("g.." ,"http://a/b/c/g..") URI_TEST_RESOLVE("g.." ,"http://a/b/c/g..")
URI_TEST_RESOLVE("..g" ,"http://a/b/c/..g") URI_TEST_RESOLVE("..g" ,"http://a/b/c/..g")
} }
//Should Fail
//"../../../g" = "http://a/g"
//"../../../../g" = "http://a/g"
void URITestCase::ReallyComplexResolving() void URITestCase::ReallyComplexResolving()
{ {
wxURI masteruri(wxT("http://a/b/c/d;p?q")); wxURI masteruri("http://a/b/c/d;p?q");
wxURI* uri;
//even more odder path examples //even more odder path examples
URI_TEST_RESOLVE("./../g" ,"http://a/b/g") URI_TEST_RESOLVE("./../g" ,"http://a/b/g")
@@ -236,8 +234,7 @@ void URITestCase::ReallyComplexResolving()
void URITestCase::QueryFragmentResolving() void URITestCase::QueryFragmentResolving()
{ {
wxURI masteruri(wxT("http://a/b/c/d;p?q")); wxURI masteruri("http://a/b/c/d;p?q");
wxURI* uri;
//query/fragment ambiguity //query/fragment ambiguity
URI_TEST_RESOLVE("g?y/./x","http://a/b/c/g?y/./x") URI_TEST_RESOLVE("g?y/./x","http://a/b/c/g?y/./x")
@@ -248,8 +245,7 @@ void URITestCase::QueryFragmentResolving()
void URITestCase::BackwardsResolving() void URITestCase::BackwardsResolving()
{ {
wxURI masteruri(wxT("http://a/b/c/d;p?q")); wxURI masteruri("http://a/b/c/d;p?q");
wxURI* uri;
//"NEW" //"NEW"
URI_TEST_RESOLVE("http:g" , "http:g") //strict URI_TEST_RESOLVE("http:g" , "http:g") //strict
@@ -259,70 +255,76 @@ void URITestCase::BackwardsResolving()
void URITestCase::Assignment() void URITestCase::Assignment()
{ {
wxURI uri1(wxT("http://mysite.com")), wxURI uri1("http://mysite.com"),
uri2(wxT("http://mysite2.com")); uri2("http://mysite2.com");
uri2 = uri1; uri2 = uri1;
CPPUNIT_ASSERT(uri1.BuildURI() == uri2.BuildURI()); CPPUNIT_ASSERT_EQUAL(uri1.BuildURI(), uri2.BuildURI());
} }
void URITestCase::Comparison() void URITestCase::Comparison()
{ {
CPPUNIT_ASSERT(wxURI(wxT("http://mysite.com")) == wxURI(wxT("http://mysite.com"))); CPPUNIT_ASSERT(wxURI("http://mysite.com") == wxURI("http://mysite.com"));
} }
void URITestCase::Unescaping() void URITestCase::Unescaping()
{ {
wxString orig = wxT("http://test.com/of/file%3A%2F%2FC%3A%5Curi%5C") wxString escaped,
wxT("escaping%5Cthat%5Cseems%5Cbroken%5Csadly%5B1%5D.rss"); unescaped;
wxString works= wxURI(orig).BuildUnescapedURI(); escaped = "http://test.com/of/file%3A%2F%2FC%3A%5Curi%5C"
"escaping%5Cthat%5Cseems%5Cbroken%5Csadly%5B1%5D.rss";
CPPUNIT_ASSERT(orig.IsSameAs(works) == false); unescaped = wxURI(escaped).BuildUnescapedURI();
wxString orig2 = wxT("http://test.com/of/file%3A%2F%") CPPUNIT_ASSERT_EQUAL( "http://test.com/of/file://C:\\uri\\"
wxT("2FC%3A%5Curi%5Cescaping%5Cthat%5Cseems%") "escaping\\that\\seems\\broken\\sadly[1].rss",
wxT("5Cbroken%5Csadly%5B1%5D.rss"); unescaped );
wxString works2 = wxURI::Unescape(orig2); CPPUNIT_ASSERT_EQUAL( unescaped, wxURI::Unescape(escaped) );
wxString broken2 = wxURI(orig2).BuildUnescapedURI();
CPPUNIT_ASSERT(works2.IsSameAs(broken2));
escaped = "http://ru.wikipedia.org/wiki/"
"%D0%A6%D0%B5%D0%BB%D0%BE%D0%B5_%D1%87%D0%B8%D1%81%D0%BB%D0%BE";
unescaped = wxURI::Unescape(escaped);
CPPUNIT_ASSERT_EQUAL( wxString::FromUTF8(
"http://ru.wikipedia.org/wiki/"
"\xD0\xA6\xD0\xB5\xD0\xBB\xD0\xBE\xD0\xB5_"
"\xD1\x87\xD0\xB8\xD1\x81\xD0\xBB\xD0\xBE"
),
unescaped );
} }
void URITestCase::FileScheme() void URITestCase::FileScheme()
{ {
//file:// variety (NOT CONFORMANT TO THE RFC) //file:// variety (NOT CONFORMANT TO THE RFC)
CPPUNIT_ASSERT(wxURI(wxString(wxT("file://e:/wxcode/script1.xml"))).GetPath() URI_TEST_EQUAL( "file://e:/wxcode/script1.xml",
== wxT("e:/wxcode/script1.xml") ); "e:/wxcode/script1.xml", GetPath() );
//file:/// variety //file:/// variety
CPPUNIT_ASSERT(wxURI(wxString(wxT("file:///e:/wxcode/script1.xml"))).GetPath() URI_TEST_EQUAL( "file:///e:/wxcode/script1.xml",
== wxT("/e:/wxcode/script1.xml") ); "/e:/wxcode/script1.xml", GetPath() );
//file:/ variety //file:/ variety
CPPUNIT_ASSERT(wxURI(wxString(wxT("file:/e:/wxcode/script1.xml"))).GetPath() URI_TEST_EQUAL( "file:/e:/wxcode/script1.xml",
== wxT("/e:/wxcode/script1.xml") ); "/e:/wxcode/script1.xml", GetPath() );
//file: variety //file: variety
CPPUNIT_ASSERT(wxURI(wxString(wxT("file:e:/wxcode/script1.xml"))).GetPath() URI_TEST_EQUAL( "file:e:/wxcode/script1.xml",
== wxT("e:/wxcode/script1.xml") ); "e:/wxcode/script1.xml", GetPath() );
} }
#if TEST_URL #if TEST_URL
const wxChar* pszProblemUrls[] = { wxT("http://www.csdn.net"),
wxT("http://www.163.com"),
wxT("http://www.sina.com.cn") };
#include "wx/url.h" #include "wx/url.h"
#include "wx/file.h" #include "wx/file.h"
void URITestCase::URLCompat() void URITestCase::URLCompat()
{ {
wxURL url(wxT("http://user:password@wxwidgets.org")); wxURL url("http://user:password@wxwidgets.org");
CPPUNIT_ASSERT(url.GetError() == wxURL_NOERR); CPPUNIT_ASSERT(url.GetError() == wxURL_NOERR);
@@ -332,9 +334,9 @@ void URITestCase::URLCompat()
CPPUNIT_ASSERT( pInput != NULL ); CPPUNIT_ASSERT( pInput != NULL );
#endif #endif
CPPUNIT_ASSERT( url == wxURL(wxT("http://user:password@wxwidgets.org")) ); CPPUNIT_ASSERT( url == wxURL("http://user:password@wxwidgets.org") );
wxURI uri(wxT("http://user:password@wxwidgets.org")); wxURI uri("http://user:password@wxwidgets.org");
CPPUNIT_ASSERT( url == uri ); CPPUNIT_ASSERT( url == uri );
@@ -348,18 +350,22 @@ void URITestCase::URLCompat()
CPPUNIT_ASSERT( uricopy == url ); CPPUNIT_ASSERT( uricopy == url );
CPPUNIT_ASSERT( uricopy == urlcopy ); CPPUNIT_ASSERT( uricopy == urlcopy );
CPPUNIT_ASSERT( uricopy == uri ); CPPUNIT_ASSERT( uricopy == uri );
CPPUNIT_ASSERT( wxURI::Unescape(wxT("%20%41%20")) == wxT(" A ") ); CPPUNIT_ASSERT_EQUAL( " A ", wxURI::Unescape("%20%41%20") );
wxURI test(wxT("file:\"myf\"ile.txt")); wxURI test("file:\"myf\"ile.txt");
CPPUNIT_ASSERT( test.BuildURI() == wxT("file:%22myf%22ile.txt") ); CPPUNIT_ASSERT_EQUAL( "file:%22myf%22ile.txt" , test.BuildURI() );
CPPUNIT_ASSERT( test.GetScheme() == wxT("file") ); CPPUNIT_ASSERT_EQUAL( "file", test.GetScheme() );
CPPUNIT_ASSERT( test.GetPath() == wxT("%22myf%22ile.txt") ); CPPUNIT_ASSERT_EQUAL( "%22myf%22ile.txt", test.GetPath() );
// these could be put under a named registry since they take some // these could be put under a named registry since they take some
// time to complete // time to complete
#if 0 #if 0
// Test problem urls (reported not to work some time ago by a user...) // Test problem urls (reported not to work some time ago by a user...)
const wxChar* pszProblemUrls[] = { "http://www.csdn.net",
"http://www.163.com",
"http://www.sina.com.cn" };
for ( size_t i = 0; i < WXSIZEOF(pszProblemUrls); ++i ) for ( size_t i = 0; i < WXSIZEOF(pszProblemUrls); ++i )
{ {
wxURL urlProblem(pszProblemUrls[i]); wxURL urlProblem(pszProblemUrls[i]);