big wxURI cleanup; it now handles Unicode characters correctly (#3874)

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54723 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2008-07-19 23:14:51 +00:00
parent ece97e2882
commit 2186321ff5
4 changed files with 544 additions and 739 deletions

View File

@@ -249,6 +249,7 @@ Major new features in this release
All:
- Added (experimental) IPv6 support to wxSocket (Arcen)
- Cleaned up wxURI and made it Unicode-friendly.
- Add support for wxExecute(wxEXEC_ASYNC) in wxBase (Lukasz Michalski)
- Added wxXLocale class and xlocale-like functions using it
- Allow loading message catalogs from wxFileSystem (Axel Gembe)

View File

@@ -1,11 +1,12 @@
/////////////////////////////////////////////////////////////////////////////
// Name: uri.h
// Name: wx/uri.h
// Purpose: wxURI - Class for parsing URIs
// Author: Ryan Norton
// Modified By:
// Vadim Zeitlin (UTF-8 URI support, many other changes)
// Created: 07/01/2004
// RCS-ID: $Id$
// Copyright: (c) Ryan Norton
// Copyright: (c) 2004 Ryan Norton
// 2008 Vadim Zeitlin
// Licence: wxWindows Licence
/////////////////////////////////////////////////////////////////////////////
@@ -52,86 +53,119 @@ class WXDLLIMPEXP_BASE wxURI : public wxObject
public:
wxURI();
wxURI(const wxString& uri);
wxURI(const wxURI& uri);
virtual ~wxURI();
// default copy ctor, assignment operator and dtor are ok
const wxChar* Create(const wxString& uri);
bool Create(const wxString& uri);
bool HasScheme() const { return (m_fields & wxURI_SCHEME) == wxURI_SCHEME; }
bool HasUserInfo() const { return (m_fields & wxURI_USERINFO) == wxURI_USERINFO; }
bool HasServer() const { return (m_fields & wxURI_SERVER) == wxURI_SERVER; }
bool HasPort() const { return (m_fields & wxURI_PORT) == wxURI_PORT; }
bool HasPath() const { return (m_fields & wxURI_PATH) == wxURI_PATH; }
bool HasQuery() const { return (m_fields & wxURI_QUERY) == wxURI_QUERY; }
bool HasFragment() const { return (m_fields & wxURI_FRAGMENT) == wxURI_FRAGMENT; }
wxURI& operator=(const wxString& string)
{
Create(string);
return *this;
}
const wxString& GetScheme() const { return m_scheme; }
const wxString& GetPath() const { return m_path; }
const wxString& GetQuery() const { return m_query; }
const wxString& GetFragment() const { return m_fragment; }
const wxString& GetPort() const { return m_port; }
const wxString& GetUserInfo() const { return m_userinfo; }
const wxString& GetServer() const { return m_server; }
const wxURIHostType& GetHostType() const { return m_hostType; }
bool operator==(const wxURI& uri) const;
//Note that the following two get functions are explicitly deprecated by RFC 2396
// various accessors
bool HasScheme() const { return (m_fields & wxURI_SCHEME) != 0; }
bool HasUserInfo() const { return (m_fields & wxURI_USERINFO) != 0; }
bool HasServer() const { return (m_fields & wxURI_SERVER) != 0; }
bool HasPort() const { return (m_fields & wxURI_PORT) != 0; }
bool HasPath() const { return (m_fields & wxURI_PATH) != 0; }
bool HasQuery() const { return (m_fields & wxURI_QUERY) != 0; }
bool HasFragment() const { return (m_fields & wxURI_FRAGMENT) != 0; }
const wxString& GetScheme() const { return m_scheme; }
const wxString& GetPath() const { return m_path; }
const wxString& GetQuery() const { return m_query; }
const wxString& GetFragment() const { return m_fragment; }
const wxString& GetPort() const { return m_port; }
const wxString& GetUserInfo() const { return m_userinfo; }
const wxString& GetServer() const { return m_server; }
wxURIHostType GetHostType() const { return m_hostType; }
// these functions only work if the user information part of the URI is in
// the usual (but insecure and hence explicitly recommended against by the
// RFC) "user:password" form
wxString GetUser() const;
wxString GetPassword() const;
wxString BuildURI() const;
wxString BuildUnescapedURI() const;
// combine all URI components into a single string
//
// BuildURI() returns the real URI suitable for use with network libraries,
// for example, while BuildUnescapedURI() returns a string suitable to be
// shown to the user.
wxString BuildURI() const { return DoBuildURI(&wxURI::Nothing); }
wxString BuildUnescapedURI() const { return DoBuildURI(&wxURI::Unescape); }
// the escaped URI should contain only ASCII characters, including possible
// escape sequences
static wxString Unescape(const wxString& escapedURI);
void Resolve(const wxURI& base, int flags = wxURI_STRICT);
bool IsReference() const;
wxURI& operator = (const wxURI& uri);
wxURI& operator = (const wxString& string);
bool operator == (const wxURI& uri) const;
static wxString Unescape (const wxString& szEscapedURI);
protected:
wxURI& Assign(const wxURI& uri);
void Clear();
const wxChar* Parse (const wxChar* uri);
const wxChar* ParseAuthority (const wxChar* uri);
const wxChar* ParseScheme (const wxChar* uri);
const wxChar* ParseUserInfo (const wxChar* uri);
const wxChar* ParseServer (const wxChar* uri);
const wxChar* ParsePort (const wxChar* uri);
const wxChar* ParsePath (const wxChar* uri,
bool bReference = false,
bool bNormalize = true);
const wxChar* ParseQuery (const wxChar* uri);
const wxChar* ParseFragment (const wxChar* uri);
// common part of BuildURI() and BuildUnescapedURI()
wxString DoBuildURI(wxString (*funcDecode)(const wxString&)) const;
// function which returns its argument unmodified, this is used by
// BuildURI() to tell DoBuildURI() that nothing needs to be done with the
// URI components
static wxString Nothing(const wxString& value) { return value; }
bool Parse(const char* uri);
const char* ParseAuthority (const char* uri);
const char* ParseScheme (const char* uri);
const char* ParseUserInfo (const char* uri);
const char* ParseServer (const char* uri);
const char* ParsePort (const char* uri);
const char* ParsePath (const char* uri);
const char* ParseQuery (const char* uri);
const char* ParseFragment (const char* uri);
static bool ParseH16(const wxChar*& uri);
static bool ParseIPv4address(const wxChar*& uri);
static bool ParseIPv6address(const wxChar*& uri);
static bool ParseIPvFuture(const wxChar*& uri);
static bool ParseH16(const char*& uri);
static bool ParseIPv4address(const char*& uri);
static bool ParseIPv6address(const char*& uri);
static bool ParseIPvFuture(const char*& uri);
static void Normalize(wxChar* uri, bool bIgnoreLeads = false);
static void UpTree(const wxChar* uristart, const wxChar*& uri);
static void UpTree(wxString::const_iterator uristart,
wxString::const_iterator& uri);
// should be called with i pointing to '%', returns the encoded character
// following it or -1 if invalid and advances i past it (so that it points
// to the last character consumed on return)
static int DecodeEscape(wxString::const_iterator& i);
static wxUniChar TranslateEscape(const wxString::const_iterator& s);
static void Escape(wxString& s, const wxChar& c);
static bool IsEscape(const wxChar*& uri);
// append the next character pointed to by p to the string in an escaped form
// and advance p past it
//
// if the next character is '%' and it's followed by 2 hex digits, they are
// not escaped (again) by this function; this preserves the (backwards-
// compatible) ambiguity about the input format to wxURI::Create(): it can
// be either already escaped or not
void AppendNextEscaped(wxString& s, const char *& p);
static wxChar CharToHex(const wxChar& c);
// convert hexadecimal digit to its value; return -1 if c isn't valid
static int CharToHex(char c);
static bool IsUnreserved (const wxChar& c);
static bool IsReserved (const wxChar& c);
static bool IsGenDelim (const wxChar& c);
static bool IsSubDelim (const wxChar& c);
static bool IsHex(const wxChar& c);
static bool IsAlpha(const wxChar& c);
static bool IsDigit(const wxChar& c);
// split a URI path string into its component segments (including empty and
// "." ones, no post-processing is done)
static wxArrayString SplitInSegments(const wxString& path);
// various URI grammar helpers
static bool IsUnreserved(char c);
static bool IsReserved(char c);
static bool IsGenDelim(char c);
static bool IsSubDelim(char c);
static bool IsHex(char c);
static bool IsAlpha(char c);
static bool IsDigit(char c);
static bool IsEndPath(char c);
wxString m_scheme;
wxString m_path;

File diff suppressed because it is too large Load Diff

View File

@@ -158,10 +158,11 @@ void URITestCase::Paths()
}
#define URI_TEST_RESOLVE_IMPL(string, eq, strict) \
uri = new wxURI(wxT(string));\
uri->Resolve(masteruri, strict);\
CPPUNIT_ASSERT(uri->BuildURI() == wxT(eq));\
delete uri;
{ \
wxURI uri(string); \
uri.Resolve(masteruri, strict); \
CPPUNIT_ASSERT_EQUAL(eq, uri.BuildURI()); \
}
#define URI_TEST_RESOLVE(string, eq) \
URI_TEST_RESOLVE_IMPL(string, eq, true);
@@ -174,8 +175,7 @@ void URITestCase::Paths()
void URITestCase::NormalResolving()
{
wxURI masteruri(wxT("http://a/b/c/d;p?q"));
wxURI* uri;
wxURI masteruri("http://a/b/c/d;p?q");
URI_TEST_RESOLVE("g:h" ,"g:h")
URI_TEST_RESOLVE("g" ,"http://a/b/c/g")
@@ -205,10 +205,12 @@ void URITestCase::NormalResolving()
void URITestCase::ComplexResolving()
{
wxURI masteruri(wxT("http://a/b/c/d;p?q"));
wxURI* uri;
wxURI masteruri("http://a/b/c/d;p?q");
//odd path examples
URI_TEST_RESOLVE("../../../g" , "http://a/g")
URI_TEST_RESOLVE("../../../../g", "http://a/g")
URI_TEST_RESOLVE("/./g" ,"http://a/g")
URI_TEST_RESOLVE("/../g" ,"http://a/g")
URI_TEST_RESOLVE("g." ,"http://a/b/c/g.")
@@ -216,14 +218,10 @@ void URITestCase::ComplexResolving()
URI_TEST_RESOLVE("g.." ,"http://a/b/c/g..")
URI_TEST_RESOLVE("..g" ,"http://a/b/c/..g")
}
//Should Fail
//"../../../g" = "http://a/g"
//"../../../../g" = "http://a/g"
void URITestCase::ReallyComplexResolving()
{
wxURI masteruri(wxT("http://a/b/c/d;p?q"));
wxURI* uri;
wxURI masteruri("http://a/b/c/d;p?q");
//even odder path examples
URI_TEST_RESOLVE("./../g" ,"http://a/b/g")
@@ -236,8 +234,7 @@ void URITestCase::ReallyComplexResolving()
void URITestCase::QueryFragmentResolving()
{
wxURI masteruri(wxT("http://a/b/c/d;p?q"));
wxURI* uri;
wxURI masteruri("http://a/b/c/d;p?q");
//query/fragment ambiguity
URI_TEST_RESOLVE("g?y/./x","http://a/b/c/g?y/./x")
@@ -248,8 +245,7 @@ void URITestCase::QueryFragmentResolving()
void URITestCase::BackwardsResolving()
{
wxURI masteruri(wxT("http://a/b/c/d;p?q"));
wxURI* uri;
wxURI masteruri("http://a/b/c/d;p?q");
//"NEW"
URI_TEST_RESOLVE("http:g" , "http:g") //strict
@@ -259,70 +255,76 @@ void URITestCase::BackwardsResolving()
void URITestCase::Assignment()
{
wxURI uri1(wxT("http://mysite.com")),
uri2(wxT("http://mysite2.com"));
wxURI uri1("http://mysite.com"),
uri2("http://mysite2.com");
uri2 = uri1;
CPPUNIT_ASSERT(uri1.BuildURI() == uri2.BuildURI());
CPPUNIT_ASSERT_EQUAL(uri1.BuildURI(), uri2.BuildURI());
}
void URITestCase::Comparison()
{
CPPUNIT_ASSERT(wxURI(wxT("http://mysite.com")) == wxURI(wxT("http://mysite.com")));
CPPUNIT_ASSERT(wxURI("http://mysite.com") == wxURI("http://mysite.com"));
}
void URITestCase::Unescaping()
{
wxString orig = wxT("http://test.com/of/file%3A%2F%2FC%3A%5Curi%5C")
wxT("escaping%5Cthat%5Cseems%5Cbroken%5Csadly%5B1%5D.rss");
wxString escaped,
unescaped;
wxString works= wxURI(orig).BuildUnescapedURI();
escaped = "http://test.com/of/file%3A%2F%2FC%3A%5Curi%5C"
"escaping%5Cthat%5Cseems%5Cbroken%5Csadly%5B1%5D.rss";
CPPUNIT_ASSERT(orig.IsSameAs(works) == false);
unescaped = wxURI(escaped).BuildUnescapedURI();
wxString orig2 = wxT("http://test.com/of/file%3A%2F%")
wxT("2FC%3A%5Curi%5Cescaping%5Cthat%5Cseems%")
wxT("5Cbroken%5Csadly%5B1%5D.rss");
CPPUNIT_ASSERT_EQUAL( "http://test.com/of/file://C:\\uri\\"
"escaping\\that\\seems\\broken\\sadly[1].rss",
unescaped );
wxString works2 = wxURI::Unescape(orig2);
wxString broken2 = wxURI(orig2).BuildUnescapedURI();
CPPUNIT_ASSERT_EQUAL( unescaped, wxURI::Unescape(escaped) );
CPPUNIT_ASSERT(works2.IsSameAs(broken2));
escaped = "http://ru.wikipedia.org/wiki/"
"%D0%A6%D0%B5%D0%BB%D0%BE%D0%B5_%D1%87%D0%B8%D1%81%D0%BB%D0%BE";
unescaped = wxURI::Unescape(escaped);
CPPUNIT_ASSERT_EQUAL( wxString::FromUTF8(
"http://ru.wikipedia.org/wiki/"
"\xD0\xA6\xD0\xB5\xD0\xBB\xD0\xBE\xD0\xB5_"
"\xD1\x87\xD0\xB8\xD1\x81\xD0\xBB\xD0\xBE"
),
unescaped );
}
void URITestCase::FileScheme()
{
//file:// variety (NOT CONFORMANT TO THE RFC)
CPPUNIT_ASSERT(wxURI(wxString(wxT("file://e:/wxcode/script1.xml"))).GetPath()
== wxT("e:/wxcode/script1.xml") );
URI_TEST_EQUAL( "file://e:/wxcode/script1.xml",
"e:/wxcode/script1.xml", GetPath() );
//file:/// variety
CPPUNIT_ASSERT(wxURI(wxString(wxT("file:///e:/wxcode/script1.xml"))).GetPath()
== wxT("/e:/wxcode/script1.xml") );
URI_TEST_EQUAL( "file:///e:/wxcode/script1.xml",
"/e:/wxcode/script1.xml", GetPath() );
//file:/ variety
CPPUNIT_ASSERT(wxURI(wxString(wxT("file:/e:/wxcode/script1.xml"))).GetPath()
== wxT("/e:/wxcode/script1.xml") );
URI_TEST_EQUAL( "file:/e:/wxcode/script1.xml",
"/e:/wxcode/script1.xml", GetPath() );
//file: variety
CPPUNIT_ASSERT(wxURI(wxString(wxT("file:e:/wxcode/script1.xml"))).GetPath()
== wxT("e:/wxcode/script1.xml") );
URI_TEST_EQUAL( "file:e:/wxcode/script1.xml",
"e:/wxcode/script1.xml", GetPath() );
}
#if TEST_URL
const wxChar* pszProblemUrls[] = { wxT("http://www.csdn.net"),
wxT("http://www.163.com"),
wxT("http://www.sina.com.cn") };
#include "wx/url.h"
#include "wx/file.h"
void URITestCase::URLCompat()
{
wxURL url(wxT("http://user:password@wxwidgets.org"));
wxURL url("http://user:password@wxwidgets.org");
CPPUNIT_ASSERT(url.GetError() == wxURL_NOERR);
@@ -332,9 +334,9 @@ void URITestCase::URLCompat()
CPPUNIT_ASSERT( pInput != NULL );
#endif
CPPUNIT_ASSERT( url == wxURL(wxT("http://user:password@wxwidgets.org")) );
CPPUNIT_ASSERT( url == wxURL("http://user:password@wxwidgets.org") );
wxURI uri(wxT("http://user:password@wxwidgets.org"));
wxURI uri("http://user:password@wxwidgets.org");
CPPUNIT_ASSERT( url == uri );
@@ -348,18 +350,22 @@ void URITestCase::URLCompat()
CPPUNIT_ASSERT( uricopy == url );
CPPUNIT_ASSERT( uricopy == urlcopy );
CPPUNIT_ASSERT( uricopy == uri );
CPPUNIT_ASSERT( wxURI::Unescape(wxT("%20%41%20")) == wxT(" A ") );
CPPUNIT_ASSERT_EQUAL( " A ", wxURI::Unescape("%20%41%20") );
wxURI test(wxT("file:\"myf\"ile.txt"));
wxURI test("file:\"myf\"ile.txt");
CPPUNIT_ASSERT( test.BuildURI() == wxT("file:%22myf%22ile.txt") );
CPPUNIT_ASSERT( test.GetScheme() == wxT("file") );
CPPUNIT_ASSERT( test.GetPath() == wxT("%22myf%22ile.txt") );
CPPUNIT_ASSERT_EQUAL( "file:%22myf%22ile.txt" , test.BuildURI() );
CPPUNIT_ASSERT_EQUAL( "file", test.GetScheme() );
CPPUNIT_ASSERT_EQUAL( "%22myf%22ile.txt", test.GetPath() );
// these could be put under a named registry since they take some
// time to complete
#if 0
// Test problem urls (reported not to work some time ago by a user...)
const wxChar* pszProblemUrls[] = { "http://www.csdn.net",
"http://www.163.com",
"http://www.sina.com.cn" };
for ( size_t i = 0; i < WXSIZEOF(pszProblemUrls); ++i )
{
wxURL urlProblem(pszProblemUrls[i]);