big wxURI cleanup; it now handles Unicode characters correctly (#3874)

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54723 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2008-07-19 23:14:51 +00:00
parent ece97e2882
commit 2186321ff5
4 changed files with 544 additions and 739 deletions

View File

@@ -1,11 +1,12 @@
/////////////////////////////////////////////////////////////////////////////
// Name: uri.h
// Name: wx/uri.h
// Purpose: wxURI - Class for parsing URIs
// Author: Ryan Norton
// Modified By:
// Vadim Zeitlin (UTF-8 URI support, many other changes)
// Created: 07/01/2004
// RCS-ID: $Id$
// Copyright: (c) Ryan Norton
// Copyright: (c) 2004 Ryan Norton
// 2008 Vadim Zeitlin
// Licence: wxWindows Licence
/////////////////////////////////////////////////////////////////////////////
@@ -52,86 +53,119 @@ class WXDLLIMPEXP_BASE wxURI : public wxObject
public:
wxURI();
wxURI(const wxString& uri);
wxURI(const wxURI& uri);
virtual ~wxURI();
// default copy ctor, assignment operator and dtor are ok
const wxChar* Create(const wxString& uri);
bool Create(const wxString& uri);
// presence tests for the individual URI components: each HasXXX() returns
// true only if all bits of the corresponding wxURI_XXX flag are set in
// m_fields
bool HasScheme() const { return (m_fields & wxURI_SCHEME) == wxURI_SCHEME; }
bool HasUserInfo() const { return (m_fields & wxURI_USERINFO) == wxURI_USERINFO; }
bool HasServer() const { return (m_fields & wxURI_SERVER) == wxURI_SERVER; }
bool HasPort() const { return (m_fields & wxURI_PORT) == wxURI_PORT; }
bool HasPath() const { return (m_fields & wxURI_PATH) == wxURI_PATH; }
bool HasQuery() const { return (m_fields & wxURI_QUERY) == wxURI_QUERY; }
bool HasFragment() const { return (m_fields & wxURI_FRAGMENT) == wxURI_FRAGMENT; }
// assign from a string by passing it to Create(); the value returned by
// Create() is ignored here, so this operator does not report any failure
wxURI& operator=(const wxString& string)
{
Create(string);
return *this;
}
// read-only access to the stored URI components; all of these return
// references to the corresponding members of this object
const wxString& GetScheme() const { return m_scheme; }
const wxString& GetPath() const { return m_path; }
const wxString& GetQuery() const { return m_query; }
const wxString& GetFragment() const { return m_fragment; }
const wxString& GetPort() const { return m_port; }
const wxString& GetUserInfo() const { return m_userinfo; }
const wxString& GetServer() const { return m_server; }
const wxURIHostType& GetHostType() const { return m_hostType; }
bool operator==(const wxURI& uri) const;
// Note that the following two get functions are explicitly deprecated by RFC 2396
// various accessors
//
// each HasXXX() returns true if the corresponding wxURI_XXX flag is set in
// the m_fields bit mask
bool HasScheme() const { return (m_fields & wxURI_SCHEME) != 0; }
bool HasUserInfo() const { return (m_fields & wxURI_USERINFO) != 0; }
bool HasServer() const { return (m_fields & wxURI_SERVER) != 0; }
bool HasPort() const { return (m_fields & wxURI_PORT) != 0; }
bool HasPath() const { return (m_fields & wxURI_PATH) != 0; }
bool HasQuery() const { return (m_fields & wxURI_QUERY) != 0; }
bool HasFragment() const { return (m_fields & wxURI_FRAGMENT) != 0; }
// read-only access to the stored URI components: the string getters return
// references to the corresponding members while GetHostType() returns the
// host type by value
const wxString& GetScheme() const { return m_scheme; }
const wxString& GetPath() const { return m_path; }
const wxString& GetQuery() const { return m_query; }
const wxString& GetFragment() const { return m_fragment; }
const wxString& GetPort() const { return m_port; }
const wxString& GetUserInfo() const { return m_userinfo; }
const wxString& GetServer() const { return m_server; }
wxURIHostType GetHostType() const { return m_hostType; }
// these functions only work if the user information part of the URI is in
// the usual (but insecure and hence explicitly recommended against by the
// RFC) "user:password" form
wxString GetUser() const;
wxString GetPassword() const;
wxString BuildURI() const;
wxString BuildUnescapedURI() const;
// combine all URI components into a single string
//
// BuildURI() returns the real URI suitable for use with network libraries,
// for example, while BuildUnescapedURI() returns a string suitable to be
// shown to the user.
//
// Both forward to DoBuildURI() and differ only in the function they pass to
// it: Nothing() for BuildURI() and Unescape() for BuildUnescapedURI().
wxString BuildURI() const { return DoBuildURI(&wxURI::Nothing); }
wxString BuildUnescapedURI() const { return DoBuildURI(&wxURI::Unescape); }
// the escaped URI should contain only ASCII characters, including possible
// escape sequences
static wxString Unescape(const wxString& escapedURI);
void Resolve(const wxURI& base, int flags = wxURI_STRICT);
bool IsReference() const;
wxURI& operator = (const wxURI& uri);
wxURI& operator = (const wxString& string);
bool operator == (const wxURI& uri) const;
static wxString Unescape (const wxString& szEscapedURI);
protected:
wxURI& Assign(const wxURI& uri);
void Clear();
const wxChar* Parse (const wxChar* uri);
const wxChar* ParseAuthority (const wxChar* uri);
const wxChar* ParseScheme (const wxChar* uri);
const wxChar* ParseUserInfo (const wxChar* uri);
const wxChar* ParseServer (const wxChar* uri);
const wxChar* ParsePort (const wxChar* uri);
const wxChar* ParsePath (const wxChar* uri,
bool bReference = false,
bool bNormalize = true);
const wxChar* ParseQuery (const wxChar* uri);
const wxChar* ParseFragment (const wxChar* uri);
// common part of BuildURI() and BuildUnescapedURI()
wxString DoBuildURI(wxString (*funcDecode)(const wxString&)) const;
// identity function: returns a copy of its argument, unmodified
//
// this is used by BuildURI() to tell DoBuildURI() that nothing needs to be
// done with the URI components
static wxString Nothing(const wxString& value) { return value; }
bool Parse(const char* uri);
const char* ParseAuthority (const char* uri);
const char* ParseScheme (const char* uri);
const char* ParseUserInfo (const char* uri);
const char* ParseServer (const char* uri);
const char* ParsePort (const char* uri);
const char* ParsePath (const char* uri);
const char* ParseQuery (const char* uri);
const char* ParseFragment (const char* uri);
static bool ParseH16(const wxChar*& uri);
static bool ParseIPv4address(const wxChar*& uri);
static bool ParseIPv6address(const wxChar*& uri);
static bool ParseIPvFuture(const wxChar*& uri);
static bool ParseH16(const char*& uri);
static bool ParseIPv4address(const char*& uri);
static bool ParseIPv6address(const char*& uri);
static bool ParseIPvFuture(const char*& uri);
static void Normalize(wxChar* uri, bool bIgnoreLeads = false);
static void UpTree(const wxChar* uristart, const wxChar*& uri);
static void UpTree(wxString::const_iterator uristart,
wxString::const_iterator& uri);
// should be called with i pointing to '%', returns the encoded character
// following it or -1 if invalid and advances i past it (so that it points
// to the last character consumed on return)
static int DecodeEscape(wxString::const_iterator& i);
static wxUniChar TranslateEscape(const wxString::const_iterator& s);
static void Escape(wxString& s, const wxChar& c);
static bool IsEscape(const wxChar*& uri);
// append the next character pointed to by p to the string in an escaped form
// and advance p past it
//
// if the next character is '%' and it's followed by 2 hex digits, they are
// not escaped (again) by this function; this allows keeping the (backwards-
// compatible) ambiguity about the input format to wxURI::Create(): it can
// be either already escaped or not
void AppendNextEscaped(wxString& s, const char *& p);
static wxChar CharToHex(const wxChar& c);
// convert hexadecimal digit to its value; return -1 if c isn't valid
static int CharToHex(char c);
static bool IsUnreserved (const wxChar& c);
static bool IsReserved (const wxChar& c);
static bool IsGenDelim (const wxChar& c);
static bool IsSubDelim (const wxChar& c);
static bool IsHex(const wxChar& c);
static bool IsAlpha(const wxChar& c);
static bool IsDigit(const wxChar& c);
// split a URI path string into its component segments (including empty and
// "." ones, no post-processing is done)
static wxArrayString SplitInSegments(const wxString& path);
// various URI grammar helpers
static bool IsUnreserved(char c);
static bool IsReserved(char c);
static bool IsGenDelim(char c);
static bool IsSubDelim(char c);
static bool IsHex(char c);
static bool IsAlpha(char c);
static bool IsDigit(char c);
static bool IsEndPath(char c);
wxString m_scheme;
wxString m_path;