Such strings are not really URIs as they should have been encoded if they were but we can obtain them from e.g. wxFileSystem::FindFirst(), so handle them correctly here as it's simpler than checking all the places where Unescape() is called. Add a unit test checking that decoding an URI containing both Unicode and percent-encoded Unicode characters works correctly.
1083 lines
27 KiB
C++
1083 lines
27 KiB
C++
/////////////////////////////////////////////////////////////////////////////
|
|
// Name: src/common/uri.cpp
|
|
// Purpose: Implementation of a URI parser
|
|
// Author: Ryan Norton,
|
|
// Vadim Zeitlin (UTF-8 URI support, many other changes)
|
|
// Created: 10/26/04
|
|
// Copyright: (c) 2004 Ryan Norton,
|
|
// 2008 Vadim Zeitlin
|
|
// Licence: wxWindows licence
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
|
|
// ===========================================================================
|
|
// declarations
|
|
// ===========================================================================
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// headers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// For compilers that support precompilation, includes "wx.h".
|
|
#include "wx/wxprec.h"
|
|
|
|
#ifdef __BORLANDC__
|
|
#pragma hdrstop
|
|
#endif
|
|
|
|
#ifndef WX_PRECOMP
|
|
#include "wx/crt.h"
|
|
#endif
|
|
|
|
#include "wx/uri.h"
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// definitions
|
|
// ---------------------------------------------------------------------------
|
|
|
|
wxIMPLEMENT_CLASS(wxURI, wxObject);
|
|
|
|
// ===========================================================================
|
|
// wxURI implementation
|
|
// ===========================================================================
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Constructors and cleanup
|
|
// ---------------------------------------------------------------------------
|
|
|
|
wxURI::wxURI()
|
|
: m_hostType(wxURI_REGNAME),
|
|
m_fields(0)
|
|
{
|
|
}
|
|
|
|
wxURI::wxURI(const wxString& uri)
|
|
: m_hostType(wxURI_REGNAME),
|
|
m_fields(0)
|
|
{
|
|
Create(uri);
|
|
}
|
|
|
|
bool wxURI::Create(const wxString& uri)
|
|
{
|
|
if (m_fields)
|
|
Clear();
|
|
|
|
return Parse(uri.utf8_str());
|
|
}
|
|
|
|
void wxURI::Clear()
|
|
{
|
|
m_scheme =
|
|
m_userinfo =
|
|
m_server =
|
|
m_port =
|
|
m_path =
|
|
m_query =
|
|
m_fragment = wxEmptyString;
|
|
|
|
m_hostType = wxURI_REGNAME;
|
|
|
|
m_fields = 0;
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Escaped characters handling
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Converts a character into a numeric hexadecimal value, or -1 if the passed
|
|
// in character is not a valid hex character
|
|
|
|
/* static */
|
|
int wxURI::CharToHex(char c)
|
|
{
|
|
if ((c >= 'A') && (c <= 'Z'))
|
|
return c - 'A' + 10;
|
|
if ((c >= 'a') && (c <= 'z'))
|
|
return c - 'a' + 10;
|
|
if ((c >= '0') && (c <= '9'))
|
|
return c - '0';
|
|
|
|
return -1;
|
|
}
|
|
|
|
/* static */
|
|
wxString wxURI::Unescape(const wxString& uri)
|
|
{
|
|
// URIs can contain escaped 8-bit characters that have to be decoded using
|
|
// UTF-8 (see RFC 3986), however in our (probably broken...) case we can
|
|
// also end up with not escaped Unicode characters in the URI string which
|
|
// can't be decoded as UTF-8. So what we do here is to encode all Unicode
|
|
// characters as UTF-8 only to decode them back below. This is obviously
|
|
// inefficient but there doesn't seem to be anything else to do, other than
|
|
// not allowing to mix Unicode characters with escapes in the first place,
|
|
// but this seems to be done in a lot of places, unfortunately.
|
|
const wxScopedCharBuffer& uriU8(uri.utf8_str());
|
|
const size_t len = uriU8.length();
|
|
|
|
// the unescaped version can't be longer than the original one
|
|
wxCharBuffer buf(uriU8.length());
|
|
char *p = buf.data();
|
|
|
|
const char* const end = uriU8.data() + len;
|
|
for ( const char* s = uriU8.data(); s != end; ++s, ++p )
|
|
{
|
|
char c = *s;
|
|
if ( c == '%' && s < end - 2 && IsHex(s[1]) && IsHex(s[2]) )
|
|
{
|
|
c = (CharToHex(s[1]) << 4) | CharToHex(s[2]);
|
|
s += 2;
|
|
}
|
|
|
|
*p = c;
|
|
}
|
|
|
|
*p = '\0';
|
|
|
|
return wxString::FromUTF8(buf);
|
|
}
|
|
|
|
void wxURI::AppendNextEscaped(wxString& s, const char *& p)
|
|
{
|
|
// check for an already encoded character:
|
|
//
|
|
// pct-encoded = "%" HEXDIG HEXDIG
|
|
if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) )
|
|
{
|
|
s += *p++;
|
|
s += *p++;
|
|
s += *p++;
|
|
}
|
|
else // really needs escaping
|
|
{
|
|
static const char* hexDigits = "0123456789abcdef";
|
|
|
|
const char c = *p++;
|
|
|
|
s += '%';
|
|
s += hexDigits[(c >> 4) & 15];
|
|
s += hexDigits[c & 15];
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// GetUser
|
|
// GetPassword
|
|
//
|
|
// Gets the username and password via the old URL method.
|
|
// ---------------------------------------------------------------------------
|
|
wxString wxURI::GetUser() const
|
|
{
|
|
// if there is no colon at all, find() returns npos and this method returns
|
|
// the entire string which is correct as it means that password was omitted
|
|
return m_userinfo(0, m_userinfo.find(':'));
|
|
}
|
|
|
|
wxString wxURI::GetPassword() const
|
|
{
|
|
size_t posColon = m_userinfo.find(':');
|
|
|
|
if ( posColon == wxString::npos )
|
|
return "";
|
|
|
|
return m_userinfo(posColon + 1, wxString::npos);
|
|
}
|
|
|
|
// combine all URI fields in a single string, applying funcDecode to each
|
|
// component which it may make sense to decode (i.e. "unescape")
|
|
wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const
|
|
{
|
|
wxString ret;
|
|
|
|
if (HasScheme())
|
|
ret += m_scheme + ":";
|
|
|
|
if (HasServer())
|
|
{
|
|
ret += "//";
|
|
|
|
if (HasUserInfo())
|
|
ret += funcDecode(m_userinfo) + "@";
|
|
|
|
if (m_hostType == wxURI_REGNAME)
|
|
ret += funcDecode(m_server);
|
|
else
|
|
ret += m_server;
|
|
|
|
if (HasPort())
|
|
ret += ":" + m_port;
|
|
}
|
|
|
|
ret += funcDecode(m_path);
|
|
|
|
if (HasQuery())
|
|
ret += "?" + funcDecode(m_query);
|
|
|
|
if (HasFragment())
|
|
ret += "#" + funcDecode(m_fragment);
|
|
|
|
return ret;
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Comparison
|
|
// ---------------------------------------------------------------------------
|
|
|
|
bool wxURI::operator==(const wxURI& uri) const
|
|
{
|
|
if (HasScheme())
|
|
{
|
|
if(m_scheme != uri.m_scheme)
|
|
return false;
|
|
}
|
|
else if (uri.HasScheme())
|
|
return false;
|
|
|
|
|
|
if (HasServer())
|
|
{
|
|
if (HasUserInfo())
|
|
{
|
|
if (m_userinfo != uri.m_userinfo)
|
|
return false;
|
|
}
|
|
else if (uri.HasUserInfo())
|
|
return false;
|
|
|
|
if (m_server != uri.m_server ||
|
|
m_hostType != uri.m_hostType)
|
|
return false;
|
|
|
|
if (HasPort())
|
|
{
|
|
if(m_port != uri.m_port)
|
|
return false;
|
|
}
|
|
else if (uri.HasPort())
|
|
return false;
|
|
}
|
|
else if (uri.HasServer())
|
|
return false;
|
|
|
|
|
|
if (HasPath())
|
|
{
|
|
if(m_path != uri.m_path)
|
|
return false;
|
|
}
|
|
else if (uri.HasPath())
|
|
return false;
|
|
|
|
if (HasQuery())
|
|
{
|
|
if (m_query != uri.m_query)
|
|
return false;
|
|
}
|
|
else if (uri.HasQuery())
|
|
return false;
|
|
|
|
if (HasFragment())
|
|
{
|
|
if (m_fragment != uri.m_fragment)
|
|
return false;
|
|
}
|
|
else if (uri.HasFragment())
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// IsReference
|
|
//
|
|
// if there is no authority or scheme, it is a reference
|
|
// ---------------------------------------------------------------------------
|
|
|
|
bool wxURI::IsReference() const
|
|
{
|
|
return !HasScheme() || !HasServer();
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// IsRelative
|
|
//
|
|
// FIXME: may need refinement
|
|
// ---------------------------------------------------------------------------
|
|
|
|
bool wxURI::IsRelative() const
|
|
{
|
|
return !HasScheme() && !HasServer();
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Parse
|
|
//
|
|
// Master URI parsing method. Just calls the individual parsing methods
|
|
//
|
|
// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
|
|
// URI-reference = URI / relative
|
|
// ---------------------------------------------------------------------------
|
|
|
|
bool wxURI::Parse(const char *uri)
|
|
{
|
|
uri = ParseScheme(uri);
|
|
if ( uri )
|
|
uri = ParseAuthority(uri);
|
|
if ( uri )
|
|
uri = ParsePath(uri);
|
|
if ( uri )
|
|
uri = ParseQuery(uri);
|
|
if ( uri )
|
|
uri = ParseFragment(uri);
|
|
|
|
// we only succeed if we parsed the entire string
|
|
return uri && *uri == '\0';
|
|
}
|
|
|
|
const char* wxURI::ParseScheme(const char *uri)
|
|
{
|
|
const char * const start = uri;
|
|
|
|
// assume that we have a scheme if we have the valid start of it
|
|
if ( IsAlpha(*uri) )
|
|
{
|
|
m_scheme += *uri++;
|
|
|
|
//scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
|
|
while (IsAlpha(*uri) || IsDigit(*uri) ||
|
|
*uri == '+' ||
|
|
*uri == '-' ||
|
|
*uri == '.')
|
|
{
|
|
m_scheme += *uri++;
|
|
}
|
|
|
|
//valid scheme?
|
|
if (*uri == ':')
|
|
{
|
|
//mark the scheme as valid
|
|
m_fields |= wxURI_SCHEME;
|
|
|
|
//move reference point up to input buffer
|
|
++uri;
|
|
}
|
|
else // no valid scheme finally
|
|
{
|
|
uri = start; // rewind
|
|
m_scheme.clear();
|
|
}
|
|
}
|
|
//else: can't have schema, possible a relative URI
|
|
|
|
return uri;
|
|
}
|
|
|
|
const char* wxURI::ParseAuthority(const char* uri)
|
|
{
|
|
// authority = [ userinfo "@" ] host [ ":" port ]
|
|
if ( uri[0] == '/' && uri[1] == '/' )
|
|
{
|
|
//skip past the two slashes
|
|
uri += 2;
|
|
|
|
// ############# DEVIATION FROM RFC #########################
|
|
// Don't parse the server component for file URIs
|
|
if(m_scheme != "file")
|
|
{
|
|
//normal way
|
|
uri = ParseUserInfo(uri);
|
|
uri = ParseServer(uri);
|
|
return ParsePort(uri);
|
|
}
|
|
}
|
|
|
|
return uri;
|
|
}
|
|
|
|
const char* wxURI::ParseUserInfo(const char* uri)
|
|
{
|
|
const char * const start = uri;
|
|
|
|
// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
|
|
while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' )
|
|
{
|
|
if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' )
|
|
m_userinfo += *uri++;
|
|
else
|
|
AppendNextEscaped(m_userinfo, uri);
|
|
}
|
|
|
|
if ( *uri++ == '@' )
|
|
{
|
|
// valid userinfo
|
|
m_fields |= wxURI_USERINFO;
|
|
}
|
|
else
|
|
{
|
|
uri = start; // rewind
|
|
m_userinfo.clear();
|
|
}
|
|
|
|
return uri;
|
|
}
|
|
|
|
const char* wxURI::ParseServer(const char* uri)
|
|
{
|
|
const char * const start = uri;
|
|
|
|
// host = IP-literal / IPv4address / reg-name
|
|
// IP-literal = "[" ( IPv6address / IPvFuture ) "]"
|
|
if (*uri == '[')
|
|
{
|
|
++uri;
|
|
if (ParseIPv6address(uri) && *uri == ']')
|
|
{
|
|
m_hostType = wxURI_IPV6ADDRESS;
|
|
|
|
m_server.assign(start + 1, uri - start - 1);
|
|
++uri;
|
|
}
|
|
else
|
|
{
|
|
uri = start + 1; // skip the leading '[' again
|
|
|
|
if (ParseIPvFuture(uri) && *uri == ']')
|
|
{
|
|
m_hostType = wxURI_IPVFUTURE;
|
|
|
|
m_server.assign(start + 1, uri - start - 1);
|
|
++uri;
|
|
}
|
|
else // unrecognized IP literal
|
|
{
|
|
uri = start;
|
|
}
|
|
}
|
|
}
|
|
else // IPv4 or a reg-name
|
|
{
|
|
if (ParseIPv4address(uri))
|
|
{
|
|
m_hostType = wxURI_IPV4ADDRESS;
|
|
|
|
m_server.assign(start, uri - start);
|
|
}
|
|
else
|
|
{
|
|
uri = start;
|
|
}
|
|
}
|
|
|
|
if ( m_hostType == wxURI_REGNAME )
|
|
{
|
|
uri = start;
|
|
// reg-name = *( unreserved / pct-encoded / sub-delims )
|
|
while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' )
|
|
{
|
|
if ( IsUnreserved(*uri) || IsSubDelim(*uri) )
|
|
m_server += *uri++;
|
|
else
|
|
AppendNextEscaped(m_server, uri);
|
|
}
|
|
}
|
|
|
|
m_fields |= wxURI_SERVER;
|
|
|
|
return uri;
|
|
}
|
|
|
|
|
|
const char* wxURI::ParsePort(const char* uri)
|
|
{
|
|
// port = *DIGIT
|
|
if( *uri == ':' )
|
|
{
|
|
++uri;
|
|
while ( IsDigit(*uri) )
|
|
{
|
|
m_port += *uri++;
|
|
}
|
|
|
|
m_fields |= wxURI_PORT;
|
|
}
|
|
|
|
return uri;
|
|
}
|
|
|
|
const char* wxURI::ParsePath(const char* uri)
|
|
{
|
|
/// hier-part = "//" authority path-abempty
|
|
/// / path-absolute
|
|
/// / path-rootless
|
|
/// / path-empty
|
|
///
|
|
/// relative-part = "//" authority path-abempty
|
|
/// / path-absolute
|
|
/// / path-noscheme
|
|
/// / path-empty
|
|
///
|
|
/// path-abempty = *( "/" segment )
|
|
/// path-absolute = "/" [ segment-nz *( "/" segment ) ]
|
|
/// path-noscheme = segment-nz-nc *( "/" segment )
|
|
/// path-rootless = segment-nz *( "/" segment )
|
|
/// path-empty = 0<pchar>
|
|
///
|
|
/// segment = *pchar
|
|
/// segment-nz = 1*pchar
|
|
/// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
|
|
/// ; non-zero-length segment without any colon ":"
|
|
///
|
|
/// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
|
|
|
|
if ( IsEndPath(*uri) )
|
|
return uri;
|
|
|
|
const bool isAbs = *uri == '/';
|
|
if ( isAbs )
|
|
m_path += *uri++;
|
|
|
|
wxArrayString segments;
|
|
wxString segment;
|
|
for ( ;; )
|
|
{
|
|
const bool endPath = IsEndPath(*uri);
|
|
if ( endPath || *uri == '/' )
|
|
{
|
|
// end of a segment, look at what we got
|
|
if ( segment == ".." )
|
|
{
|
|
if ( !segments.empty() && *segments.rbegin() != ".." )
|
|
segments.pop_back();
|
|
else if ( !isAbs )
|
|
segments.push_back("..");
|
|
}
|
|
else if ( segment == "." )
|
|
{
|
|
// normally we ignore "." but the last one should be taken into
|
|
// account as "path/." is the same as "path/" and not just "path"
|
|
if ( endPath )
|
|
segments.push_back("");
|
|
}
|
|
else // normal segment
|
|
{
|
|
segments.push_back(segment);
|
|
}
|
|
|
|
if ( endPath )
|
|
break;
|
|
|
|
segment.clear();
|
|
++uri;
|
|
continue;
|
|
}
|
|
|
|
if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' )
|
|
segment += *uri++;
|
|
else
|
|
AppendNextEscaped(segment, uri);
|
|
}
|
|
|
|
m_path += wxJoin(segments, '/', '\0');
|
|
m_fields |= wxURI_PATH;
|
|
|
|
return uri;
|
|
}
|
|
|
|
|
|
const char* wxURI::ParseQuery(const char* uri)
|
|
{
|
|
// query = *( pchar / "/" / "?" )
|
|
if ( *uri == '?' )
|
|
{
|
|
++uri;
|
|
while ( *uri && *uri != '#' )
|
|
{
|
|
if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
|
|
*uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' )
|
|
m_query += *uri++;
|
|
else
|
|
AppendNextEscaped(m_query, uri);
|
|
}
|
|
|
|
m_fields |= wxURI_QUERY;
|
|
}
|
|
|
|
return uri;
|
|
}
|
|
|
|
|
|
const char* wxURI::ParseFragment(const char* uri)
|
|
{
|
|
// fragment = *( pchar / "/" / "?" )
|
|
if ( *uri == '#' )
|
|
{
|
|
++uri;
|
|
while ( *uri )
|
|
{
|
|
if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
|
|
*uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
|
|
m_fragment += *uri++;
|
|
else
|
|
AppendNextEscaped(m_fragment, uri);
|
|
}
|
|
|
|
m_fields |= wxURI_FRAGMENT;
|
|
}
|
|
|
|
return uri;
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Resolve
|
|
//
|
|
// Builds missing components of this uri from a base uri
|
|
//
|
|
// A version of the algorithm outlined in the RFC is used here
|
|
// (it is shown in comments)
|
|
//
|
|
// Note that an empty URI inherits all components
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/* static */
|
|
wxArrayString wxURI::SplitInSegments(const wxString& path)
|
|
{
|
|
return wxSplit(path, '/', '\0' /* no escape character */);
|
|
}
|
|
|
|
void wxURI::Resolve(const wxURI& base, int flags)
|
|
{
|
|
wxASSERT_MSG(!base.IsReference(),
|
|
"wxURI to inherit from must not be a reference!");
|
|
|
|
// If we aren't being strict, enable the older (pre-RFC2396) loophole that
|
|
// allows this uri to inherit other properties from the base uri - even if
|
|
// the scheme is defined
|
|
if ( !(flags & wxURI_STRICT) &&
|
|
HasScheme() && base.HasScheme() &&
|
|
m_scheme == base.m_scheme )
|
|
{
|
|
m_fields -= wxURI_SCHEME;
|
|
}
|
|
|
|
|
|
// Do nothing if this is an absolute wxURI
|
|
// if defined(R.scheme) then
|
|
// T.scheme = R.scheme;
|
|
// T.authority = R.authority;
|
|
// T.path = remove_dot_segments(R.path);
|
|
// T.query = R.query;
|
|
if (HasScheme())
|
|
return;
|
|
|
|
//No scheme - inherit
|
|
m_scheme = base.m_scheme;
|
|
m_fields |= wxURI_SCHEME;
|
|
|
|
// All we need to do for relative URIs with an
|
|
// authority component is just inherit the scheme
|
|
// if defined(R.authority) then
|
|
// T.authority = R.authority;
|
|
// T.path = remove_dot_segments(R.path);
|
|
// T.query = R.query;
|
|
if (HasServer())
|
|
return;
|
|
|
|
//No authority - inherit
|
|
if (base.HasUserInfo())
|
|
{
|
|
m_userinfo = base.m_userinfo;
|
|
m_fields |= wxURI_USERINFO;
|
|
}
|
|
|
|
m_server = base.m_server;
|
|
m_hostType = base.m_hostType;
|
|
m_fields |= wxURI_SERVER;
|
|
|
|
if (base.HasPort())
|
|
{
|
|
m_port = base.m_port;
|
|
m_fields |= wxURI_PORT;
|
|
}
|
|
|
|
|
|
// Simple path inheritance from base
|
|
if (!HasPath())
|
|
{
|
|
// T.path = Base.path;
|
|
m_path = base.m_path;
|
|
m_fields |= wxURI_PATH;
|
|
|
|
|
|
// if defined(R.query) then
|
|
// T.query = R.query;
|
|
// else
|
|
// T.query = Base.query;
|
|
// endif;
|
|
if (!HasQuery())
|
|
{
|
|
m_query = base.m_query;
|
|
m_fields |= wxURI_QUERY;
|
|
}
|
|
}
|
|
else if ( m_path.empty() || m_path[0u] != '/' )
|
|
{
|
|
// if (R.path starts-with "/") then
|
|
// T.path = remove_dot_segments(R.path);
|
|
// else
|
|
// T.path = merge(Base.path, R.path);
|
|
// T.path = remove_dot_segments(T.path);
|
|
// endif;
|
|
// T.query = R.query;
|
|
//
|
|
// So we don't do anything for absolute paths and implement merge for
|
|
// the relative ones
|
|
|
|
wxArrayString our(SplitInSegments(m_path)),
|
|
result(SplitInSegments(base.m_path));
|
|
|
|
if ( !result.empty() )
|
|
result.pop_back();
|
|
|
|
if ( our.empty() )
|
|
{
|
|
// if we have an empty path it means we were constructed from a "."
|
|
// string or something similar (e.g. "././././"), it should count
|
|
// as (empty) segment
|
|
our.push_back("");
|
|
}
|
|
|
|
const wxArrayString::const_iterator end = our.end();
|
|
for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i )
|
|
{
|
|
if ( i->empty() || *i == "." )
|
|
{
|
|
// as in ParsePath(), while normally we ignore the empty
|
|
// segments, we need to take account of them at the end
|
|
if ( i == end - 1 )
|
|
result.push_back("");
|
|
continue;
|
|
}
|
|
|
|
if ( *i == ".." )
|
|
{
|
|
if ( !result.empty() )
|
|
{
|
|
result.pop_back();
|
|
|
|
if ( i == end - 1 )
|
|
result.push_back("");
|
|
}
|
|
//else: just ignore, extra ".." don't accumulate
|
|
}
|
|
else
|
|
{
|
|
if ( result.empty() )
|
|
{
|
|
// ensure that the resulting path will always be absolute
|
|
result.push_back("");
|
|
}
|
|
|
|
result.push_back(*i);
|
|
}
|
|
}
|
|
|
|
m_path = wxJoin(result, '/', '\0');
|
|
}
|
|
|
|
//T.fragment = R.fragment;
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// ParseH16
|
|
//
|
|
// Parses 1 to 4 hex values. Returns true if the first character of the input
|
|
// string is a valid hex character. It is the caller's responsibility to move
|
|
// the input string back to its original position on failure.
|
|
// ---------------------------------------------------------------------------
|
|
|
|
bool wxURI::ParseH16(const char*& uri)
|
|
{
|
|
// h16 = 1*4HEXDIG
|
|
if(!IsHex(*++uri))
|
|
return false;
|
|
|
|
if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
|
|
++uri;
|
|
|
|
return true;
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// ParseIPXXX
|
|
//
|
|
// Parses a certain version of an IP address and moves the input string past
|
|
// it. Returns true if the input string contains the proper version of an ip
|
|
// address. It is the caller's responsibility to move the input string back
|
|
// to its original position on failure.
|
|
// ---------------------------------------------------------------------------
|
|
|
|
bool wxURI::ParseIPv4address(const char*& uri)
|
|
{
|
|
//IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
|
|
//
|
|
//dec-octet = DIGIT ; 0-9
|
|
// / %x31-39 DIGIT ; 10-99
|
|
// / "1" 2DIGIT ; 100-199
|
|
// / "2" %x30-34 DIGIT ; 200-249
|
|
// / "25" %x30-35 ; 250-255
|
|
size_t iIPv4 = 0;
|
|
if (IsDigit(*uri))
|
|
{
|
|
++iIPv4;
|
|
|
|
|
|
//each ip part must be between 0-255 (dupe of version in for loop)
|
|
if( IsDigit(*++uri) && IsDigit(*++uri) &&
|
|
//100 or less (note !)
|
|
!( (*(uri-2) < '2') ||
|
|
//240 or less
|
|
(*(uri-2) == '2' &&
|
|
(*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
|
|
)
|
|
)
|
|
)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if(IsDigit(*uri))++uri;
|
|
|
|
//compilers should unroll this loop
|
|
for(; iIPv4 < 4; ++iIPv4)
|
|
{
|
|
if (*uri != '.' || !IsDigit(*++uri))
|
|
break;
|
|
|
|
//each ip part must be between 0-255
|
|
if( IsDigit(*++uri) && IsDigit(*++uri) &&
|
|
//100 or less (note !)
|
|
!( (*(uri-2) < '2') ||
|
|
//240 or less
|
|
(*(uri-2) == '2' &&
|
|
(*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
|
|
)
|
|
)
|
|
)
|
|
{
|
|
return false;
|
|
}
|
|
if(IsDigit(*uri))++uri;
|
|
}
|
|
}
|
|
return iIPv4 == 4;
|
|
}
|
|
|
|
bool wxURI::ParseIPv6address(const char*& uri)
|
|
{
|
|
// IPv6address = 6( h16 ":" ) ls32
|
|
// / "::" 5( h16 ":" ) ls32
|
|
// / [ h16 ] "::" 4( h16 ":" ) ls32
|
|
// / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
|
|
// / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
|
|
// / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
|
|
// / [ *4( h16 ":" ) h16 ] "::" ls32
|
|
// / [ *5( h16 ":" ) h16 ] "::" h16
|
|
// / [ *6( h16 ":" ) h16 ] "::"
|
|
|
|
size_t numPrefix = 0,
|
|
maxPostfix;
|
|
|
|
bool bEndHex = false;
|
|
|
|
for( ; numPrefix < 6; ++numPrefix)
|
|
{
|
|
if(!ParseH16(uri))
|
|
{
|
|
--uri;
|
|
bEndHex = true;
|
|
break;
|
|
}
|
|
|
|
if(*uri != ':')
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
if(!bEndHex && !ParseH16(uri))
|
|
{
|
|
--uri;
|
|
|
|
if (numPrefix)
|
|
return false;
|
|
|
|
if (*uri == ':')
|
|
{
|
|
if (*++uri != ':')
|
|
return false;
|
|
|
|
maxPostfix = 5;
|
|
}
|
|
else
|
|
maxPostfix = 6;
|
|
}
|
|
else
|
|
{
|
|
if (*uri != ':' || *(uri+1) != ':')
|
|
{
|
|
if (numPrefix != 6)
|
|
return false;
|
|
|
|
while (*--uri != ':') {}
|
|
++uri;
|
|
|
|
const char * const start = uri;
|
|
//parse ls32
|
|
// ls32 = ( h16 ":" h16 ) / IPv4address
|
|
if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
|
|
return true;
|
|
|
|
uri = start;
|
|
|
|
if (ParseIPv4address(uri))
|
|
return true;
|
|
else
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
uri += 2;
|
|
|
|
if (numPrefix > 3)
|
|
maxPostfix = 0;
|
|
else
|
|
maxPostfix = 4 - numPrefix;
|
|
}
|
|
}
|
|
|
|
bool bAllowAltEnding = maxPostfix == 0;
|
|
|
|
for(; maxPostfix != 0; --maxPostfix)
|
|
{
|
|
if(!ParseH16(uri) || *uri != ':')
|
|
return false;
|
|
}
|
|
|
|
if(numPrefix <= 4)
|
|
{
|
|
const char * const start = uri;
|
|
//parse ls32
|
|
// ls32 = ( h16 ":" h16 ) / IPv4address
|
|
if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
|
|
return true;
|
|
|
|
uri = start;
|
|
|
|
if (ParseIPv4address(uri))
|
|
return true;
|
|
|
|
uri = start;
|
|
|
|
if (!bAllowAltEnding)
|
|
return false;
|
|
}
|
|
|
|
if(numPrefix <= 5 && ParseH16(uri))
|
|
return true;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool wxURI::ParseIPvFuture(const char*& uri)
|
|
{
|
|
// IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
|
|
if (*++uri != 'v' || !IsHex(*++uri))
|
|
return false;
|
|
|
|
while (IsHex(*++uri))
|
|
;
|
|
|
|
if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
|
|
return false;
|
|
|
|
while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// IsXXX
|
|
//
|
|
// Returns true if the passed in character meets the criteria of the method
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
|
bool wxURI::IsUnreserved(char c)
|
|
{
|
|
return IsAlpha(c) ||
|
|
IsDigit(c) ||
|
|
c == '-' ||
|
|
c == '.' ||
|
|
c == '_' ||
|
|
c == '~'
|
|
;
|
|
}
|
|
|
|
bool wxURI::IsReserved(char c)
|
|
{
|
|
return IsGenDelim(c) || IsSubDelim(c);
|
|
}
|
|
|
|
// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
|
|
bool wxURI::IsGenDelim(char c)
|
|
{
|
|
return c == ':' ||
|
|
c == '/' ||
|
|
c == '?' ||
|
|
c == '#' ||
|
|
c == '[' ||
|
|
c == ']' ||
|
|
c == '@';
|
|
}
|
|
|
|
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
|
|
// / "*" / "+" / "," / ";" / "="
|
|
bool wxURI::IsSubDelim(char c)
|
|
{
|
|
return c == '!' ||
|
|
c == '$' ||
|
|
c == '&' ||
|
|
c == '\'' ||
|
|
c == '(' ||
|
|
c == ')' ||
|
|
c == '*' ||
|
|
c == '+' ||
|
|
c == ',' ||
|
|
c == ';' ||
|
|
c == '='
|
|
;
|
|
}
|
|
|
|
bool wxURI::IsHex(char c)
|
|
{
|
|
return IsDigit(c) ||
|
|
(c >= 'a' && c <= 'f') ||
|
|
(c >= 'A' && c <= 'F');
|
|
}
|
|
|
|
bool wxURI::IsAlpha(char c)
|
|
{
|
|
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
|
|
}
|
|
|
|
bool wxURI::IsDigit(char c)
|
|
{
|
|
return c >= '0' && c <= '9';
|
|
}
|
|
|
|
bool wxURI::IsEndPath(char c)
|
|
{
|
|
return c == '\0' || c == '#' || c == '?';
|
|
}
|
|
|