Add support for DOCTYPE to wxXmlDocument.

Allow retrieving the DOCTYPE specified in an existing document as well as
specifying the DOCTYPE to use when writing a new one.

Closes #13779.
This commit is contained in:
Nick Matthews
2015-04-26 15:34:18 +02:00
committed by Vadim Zeitlin
parent 41b1ed9c2e
commit d13278ecc3
5 changed files with 336 additions and 2 deletions

View File

@@ -47,6 +47,7 @@ All:
- Allow calling wxItemContainer::Add() and similar with std::vector<> argument.
- Add "%z" support to printf()-like functions like wxString::Format() (RIVDSL).
- Add DOCTYPE support to wxXmlDocument (Nick Matthews).
- Add wxPowerResourceBlocker (Tobias Taschner).
- Add wxApp::StoreCurrentException() and RethrowStoredException() and implement
their functionality by default when using C++11 compiler.

View File

@@ -236,6 +236,35 @@ inline void wxXmlNode::SetProperties(wxXmlAttribute *prop)
// Stores the components of a DOCTYPE declaration: the root element name and
// the system and public identifiers, any of which may be empty.
class WXDLLIMPEXP_XML wxXmlDoctype
{
public:
// All arguments default to empty strings, so this also serves as the default
// constructor creating an empty DOCTYPE.
//
// Notice that the system identifier comes before the public one here even
// though GetFullString() outputs the public identifier first.
explicit
wxXmlDoctype(const wxString& name = wxString(),
const wxString& sysid = wxString(),
const wxString& pubid = wxString())
: m_rootName(name), m_systemId(sysid), m_publicId(pubid)
{}
// Default copy ctor and assignment operators are ok.
// Returns true if GetFullString() returns a non-empty string.
bool IsValid() const;
// Resets all three components to empty strings.
void Clear();
// Accessors for the individual components.
const wxString& GetRootName() const { return m_rootName; }
const wxString& GetSystemId() const { return m_systemId; }
const wxString& GetPublicId() const { return m_publicId; }
// Returns the DOCTYPE contents in the form in which wxXmlDocument::Save()
// writes them between "<!DOCTYPE " and ">", or an empty string if no
// DOCTYPE can be generated from the current values.
wxString GetFullString() const;
private:
wxString m_rootName;
wxString m_systemId;
wxString m_publicId;
};
// special indentation value for wxXmlDocument::Save
#define wxXML_NO_INDENTATION (-1)
@@ -287,6 +316,7 @@ public:
// Note: this is the encoding original file was saved in, *not* the
// encoding of in-memory representation!
const wxString& GetFileEncoding() const { return m_fileEncoding; }
// Returns the DOCTYPE data currently stored in this document.
const wxXmlDoctype& GetDoctype() const { return m_doctype; }
// Write-access methods:
// Returns the current document node and resets the member pointer to NULL,
// so that this document doesn't refer to the node any longer.
wxXmlNode *DetachDocumentNode()
{
    wxXmlNode * const detached = m_docNode;
    m_docNode = NULL;
    return detached;
}
@@ -295,6 +325,7 @@ public:
void SetRoot(wxXmlNode *node);
// Simple setters storing the given value in the corresponding member.
void SetVersion(const wxString& version) { m_version = version; }
void SetFileEncoding(const wxString& encoding) { m_fileEncoding = encoding; }
// Replaces the stored DOCTYPE data; Save() writes a DOCTYPE declaration only
// if the full string of this object is not empty.
void SetDoctype(const wxXmlDoctype& doctype) { m_doctype = doctype; }
void AppendToProlog(wxXmlNode *node);
#if !wxUSE_UNICODE
@@ -313,6 +344,7 @@ private:
#if !wxUSE_UNICODE
wxString m_encoding;
#endif
wxXmlDoctype m_doctype;
wxXmlNode *m_docNode;
void DoCopy(const wxXmlDocument& doc);

View File

@@ -44,13 +44,16 @@ enum wxXmlNodeType
of pseudo-attributes these do not use the node's attribute system. It is the user's
responsibility to encode and decode the instruction text.
The @c wxXML_DOCUMENT_TYPE_NODE is not implemented at this time. Instead,
get and set the DOCTYPE values using the wxXmlDocument class.
If @c wxUSE_UNICODE is 0, all strings are encoded in the encoding given to
wxXmlDocument::Load (default is UTF-8).
@library{wxxml}
@category{xml}
@see wxXmlDocument, wxXmlAttribute
@see wxXmlDocument, wxXmlDoctype, wxXmlAttribute
*/
class wxXmlNode
{
@@ -433,6 +436,99 @@ public:
};
/**
@class wxXmlDoctype
Represents a DOCTYPE Declaration.
Example DOCTYPE: <tt>\<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"\></tt>.
In the above example, "plist" is the name of the root element,
"-//Apple//DTD PLIST 1.0//EN" (without the quotes) is the public identifier and
"http://www.apple.com/DTDs/PropertyList-1.0.dtd" (again, without the quotes) is
the system identifier.
A valid DOCTYPE exists in one of the following forms:
1. A root element name.
2. A root element name and a system identifier.
3. A root element name, a system identifier and a public identifier.
4. A root element name and a public identifier. Although this form is not
valid XML it is valid for SGML.
The DOCTYPE may also contain an internal subset of declarations
added between square brackets at the end.
These have not been implemented at this time.
@since 3.1.0
@library{wxxml}
@category{xml}
@see wxXmlDocument
*/
class wxXmlDoctype
{
public:
/**
Creates and possibly initializes the DOCTYPE.
Note that the system identifier is specified before the public one.
@param rootName
The root name.
@param systemId
The system identifier.
@param publicId
The public identifier.
*/
wxXmlDoctype(const wxString& rootName = wxString(),
const wxString& systemId = wxString(),
const wxString& publicId = wxString());
/**
Removes all the DOCTYPE values.
*/
void Clear();
/**
Returns the root name of the document.
*/
const wxString& GetRootName() const;
/**
Returns the system id of the document.
*/
const wxString& GetSystemId() const;
/**
Returns the public id of the document.
*/
const wxString& GetPublicId() const;
/**
Returns the formatted DOCTYPE contents.
This consists of all the text shown between the opening
"<!DOCTYPE " and closing ">" of a DOCTYPE declaration.
If this object is empty or invalid, i.e. IsValid() returns false, this
method returns an empty string.
*/
wxString GetFullString() const;
/**
Returns true if the contents can produce a valid DOCTYPE string.
For an object to be valid, it must have a non-empty root name and a
valid system identifier (currently the validity checks of the latter
are limited to checking that it doesn't contain both single and double
quotes).
*/
bool IsValid() const;
};
//* special indentation value for wxXmlDocument::Save
#define wxXML_NO_INDENTATION (-1)
@@ -526,10 +622,14 @@ enum wxXmlDocumentLoadFlag
doc.Save("myfile2.xml"); // myfile2.xml != myfile.xml
@endcode
If the root name value of the DOCTYPE is set, either by loading a file with a
DOCTYPE declaration or by setting it directly with the SetDoctype member,
then a DOCTYPE declaration will be added immediately after the XML declaration.
@library{wxxml}
@category{xml}
@see wxXmlNode, wxXmlAttribute
@see wxXmlNode, wxXmlAttribute, wxXmlDoctype
*/
class wxXmlDocument : public wxObject
{
@@ -612,6 +712,13 @@ public:
*/
const wxString& GetFileEncoding() const;
/**
Returns the DOCTYPE declaration data for the document.
@since 3.1.0
*/
const wxXmlDoctype& GetDoctype() const;
/**
Returns the document node of the document.
@@ -700,6 +807,14 @@ public:
*/
void SetFileEncoding(const wxString& encoding);
/**
Sets the data which will appear in the DOCTYPE declaration when the
document is saved.
@since 3.1.0
*/
void SetDoctype(const wxXmlDoctype& doctype);
/**
Sets the root element node of this document.

View File

@@ -402,6 +402,56 @@ bool wxXmlNode::IsWhitespaceOnly() const
//-----------------------------------------------------------------------------
// wxXmlDoctype
//-----------------------------------------------------------------------------
void wxXmlDoctype::Clear()
{
m_rootName.clear();
m_systemId.clear();
m_publicId.clear();
}
// Builds the text appearing between "<!DOCTYPE " and ">" in the declaration.
//
// Returns an empty string if no well-formed declaration can be generated,
// i.e. if
//  - the root element name is empty, or
//  - the public identifier contains a double quote, or
//  - the system identifier contains both single and double quotes.
wxString wxXmlDoctype::GetFullString() const
{
    // There is no DOCTYPE at all without the root element name.
    if ( m_rootName.empty() )
        return wxString();

    wxString content = m_rootName;

    if ( !m_publicId.empty() )
    {
        // The public identifier is always written inside double quotes, so
        // a double quote in it would terminate the literal prematurely and
        // result in a malformed declaration (this character is not allowed
        // in XML public identifiers anyhow).
        if ( m_publicId.find('"') != wxString::npos )
            return wxString();

        content << wxS(" PUBLIC \"") << m_publicId << wxS("\"");
    }

    if ( !m_systemId.empty() )
    {
        // The SYSTEM keyword is only used if there is no public identifier,
        // otherwise the system identifier just follows the public one.
        if ( m_publicId.empty() )
            content << wxS(" SYSTEM");

        // Prefer to use double quotes, but switch to single ones if a
        // double quote appears inside the string to be quoted.
        wxString quote;
        if ( m_systemId.find('"') == wxString::npos )
            quote = wxS('"');
        else if ( m_systemId.find('\'') == wxString::npos )
            quote = wxS('\'');
        else // It's an error if we can't use either kind of quotes.
            return wxString();

        content << wxS(' ') << quote << m_systemId << quote;
    }

    return content;
}
bool wxXmlDoctype::IsValid() const
{
return !GetFullString().empty();
}
//-----------------------------------------------------------------------------
// wxXmlDocument
//-----------------------------------------------------------------------------
@@ -452,6 +502,7 @@ void wxXmlDocument::DoCopy(const wxXmlDocument& doc)
m_encoding = doc.m_encoding;
#endif
m_fileEncoding = doc.m_fileEncoding;
m_doctype = doc.m_doctype;
if (doc.m_docNode)
m_docNode = new wxXmlNode(*doc.m_docNode);
@@ -607,6 +658,7 @@ struct wxXmlParsingContext
node(NULL),
lastChild(NULL),
lastAsText(NULL),
doctype(NULL),
removeWhiteOnlyNodes(false)
{}
@@ -617,6 +669,7 @@ struct wxXmlParsingContext
wxXmlNode *lastAsText; // the last _text_ child of "node"
wxString encoding;
wxString version;
wxXmlDoctype *doctype;
bool removeWhiteOnlyNodes;
};
@@ -747,6 +800,21 @@ static void PIHnd(void *userData, const char *target, const char *data)
ctx->lastAsText = NULL;
}
static void StartDoctypeHnd(void *userData, const char *doctypeName,
const char *sysid, const char *pubid,
int WXUNUSED(has_internal_subset))
{
wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
*ctx->doctype = wxXmlDoctype(CharToString(ctx->conv, doctypeName),
CharToString(ctx->conv, sysid),
CharToString(ctx->conv, pubid));
}
// Counterpart of StartDoctypeHnd() passed to XML_SetDoctypeDeclHandler()
// together with it: intentionally does nothing, as all the DOCTYPE data is
// already stored by the start handler.
static void EndDoctypeHnd(void *WXUNUSED(userData))
{
}
static void DefaultHnd(void *userData, const char *s, int len)
{
// XML header:
@@ -819,6 +887,7 @@ bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding, int fl
if ( encoding.CmpNoCase(wxS("UTF-8")) != 0 )
ctx.conv = new wxCSConv(encoding);
#endif
ctx.doctype = &m_doctype;
ctx.removeWhiteOnlyNodes = (flags & wxXMLDOC_KEEP_WHITESPACE_NODES) == 0;
ctx.parser = parser;
ctx.node = root;
@@ -829,6 +898,7 @@ bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding, int fl
XML_SetCdataSectionHandler(parser, StartCdataHnd, EndCdataHnd);
XML_SetCommentHandler(parser, CommentHnd);
XML_SetProcessingInstructionHandler(parser, PIHnd);
XML_SetDoctypeDeclHandler(parser, StartDoctypeHnd, EndDoctypeHnd);
XML_SetDefaultHandler(parser, DefaultHnd);
XML_SetUnknownEncodingHandler(parser, UnknownEncodingHnd, NULL);
@@ -1133,6 +1203,17 @@ bool wxXmlDocument::Save(wxOutputStream& stream, int indentstep) const
);
bool rc = OutputString(stream, dec, convMem.get(), convFile.get());
if ( rc )
{
const wxString doctype = m_doctype.GetFullString();
if ( !doctype.empty() )
{
rc = OutputString(stream,
wxS("<!DOCTYPE ") + doctype + wxS(">\n"),
convMem.get(), convFile.get());
}
}
wxXmlNode *node = GetDocumentNode();
if ( node )
node = node->GetChildren();

View File

@@ -82,6 +82,8 @@ private:
CPPUNIT_TEST( AppendToProlog );
CPPUNIT_TEST( SetRoot );
CPPUNIT_TEST( CopyNode );
CPPUNIT_TEST( CopyDocument );
CPPUNIT_TEST( Doctype );
CPPUNIT_TEST_SUITE_END();
void InsertChild();
@@ -94,6 +96,8 @@ private:
void AppendToProlog();
void SetRoot();
void CopyNode();
void CopyDocument();
void Doctype();
wxDECLARE_NO_COPY_CLASS(XmlTestCase);
};
@@ -207,6 +211,7 @@ void XmlTestCase::LoadSave()
const char *xmlTextProlog =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<!DOCTYPE resource PUBLIC \"Public-ID\" 'System\"ID\"'>\n"
"<!-- Prolog comment -->\n"
"<?xml-stylesheet href=\"style.css\" type=\"text/css\"?>\n"
"<resource xmlns=\"http://www.wxwidgets.org/wxxrc\" version=\"2.3.0.1\">\n"
@@ -507,3 +512,103 @@ void XmlTestCase::CopyNode()
;
CPPUNIT_ASSERT_EQUAL( xmlTextResult, sos.GetString() );
}
void XmlTestCase::CopyDocument()
{
const char *xmlText =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<!DOCTYPE resource PUBLIC \"Public-ID\" \"System'ID'\">\n"
"<!-- 1st prolog entry -->\n"
"<root>\n"
" <first>Text</first>\n"
" <second/>\n"
"</root>\n"
;
wxXmlDocument doc1;
wxStringInputStream sis(xmlText);
CPPUNIT_ASSERT( doc1.Load(sis) );
wxXmlDocument doc2 = doc1;
wxStringOutputStream sos;
CPPUNIT_ASSERT(doc2.Save(sos));
CPPUNIT_ASSERT_EQUAL( xmlText, sos.GetString() );
}
// Tests loading, querying, modifying and saving the document DOCTYPE.
void XmlTestCase::Doctype()
{
// The system identifier contains double quotes here, so it has to be
// written using single quotes.
const char *xmlText =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<!DOCTYPE root PUBLIC \"Public-ID\" 'System\"ID\"'>\n"
"<root>\n"
" <content/>\n"
"</root>\n"
;
wxStringInputStream sis(xmlText);
wxXmlDocument doc;
CPPUNIT_ASSERT( doc.Load(sis) );
// Check that all the components were extracted from the loaded document
// and that the full string is the same as in the original text.
wxXmlDoctype dt = doc.GetDoctype();
CPPUNIT_ASSERT_EQUAL( "root", dt.GetRootName() );
CPPUNIT_ASSERT_EQUAL( "System\"ID\"", dt.GetSystemId() );
CPPUNIT_ASSERT_EQUAL( "Public-ID", dt.GetPublicId() );
CPPUNIT_ASSERT( dt.IsValid() );
CPPUNIT_ASSERT_EQUAL( "root PUBLIC \"Public-ID\" 'System\"ID\"'", dt.GetFullString() );
// Without the public identifier, SYSTEM keyword must be used.
dt = wxXmlDoctype( dt.GetRootName(), dt.GetSystemId() );
CPPUNIT_ASSERT( dt.IsValid() );
CPPUNIT_ASSERT_EQUAL( "root SYSTEM 'System\"ID\"'", dt.GetFullString() );
// The root name on its own is valid too.
dt = wxXmlDoctype( dt.GetRootName() );
CPPUNIT_ASSERT( dt.IsValid() );
CPPUNIT_ASSERT_EQUAL( "root", dt.GetFullString() );
// Saving the document must use the DOCTYPE set for it instead of the one
// it was loaded with.
doc.SetDoctype(dt);
wxStringOutputStream sos;
CPPUNIT_ASSERT(doc.Save(sos));
const char *xmlText1 =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<!DOCTYPE root>\n"
"<root>\n"
" <content/>\n"
"</root>\n"
;
CPPUNIT_ASSERT_EQUAL( xmlText1, sos.GetString() );
// No DOCTYPE declaration at all must be written for an empty DOCTYPE.
doc.SetDoctype(wxXmlDoctype());
wxStringOutputStream sos2;
CPPUNIT_ASSERT(doc.Save(sos2));
const char *xmlText2 =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<root>\n"
" <content/>\n"
"</root>\n"
;
CPPUNIT_ASSERT_EQUAL( xmlText2, sos2.GetString() );
// A system identifier containing only a single quote is written using
// the default double quotes.
doc.SetDoctype(wxXmlDoctype("root", "Sys'id"));
wxStringOutputStream sos3;
CPPUNIT_ASSERT(doc.Save(sos3));
const char *xmlText3 =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<!DOCTYPE root SYSTEM \"Sys'id\">\n"
"<root>\n"
" <content/>\n"
"</root>\n"
;
CPPUNIT_ASSERT_EQUAL( xmlText3, sos3.GetString() );
// DOCTYPE without the root name is invalid, whichever identifiers it has.
dt = wxXmlDoctype( "", "System\"ID\"", "Public-ID" );
CPPUNIT_ASSERT( !dt.IsValid() );
CPPUNIT_ASSERT_EQUAL( "", dt.GetFullString() );
// Strictly speaking, this is illegal for XML but is legal for SGML.
dt = wxXmlDoctype( "root", "", "Public-ID" );
CPPUNIT_ASSERT( dt.IsValid() );
CPPUNIT_ASSERT_EQUAL( "root PUBLIC \"Public-ID\"", dt.GetFullString() );
// Using both single and double quotes in system ID is not allowed.
dt = wxXmlDoctype( "root", "O'Reilly (\"editor\")", "Public-ID" );
CPPUNIT_ASSERT( !dt.IsValid() );
}