wxXML load/save improvements: added ability to not ignore whitespace and specify indentation level (patch #1541888)

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@41157 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Václav Slavík
2006-09-11 11:08:49 +00:00
parent 847dfdb422
commit 538f383019
4 changed files with 131 additions and 50 deletions

View File

@@ -18,7 +18,7 @@ A simple example of using XML classes is:
\begin{verbatim}
wxXmlDocument doc;
if (!doc.Load(wxT("myfile.xml"))
if (!doc.Load(wxT("myfile.xml")))
return false;
// start processing the XML file
@@ -51,6 +51,23 @@ while (child) {
}
\end{verbatim}
{\bf Note:} if you want to preserve the original formatting of the loaded file including whitespaces
and indentation, you need to turn off whitespace-only textnode removal and automatic indentation:
\begin{verbatim}
wxXmlDocument doc;
doc.Load(wxT("myfile.xml"), wxT("UTF-8"), wxXMLDOC_KEEP_WHITESPACE_NODES);
doc.Save(wxT("myfile2.xml"), wxXML_NO_INDENTATION); // myfile2.xml will be identical to myfile.xml
\end{verbatim}
Using default parameters, you will get a reformatted document which in general is different from
the original loaded content:
\begin{verbatim}
wxXmlDocument doc;
doc.Load(wxT("myfile.xml"));
doc.Save(wxT("myfile2.xml")); // myfile2.xml != myfile.xml
\end{verbatim}
\wxheading{Derived from}
@@ -74,11 +91,11 @@ while (child) {
\func{}{wxXmlDocument}{\void}
\func{}{wxXmlDocument}{\param{const wxString\& }{filename}, \param{const wxString\& }{encoding = wxT("UTF-8")}}
\func{}{wxXmlDocument}{\param{const wxString\& }{filename}, \param{const wxString\& }{encoding = wxT("UTF-8")}, \param{int }{flags = wxXMLDOC_NONE}}
Loads the given {\it filename} using the given encoding. See \helpref{Load()}{wxxmldocumentload}.
\func{}{wxXmlDocument}{\param{wxInputStream\& }{stream}, \param{const wxString\& }{encoding = wxT("UTF-8")}}
\func{}{wxXmlDocument}{\param{wxInputStream\& }{stream}, \param{const wxString\& }{encoding = wxT("UTF-8")}, \param{int }{flags = wxXMLDOC_NONE}}
Loads the XML document from given stream using the given encoding. See \helpref{Load()}{wxxmldocumentload}.
@@ -149,23 +166,36 @@ Returns \true if the document has been loaded successfully.
\membersection{wxXmlDocument::Load}\label{wxxmldocumentload}
\func{bool}{Load}{\param{const wxString\& }{filename}, \param{const wxString\& }{encoding = wxT("UTF-8")}}
\func{bool}{Load}{\param{const wxString\& }{filename}, \param{const wxString\& }{encoding = wxT("UTF-8")}, \param{int }{flags = wxXMLDOC_NONE}}
Parses {\it filename} as an xml document and loads data. Returns \true on success, \false otherwise.
Parses {\it filename} as an xml document and loads its data.
\func{bool}{Load}{\param{wxInputStream\& }{stream}, \param{const wxString\& }{encoding = wxT("UTF-8")}}
If {\tt flags} does not contain {\tt wxXMLDOC\_KEEP\_WHITESPACE\_NODES}, then, while loading, all nodes of
type {\tt wxXML\_TEXT\_NODE} (see \helpref{wxXmlNode}{wxxmlnode}) are automatically skipped if they
contain only whitespace.
The removal of these nodes makes the load process slightly faster and requires less memory; however, it
makes it impossible to recreate the loaded text exactly with a later \helpref{Save}{wxxmldocumentsave} call.
Read the initial description of this class for more info.
Returns \true on success, \false otherwise.
\func{bool}{Load}{\param{wxInputStream\& }{stream}, \param{const wxString\& }{encoding = wxT("UTF-8")}, \param{int }{flags = wxXMLDOC_NONE}}
Like above but takes the data from given input stream.
\membersection{wxXmlDocument::Save}\label{wxxmldocumentsave}
\constfunc{bool}{Save}{\param{const wxString\& }{filename}}
\constfunc{bool}{Save}{\param{const wxString\& }{filename}, \param{int }{indentstep = 1}}
Saves XML tree creating a file named with given string.
\constfunc{bool}{Save}{\param{wxOutputStream\& }{stream}}
If {\tt indentstep} is greater than or equal to zero, then, while saving, an automatic indentation
is added with steps composed by {\tt indentstep} spaces.
If {\tt indentstep} is {\tt wxXML\_NO\_INDENTATION}, then automatic indentation is turned off.
Saves XML tree in the given output stream.
\constfunc{bool}{Save}{\param{wxOutputStream\& }{stream}, \param{int }{indentstep = 1}}
Saves XML tree in the given output stream. See other overload for a description of {\tt indentstep}.
\membersection{wxXmlDocument::SetEncoding}\label{wxxmldocumentsetencoding}

View File

@@ -124,7 +124,7 @@ Removes the first properties which has the given {\it name} from the list of pro
\membersection{wxXmlNode::GetChildren}\label{wxxmlnodegetchildren}
\constfunc{wxXmlNode*}{GetChildren}{\param{void}{}}
\constfunc{wxXmlNode*}{GetChildren}{\void}
Returns the first child of this node.
To get a pointer to the second child of this node (if it does exist), use the
@@ -139,6 +139,17 @@ Be aware that for nodes of type \texttt{wxXML\_ELEMENT\_NODE} (the most used nod
content is an empty string. See \helpref{GetNodeContent()}{wxxmlnodegetnodecontent} for more details.
\membersection{wxXmlNode::GetDepth}\label{wxxmlnodegetdepth}
\constfunc{int}{GetDepth}{\param{wxXmlNode* }{grandparent = NULL}}
Returns the number of nodes which separate this node from {\tt grandparent}.
This function searches only the parents of this node until it finds {\tt grandparent}
or the \NULL node (which is the parent of non-linked nodes or the parent of a
\helpref{wxXmlDocument}{wxxmldocument}'s root node).
\membersection{wxXmlNode::GetNodeContent}\label{wxxmlnodegetnodecontent}
\constfunc{wxString}{GetNodeContent}{\void}
@@ -220,6 +231,14 @@ If {\it before\_node} is \NULL, then {\it child} is prepended to the list of chi
becomes the first child of this node.
Returns \true if {\it before\_node} has been found and the {\it child} node has been inserted.
\membersection{wxXmlNode::IsWhitespaceOnly}\label{wxxmlnodecontainsiswhitespaceonly}
\constfunc{bool}{IsWhitespaceOnly}{\void}
Returns \true if the content of this node is a string containing only whitespaces (spaces,
tabs, new lines, etc). Note that this function is locale-independent since the parsing of XML
documents must always produce the exact same tree regardless of the locale it runs under.
\membersection{wxXmlNode::RemoveChild}\label{wxxmlnoderemovechild}
\func{bool}{RemoveChild}{\param{wxXmlNode* }{child}}

View File

@@ -125,6 +125,9 @@ public:
wxString GetName() const { return m_name; }
wxString GetContent() const { return m_content; }
bool IsWhitespaceOnly() const;
int GetDepth(wxXmlNode *grandparent = NULL) const;
// Gets node content from wxXML_ENTITY_NODE
// The problem is, <tag>content</tag> is represented as
// wxXML_ENTITY_NODE name="tag", content=""
@@ -165,8 +168,15 @@ private:
// Sentinel accepted as the indentstep argument of wxXmlDocument::Save:
// any negative step turns automatic indentation off.
#define wxXML_NO_INDENTATION (-1)
// Bit flags accepted by the flags argument of wxXmlDocument::Load.
enum wxXmlDocumentLoadFlag
{
    // default: text nodes containing only whitespace are dropped while loading
    wxXMLDOC_NONE = 0,
    // keep text nodes even when they contain only whitespace
    wxXMLDOC_KEEP_WHITESPACE_NODES = 1 << 0
};
// This class holds XML data/document as parsed by XML parser.
@@ -187,13 +197,13 @@ public:
// Parses .xml file and loads data. Returns TRUE on success, FALSE
// otherwise.
virtual bool Load(const wxString& filename,
const wxString& encoding = wxT("UTF-8"));
const wxString& encoding = wxT("UTF-8"), int flags = wxXMLDOC_NONE);
virtual bool Load(wxInputStream& stream,
const wxString& encoding = wxT("UTF-8"));
const wxString& encoding = wxT("UTF-8"), int flags = wxXMLDOC_NONE);
// Saves document as .xml file.
virtual bool Save(const wxString& filename) const;
virtual bool Save(wxOutputStream& stream) const;
virtual bool Save(const wxString& filename, int indentstep = 1) const;
virtual bool Save(wxOutputStream& stream, int indentstep = 1) const;
bool IsOk() const { return m_root != NULL; }

View File

@@ -39,6 +39,9 @@ WX_CHECK_BUILD_OPTIONS("wxXML")
IMPLEMENT_CLASS(wxXmlDocument, wxObject)
// a private utility used by wxXML
static bool wxIsWhiteOnly(const wxChar *buf);
//-----------------------------------------------------------------------------
// wxXmlNode
@@ -309,6 +312,28 @@ wxString wxXmlNode::GetNodeContent() const
return wxEmptyString;
}
// Counts how many nodes lie between this node and grandparent by walking up
// the parent chain. Returns 0 when grandparent is the direct parent. With the
// default NULL argument the walk stops at the top of the chain. Returns
// wxNOT_FOUND if the chain ends without meeting a non-NULL grandparent.
int wxXmlNode::GetDepth(wxXmlNode *grandparent) const
{
    const wxXmlNode *ancestor = GetParent();
    int hops = 0;

    while (ancestor != grandparent)
    {
        // ran off the top of the tree without meeting grandparent
        if (!ancestor)
            return wxNOT_FOUND;

        ancestor = ancestor->GetParent();
        ++hops;
    }

    return hops;
}
// Tells whether this node's content holds nothing but whitespace characters.
bool wxXmlNode::IsWhitespaceOnly() const
{
    // delegate to the file-private helper, which scans the raw characters
    const bool blankContent = wxIsWhiteOnly(m_content);
    return blankContent;
}
//-----------------------------------------------------------------------------
@@ -364,20 +389,20 @@ void wxXmlDocument::DoCopy(const wxXmlDocument& doc)
m_root = new wxXmlNode(*doc.m_root);
}
bool wxXmlDocument::Load(const wxString& filename, const wxString& encoding)
bool wxXmlDocument::Load(const wxString& filename, const wxString& encoding, int flags)
{
wxFileInputStream stream(filename);
if (!stream.Ok())
return false;
return Load(stream, encoding);
return Load(stream, encoding, flags);
}
bool wxXmlDocument::Save(const wxString& filename) const
bool wxXmlDocument::Save(const wxString& filename, int indentstep) const
{
wxFileOutputStream stream(filename);
if (!stream.Ok())
return false;
return Save(stream);
return Save(stream, indentstep);
}
@@ -386,11 +411,6 @@ bool wxXmlDocument::Save(const wxString& filename) const
// wxXmlDocument loading routines
//-----------------------------------------------------------------------------
/*
FIXME:
- process all elements, including CDATA
*/
// converts Expat-produced string in UTF-8 into wxString using the specified
// conv or keep in UTF-8 if conv is NULL
static wxString CharToString(wxMBConv *conv,
@@ -417,6 +437,16 @@ static wxString CharToString(wxMBConv *conv,
#endif // wxUSE_UNICODE/!wxUSE_UNICODE
}
// returns true if the given string contains only whitespaces
bool wxIsWhiteOnly(const wxChar *buf)
{
for (const wxChar *c = buf; *c != wxT('\0'); c++)
if (*c != wxT(' ') && *c != wxT('\t') && *c != wxT('\n') && *c != wxT('\r'))
return false;
return true;
}
struct wxXmlParsingContext
{
wxMBConv *conv;
@@ -426,6 +456,7 @@ struct wxXmlParsingContext
wxString encoding;
wxString version;
bool bLastCdata;
bool removeWhiteOnlyNodes;
};
extern "C" {
@@ -462,42 +493,32 @@ extern "C" {
static void TextHnd(void *userData, const char *s, int len)
{
wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
char *buf = new char[len + 1];
buf[len] = '\0';
memcpy(buf, s, (size_t)len);
wxString str = CharToString(ctx->conv, s, len);
if (ctx->lastAsText)
{
if ( ctx->bLastCdata )
{
ctx->lastAsText->SetContent(ctx->lastAsText->GetContent() +
CharToString(NULL, buf));
CharToString(NULL, s, len));
}
else
{
ctx->lastAsText->SetContent(ctx->lastAsText->GetContent() +
CharToString(ctx->conv, buf));
ctx->lastAsText->SetContent(ctx->lastAsText->GetContent() + str);
}
}
else
{
bool whiteOnly = true;
for (char *c = buf; *c != '\0'; c++)
if (*c != ' ' && *c != '\t' && *c != '\n' && *c != '\r')
{
whiteOnly = false;
break;
}
bool whiteOnly = false;
if (ctx->removeWhiteOnlyNodes)
whiteOnly = wxIsWhiteOnly(str);
if (!whiteOnly)
{
ctx->lastAsText = new wxXmlNode(wxXML_TEXT_NODE, wxT("text"),
CharToString(ctx->conv, buf));
ctx->lastAsText = new wxXmlNode(wxXML_TEXT_NODE, wxT("text"), str);
ctx->node->AddChild(ctx->lastAsText);
}
}
delete[] buf;
}
}
@@ -593,7 +614,7 @@ static int UnknownEncodingHnd(void * WXUNUSED(encodingHandlerData),
}
}
bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding)
bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding, int flags)
{
#if wxUSE_UNICODE
(void)encoding;
@@ -614,6 +635,7 @@ bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding)
if ( encoding != wxT("UTF-8") && encoding != wxT("utf-8") )
ctx.conv = new wxCSConv(encoding);
#endif
ctx.removeWhiteOnlyNodes = (flags & wxXMLDOC_KEEP_WHITESPACE_NODES) == 0;
ctx.bLastCdata = false;
XML_SetUserData(parser, (void*)&ctx);
@@ -755,7 +777,7 @@ inline static void OutputIndentation(wxOutputStream& stream, int indent)
}
static void OutputNode(wxOutputStream& stream, wxXmlNode *node, int indent,
wxMBConv *convMem, wxMBConv *convFile)
wxMBConv *convMem, wxMBConv *convFile, int indentstep)
{
wxXmlNode *n, *prev;
wxXmlProperty *prop;
@@ -793,13 +815,13 @@ static void OutputNode(wxOutputStream& stream, wxXmlNode *node, int indent,
n = node->GetChildren();
while (n)
{
if (n && n->GetType() != wxXML_TEXT_NODE)
OutputIndentation(stream, indent + 1);
OutputNode(stream, n, indent + 1, convMem, convFile);
if (indentstep >= 0 && n && n->GetType() != wxXML_TEXT_NODE)
OutputIndentation(stream, indent + indentstep);
OutputNode(stream, n, indent + indentstep, convMem, convFile, indentstep);
prev = n;
n = n->GetNext();
}
if (prev && prev->GetType() != wxXML_TEXT_NODE)
if (indentstep >= 0 && prev && prev->GetType() != wxXML_TEXT_NODE)
OutputIndentation(stream, indent);
OutputString(stream, wxT("</"));
OutputString(stream, node->GetName());
@@ -820,7 +842,7 @@ static void OutputNode(wxOutputStream& stream, wxXmlNode *node, int indent,
}
}
bool wxXmlDocument::Save(wxOutputStream& stream) const
bool wxXmlDocument::Save(wxOutputStream& stream, int indentstep) const
{
if ( !IsOk() )
return false;
@@ -844,7 +866,7 @@ bool wxXmlDocument::Save(wxOutputStream& stream) const
GetVersion().c_str(), GetFileEncoding().c_str());
OutputString(stream, s);
OutputNode(stream, GetRoot(), 0, convMem, convFile);
OutputNode(stream, GetRoot(), 0, convMem, convFile, indentstep);
OutputString(stream, wxT("\n"));
if ( convFile )