Return false from wxXmlDocument::Save() instead of crashing if the document contents can't be converted to the target encoding (#10064)

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@56215 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2008-10-10 16:13:53 +00:00
parent ddf01bdb96
commit e767076e80
2 changed files with 182 additions and 115 deletions

View File

@@ -29,6 +29,7 @@
#include "wx/datstrm.h" #include "wx/datstrm.h"
#include "wx/zstream.h" #include "wx/zstream.h"
#include "wx/strconv.h" #include "wx/strconv.h"
#include "wx/ptr_scpd.h"
#include "expat.h" // from Expat #include "expat.h" // from Expat
@@ -392,10 +393,10 @@ bool wxXmlNode::IsWhitespaceOnly() const
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Default ctor: creates a document without a root node, using "1.0" as the
// XML version and "utf-8" as the file encoding.
wxXmlDocument::wxXmlDocument()
    : m_version(wxS("1.0")),
      m_fileEncoding(wxS("utf-8")),
      m_root(NULL)
{
#if !wxUSE_UNICODE
    // the in-memory encoding is only initialized in non-Unicode builds
    m_encoding = wxS("UTF-8");
#endif
}
@@ -494,7 +495,7 @@ bool wxIsWhiteOnly(const wxString& buf)
for ( wxString::const_iterator i = buf.begin(); i != buf.end(); ++i ) for ( wxString::const_iterator i = buf.begin(); i != buf.end(); ++i )
{ {
wxChar c = *i; wxChar c = *i;
if ( c != wxT(' ') && c != wxT('\t') && c != wxT('\n') && c != wxT('\r')) if ( c != wxS(' ') && c != wxS('\t') && c != wxS('\n') && c != wxS('\r'))
return false; return false;
} }
return true; return true;
@@ -594,7 +595,7 @@ static void TextHnd(void *userData, const char *s, int len)
if (!whiteOnly) if (!whiteOnly)
{ {
wxXmlNode *textnode = wxXmlNode *textnode =
new wxXmlNode(wxXML_TEXT_NODE, wxT("text"), str, new wxXmlNode(wxXML_TEXT_NODE, wxS("text"), str,
XML_GetCurrentLineNumber(ctx->parser)); XML_GetCurrentLineNumber(ctx->parser));
ASSERT_LAST_CHILD_OK(ctx); ASSERT_LAST_CHILD_OK(ctx);
@@ -609,7 +610,7 @@ static void StartCdataHnd(void *userData)
wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData; wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
wxXmlNode *textnode = wxXmlNode *textnode =
new wxXmlNode(wxXML_CDATA_SECTION_NODE, wxT("cdata"), wxT(""), new wxXmlNode(wxXML_CDATA_SECTION_NODE, wxS("cdata"), wxS(""),
XML_GetCurrentLineNumber(ctx->parser)); XML_GetCurrentLineNumber(ctx->parser));
ASSERT_LAST_CHILD_OK(ctx); ASSERT_LAST_CHILD_OK(ctx);
@@ -625,7 +626,7 @@ static void CommentHnd(void *userData, const char *data)
{ {
wxXmlNode *commentnode = wxXmlNode *commentnode =
new wxXmlNode(wxXML_COMMENT_NODE, new wxXmlNode(wxXML_COMMENT_NODE,
wxT("comment"), CharToString(ctx->conv, data), wxS("comment"), CharToString(ctx->conv, data),
XML_GetCurrentLineNumber(ctx->parser)); XML_GetCurrentLineNumber(ctx->parser));
ASSERT_LAST_CHILD_OK(ctx); ASSERT_LAST_CHILD_OK(ctx);
@@ -648,10 +649,10 @@ static void DefaultHnd(void *userData, const char *s, int len)
wxString buf = CharToString(ctx->conv, s, (size_t)len); wxString buf = CharToString(ctx->conv, s, (size_t)len);
int pos; int pos;
pos = buf.Find(wxT("encoding=")); pos = buf.Find(wxS("encoding="));
if (pos != wxNOT_FOUND) if (pos != wxNOT_FOUND)
ctx->encoding = buf.Mid(pos + 10).BeforeFirst(buf[(size_t)pos+9]); ctx->encoding = buf.Mid(pos + 10).BeforeFirst(buf[(size_t)pos+9]);
pos = buf.Find(wxT("version=")); pos = buf.Find(wxS("version="));
if (pos != wxNOT_FOUND) if (pos != wxNOT_FOUND)
ctx->version = buf.Mid(pos + 9).BeforeFirst(buf[(size_t)pos+8]); ctx->version = buf.Mid(pos + 9).BeforeFirst(buf[(size_t)pos+8]);
} }
@@ -704,10 +705,10 @@ bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding, int fl
bool done; bool done;
XML_Parser parser = XML_ParserCreate(NULL); XML_Parser parser = XML_ParserCreate(NULL);
ctx.encoding = wxT("UTF-8"); // default in absence of encoding="" ctx.encoding = wxS("UTF-8"); // default in absence of encoding=""
ctx.conv = NULL; ctx.conv = NULL;
#if !wxUSE_UNICODE #if !wxUSE_UNICODE
if ( encoding.CmpNoCase(wxT("UTF-8")) != 0 ) if ( encoding.CmpNoCase(wxS("UTF-8")) != 0 )
ctx.conv = new wxCSConv(encoding); ctx.conv = new wxCSConv(encoding);
#endif #endif
ctx.removeWhiteOnlyNodes = (flags & wxXMLDOC_KEEP_WHITESPACE_NODES) == 0; ctx.removeWhiteOnlyNodes = (flags & wxXMLDOC_KEEP_WHITESPACE_NODES) == 0;
@@ -767,30 +768,40 @@ bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding, int fl
// wxXmlDocument saving routines // wxXmlDocument saving routines
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// helpers for XML generation
namespace
{
// write string to output: // write string to output:
inline static void OutputString(wxOutputStream& stream, const wxString& str, bool OutputString(wxOutputStream& stream,
wxMBConv *convMem = NULL, const wxString& str,
wxMBConv *convFile = NULL) wxMBConv *convMem,
wxMBConv *convFile)
{ {
if (str.empty()) if (str.empty())
return; return true;
#if wxUSE_UNICODE #if wxUSE_UNICODE
wxUnusedVar(convMem); wxUnusedVar(convMem);
const wxWX2MBbuf buf(str.mb_str(*(convFile ? convFile : &wxConvUTF8))); const wxWX2MBbuf buf(str.mb_str(convFile ? *convFile : wxConvUTF8));
stream.Write((const char*)buf, strlen((const char*)buf)); if ( !buf )
return false;
stream.Write(buf, strlen(buf));
#else // !wxUSE_UNICODE #else // !wxUSE_UNICODE
if ( convFile && convMem ) if ( convFile && convMem )
{ {
wxString str2(str.wc_str(*convMem), *convFile); wxString str2(str.wc_str(*convMem), *convFile);
stream.Write(str2.mb_str(), str2.Len()); stream.Write(str2.mb_str(), str2.length());
} }
else // no conversions to do else // no conversions to do
{ {
stream.Write(str.mb_str(), str.Len()); stream.Write(str.mb_str(), str.length());
} }
#endif // wxUSE_UNICODE/!wxUSE_UNICODE #endif // wxUSE_UNICODE/!wxUSE_UNICODE
return stream.IsOk();
} }
// flags for OutputStringEnt() // flags for OutputStringEnt()
@@ -801,159 +812,192 @@ enum
// Same as above, but create entities first. // Same as above, but create entities first.
// Translates '<' to "&lt;", '>' to "&gt;" and '&' to "&amp;" // Translates '<' to "&lt;", '>' to "&gt;" and '&' to "&amp;"
static void OutputStringEnt(wxOutputStream& stream, const wxString& str, bool OutputStringEnt(wxOutputStream& stream,
wxMBConv *convMem = NULL, const wxString& str,
wxMBConv *convFile = NULL, wxMBConv *convMem,
int flags = 0) wxMBConv *convFile,
int flags = 0)
{ {
wxString buf; const size_t len = str.length();
size_t i, last, len; size_t i,
wxChar c; last = 0;
len = str.Len();
last = 0;
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
{ {
c = str.GetChar(i); wxChar c = str.GetChar(i);
if (c == wxT('<') || c == wxT('>') || if (c == wxS('<') || c == wxS('>') ||
(c == wxT('&') && str.Mid(i+1, 4) != wxT("amp;")) || (c == wxS('&') && str.Mid(i+1, 4) != wxS("amp;")) ||
((flags & XML_ESCAPE_QUOTES) && c == wxT('"'))) ((flags & XML_ESCAPE_QUOTES) && c == wxS('"')))
{ {
OutputString(stream, str.Mid(last, i - last), convMem, convFile); if ( !OutputString(stream, str.substr(last, i), convMem, convFile) )
switch (c) return false;
const char *escaped;
switch ( c )
{ {
case wxT('<'): case wxS('<'):
OutputString(stream, wxT("&lt;")); escaped = "&lt;";
break; break;
case wxT('>'): case wxS('>'):
OutputString(stream, wxT("&gt;")); escaped = "&gt;";
break; break;
case wxT('&'): case wxS('&'):
OutputString(stream, wxT("&amp;")); escaped = "&amp;";
break; break;
case wxT('"'): case wxS('"'):
OutputString(stream, wxT("&quot;")); escaped = "&quot;";
break; break;
default: default:
break; wxFAIL_MSG( "logic error in the code" );
return false;
} }
if ( !OutputString(stream, escaped, convMem, convFile) )
return false;
last = i + 1; last = i + 1;
} }
} }
OutputString(stream, str.Mid(last, i - last), convMem, convFile);
return OutputString(stream, str.substr(last, i), convMem, convFile);
} }
inline static void OutputIndentation(wxOutputStream& stream, int indent) bool OutputIndentation(wxOutputStream& stream,
int indent,
wxMBConv *convMem,
wxMBConv *convFile)
{ {
wxString str = wxT("\n"); wxString str(wxS("\n"));
for (int i = 0; i < indent; i++) str += wxString(2*indent, wxS(' '));
str << wxT(' ') << wxT(' '); return OutputString(stream, str, convMem, convFile);
OutputString(stream, str);
} }
static void OutputNode(wxOutputStream& stream, wxXmlNode *node, int indent, bool OutputNode(wxOutputStream& stream,
wxMBConv *convMem, wxMBConv *convFile, int indentstep) wxXmlNode *node,
int indent,
wxMBConv *convMem,
wxMBConv *convFile,
int indentstep)
{ {
wxXmlNode *n, *prev; bool rc;
wxXmlAttribute *attr;
switch (node->GetType()) switch (node->GetType())
{ {
case wxXML_CDATA_SECTION_NODE: case wxXML_CDATA_SECTION_NODE:
OutputString( stream, wxT("<![CDATA[")); rc = OutputString(stream, wxS("<![CDATA["), convMem, convFile) &&
OutputString( stream, node->GetContent() ); OutputString(stream, node->GetContent(), convMem, convFile) &&
OutputString( stream, wxT("]]>") ); OutputString(stream, wxS("]]>"), convMem, convFile);
break; break;
case wxXML_TEXT_NODE: case wxXML_TEXT_NODE:
OutputStringEnt(stream, node->GetContent(), convMem, convFile); rc = OutputStringEnt(stream, node->GetContent(), convMem, convFile);
break; break;
case wxXML_ELEMENT_NODE: case wxXML_ELEMENT_NODE:
OutputString(stream, wxT("<")); rc = OutputString(stream, wxS("<"), convMem, convFile) &&
OutputString(stream, node->GetName()); OutputString(stream, node->GetName(), convMem, convFile);
attr = node->GetAttributes(); if ( rc )
while (attr)
{ {
OutputString(stream, wxT(" ") + attr->GetName() + wxT("=\"")); for ( wxXmlAttribute *attr = node->GetAttributes();
OutputStringEnt(stream, attr->GetValue(), convMem, convFile, attr && rc;
XML_ESCAPE_QUOTES); attr = attr->GetNext() )
OutputString(stream, wxT("\""));
attr = attr->GetNext();
}
if (node->GetChildren())
{
OutputString(stream, wxT(">"));
prev = NULL;
n = node->GetChildren();
while (n)
{ {
if (indentstep >= 0 && n && n->GetType() != wxXML_TEXT_NODE) rc = OutputString(stream,
OutputIndentation(stream, indent + indentstep); wxS(" ") + attr->GetName() + wxS("=\""),
OutputNode(stream, n, indent + indentstep, convMem, convFile, indentstep); convMem, convFile) &&
prev = n; OutputStringEnt(stream, attr->GetValue(),
n = n->GetNext(); convMem, convFile,
XML_ESCAPE_QUOTES) &&
OutputString(stream, wxS("\""), convMem, convFile);
} }
if (indentstep >= 0 && prev && prev->GetType() != wxXML_TEXT_NODE)
OutputIndentation(stream, indent);
OutputString(stream, wxT("</"));
OutputString(stream, node->GetName());
OutputString(stream, wxT(">"));
} }
else
OutputString(stream, wxT("/>")); if ( node->GetChildren() )
{
rc = OutputString(stream, wxS(">"), convMem, convFile);
wxXmlNode *prev = NULL;
for ( wxXmlNode *n = node->GetChildren();
n && rc;
n = n->GetNext() )
{
if ( indentstep >= 0 && n->GetType() != wxXML_TEXT_NODE )
{
rc = OutputIndentation(stream, indent + indentstep,
convMem, convFile);
}
if ( rc )
rc = OutputNode(stream, n, indent + indentstep,
convMem, convFile, indentstep);
prev = n;
}
if ( rc && indentstep >= 0 &&
prev && prev->GetType() != wxXML_TEXT_NODE )
{
rc = OutputIndentation(stream, indent, convMem, convFile);
}
if ( rc )
{
rc = OutputString(stream, wxS("</"), convMem, convFile) &&
OutputString(stream, node->GetName(),
convMem, convFile) &&
OutputString(stream, wxS(">"), convMem, convFile);
}
}
else // no children, output "<foo/>"
{
rc = OutputString(stream, wxS("/>"), convMem, convFile);
}
break; break;
case wxXML_COMMENT_NODE: case wxXML_COMMENT_NODE:
OutputString(stream, wxT("<!--")); rc = OutputString(stream, wxS("<!--"), convMem, convFile) &&
OutputString(stream, node->GetContent(), convMem, convFile); OutputString(stream, node->GetContent(), convMem, convFile) &&
OutputString(stream, wxT("-->")); OutputString(stream, wxS("-->"), convMem, convFile);
break; break;
default: default:
wxFAIL_MSG(wxT("unsupported node type")); wxFAIL_MSG("unsupported node type");
rc = false;
} }
return rc;
} }
} // anonymous namespace
// Writes the XML declaration, the tree starting at the root node and a final
// new line to the stream.
//
// Returns false if IsOk() fails or as soon as one of these parts can't be
// written, e.g. because the contents can't be converted to the file encoding.
bool wxXmlDocument::Save(wxOutputStream& stream, int indentstep) const
{
    if ( !IsOk() )
        return false;

    // the converters are held by scoped pointers so that no explicit delete
    // is needed on any of the return paths below
    wxScopedPtr<wxMBConv> convMem;
    wxScopedPtr<wxMBConv> convFile;

#if wxUSE_UNICODE
    convFile.reset(new wxCSConv(GetFileEncoding()));
#else
    const bool needsRecoding =
        GetFileEncoding().CmpNoCase(GetEncoding()) != 0;
    if ( needsRecoding )
    {
        convFile.reset(new wxCSConv(GetFileEncoding()));
        convMem.reset(new wxCSConv(GetEncoding()));
    }
    //else: file and in-memory encodings are the same, both pointers stay NULL
#endif

    wxMBConv * const memConv = convMem.get();
    wxMBConv * const fileConv = convFile.get();

    const wxString xmlDecl = wxString::Format
                             (
                                wxS("<?xml version=\"%s\" encoding=\"%s\"?>\n"),
                                GetVersion(), GetFileEncoding()
                             );
    if ( !OutputString(stream, xmlDecl, memConv, fileConv) )
        return false;

    if ( !OutputNode(stream, GetRoot(), 0, memConv, fileConv, indentstep) )
        return false;

    return OutputString(stream, wxS("\n"), memConv, fileConv);
}
#endif // wxUSE_XML #endif // wxUSE_XML

View File

@@ -163,5 +163,28 @@ void XmlTestCase::LoadSave()
CPPUNIT_ASSERT( doc.Save(sos) ); CPPUNIT_ASSERT( doc.Save(sos) );
CPPUNIT_ASSERT_EQUAL( xmlText, sos.GetString() ); CPPUNIT_ASSERT_EQUAL( xmlText, sos.GetString() );
const char *utf8xmlText =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<word>\n"
" <lang name=\"fr\">\xc3\xa9t\xc3\xa9</lang>\n"
" <lang name=\"ru\">\xd0\xbb\xd0\xb5\xd1\x82\xd0\xbe</lang>\n"
"</word>\n"
;
wxStringInputStream sis8(wxString::FromUTF8(utf8xmlText));
CPPUNIT_ASSERT( doc.Load(sis8) );
// this content can't be represented in Latin-1 as it contains Cyrillic
// letters
doc.SetFileEncoding("ISO-8859-1");
CPPUNIT_ASSERT( !doc.Save(sos) );
// but it should work in UTF-8
wxStringOutputStream sos8;
doc.SetFileEncoding("UTF-8");
CPPUNIT_ASSERT( doc.Save(sos8) );
CPPUNIT_ASSERT_EQUAL( utf8xmlText, sos8.GetString().ToUTF8() );
} }