Fixes for parsing invalid HTML without tag ends.
The code in wxHtmlParser supposed in many places that a '<' character must be always followed by a '>' one and could create (and sometimes dereference) invalid iterators if this wasn't the case resulting in asserts from MSVC debug CRT and possibly crashes. Fix this by ensuring that only valid iterators are used and add a trivial unit test for wxHtmlParser which checks that it can parse invalid HTML without crashing. Closes #12869. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@66678 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -227,7 +227,7 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
|
||||
else
|
||||
{
|
||||
while (i < end_pos && *i != wxT('>')) ++i;
|
||||
textBeginning = i+1;
|
||||
textBeginning = i < end_pos ? i+1 : i;
|
||||
}
|
||||
}
|
||||
else ++i;
|
||||
@@ -958,7 +958,7 @@ wxHtmlParser::SkipCommentTag(wxString::const_iterator& start,
|
||||
wxString::const_iterator p = start;
|
||||
|
||||
// comments begin with "<!--" in HTML 4.0
|
||||
if ( p > end - 3 || *++p != '!' || *++p != '-' || *++p != '-' )
|
||||
if ( end - start < 4 || *++p != '!' || *++p != '-' || *++p != '-' )
|
||||
{
|
||||
// not a comment at all
|
||||
return false;
|
||||
|
@@ -94,12 +94,10 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
|
||||
if ( wxHtmlParser::SkipCommentTag(pos, end) )
|
||||
continue;
|
||||
|
||||
size_t tg = Cache().size();
|
||||
Cache().push_back(wxHtmlCacheItem());
|
||||
|
||||
// Remember the starting tag position.
|
||||
wxString::const_iterator stpos = pos++;
|
||||
Cache()[tg].Key = stpos;
|
||||
|
||||
// And look for the ending one.
|
||||
int i;
|
||||
for ( i = 0;
|
||||
pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
|
||||
@@ -110,12 +108,26 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
|
||||
}
|
||||
tagBuffer[i] = wxT('\0');
|
||||
|
||||
Cache()[tg].Name = new wxChar[i+1];
|
||||
memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
|
||||
|
||||
while (pos < end && *pos != wxT('>'))
|
||||
++pos;
|
||||
|
||||
if ( pos == end )
|
||||
{
|
||||
// We didn't find a closing bracket, this is not a valid tag after
|
||||
// all. Notice that we need to roll back pos to avoid creating an
|
||||
// invalid iterator when "++pos" is done in the loop statement.
|
||||
--pos;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// We have a valid tag, add it to the cache.
|
||||
size_t tg = Cache().size();
|
||||
Cache().push_back(wxHtmlCacheItem());
|
||||
Cache()[tg].Key = stpos;
|
||||
Cache()[tg].Name = new wxChar[i+1];
|
||||
memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
|
||||
|
||||
if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
|
||||
{
|
||||
Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
|
||||
@@ -223,7 +235,12 @@ void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
|
||||
bool *hasEnding)
|
||||
{
|
||||
if (Cache().empty())
|
||||
{
|
||||
*end1 =
|
||||
*end2 = inputEnd;
|
||||
*hasEnding = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if (Cache()[m_CachePos].Key != at)
|
||||
{
|
||||
|
@@ -202,6 +202,7 @@ TEST_GUI_OBJECTS = \
|
||||
test_gui_fonttest.o \
|
||||
test_gui_image.o \
|
||||
test_gui_rawbmp.o \
|
||||
test_gui_htmlparser.o \
|
||||
test_gui_htmlwindow.o \
|
||||
test_gui_accelentry.o \
|
||||
test_gui_menu.o \
|
||||
@@ -831,6 +832,9 @@ test_gui_image.o: $(srcdir)/image/image.cpp $(TEST_GUI_ODEP)
|
||||
test_gui_rawbmp.o: $(srcdir)/image/rawbmp.cpp $(TEST_GUI_ODEP)
|
||||
$(CXXC) -c -o $@ $(TEST_GUI_CXXFLAGS) $(srcdir)/image/rawbmp.cpp
|
||||
|
||||
test_gui_htmlparser.o: $(srcdir)/html/htmlparser.cpp $(TEST_GUI_ODEP)
|
||||
$(CXXC) -c -o $@ $(TEST_GUI_CXXFLAGS) $(srcdir)/html/htmlparser.cpp
|
||||
|
||||
test_gui_htmlwindow.o: $(srcdir)/html/htmlwindow.cpp $(TEST_GUI_ODEP)
|
||||
$(CXXC) -c -o $@ $(TEST_GUI_CXXFLAGS) $(srcdir)/html/htmlwindow.cpp
|
||||
|
||||
|
75
tests/html/htmlparser.cpp
Normal file
75
tests/html/htmlparser.cpp
Normal file
@@ -0,0 +1,75 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Name: tests/html/htmlparser.cpp
|
||||
// Purpose: wxHtmlParser tests
|
||||
// Author: Vadim Zeitlin
|
||||
// Created: 2011-01-13
|
||||
// RCS-ID: $Id$
|
||||
// Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// headers
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#include "testprec.h"
|
||||
|
||||
#if wxUSE_HTML
|
||||
|
||||
#ifdef __BORLANDC__
|
||||
#pragma hdrstop
|
||||
#endif
|
||||
|
||||
#ifndef WX_PRECOMP
|
||||
#endif // WX_PRECOMP
|
||||
|
||||
#include "wx/html/htmlpars.h"
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// test class
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
class HtmlParserTestCase : public CppUnit::TestCase
|
||||
{
|
||||
public:
|
||||
HtmlParserTestCase() { }
|
||||
|
||||
private:
|
||||
CPPUNIT_TEST_SUITE( HtmlParserTestCase );
|
||||
CPPUNIT_TEST( Invalid );
|
||||
CPPUNIT_TEST_SUITE_END();
|
||||
|
||||
void Invalid();
|
||||
|
||||
wxDECLARE_NO_COPY_CLASS(HtmlParserTestCase);
|
||||
};
|
||||
|
||||
// register in the unnamed registry so that these tests are run by default
|
||||
CPPUNIT_TEST_SUITE_REGISTRATION( HtmlParserTestCase );
|
||||
|
||||
// also include in it's own registry so that these tests can be run alone
|
||||
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( HtmlParserTestCase, "HtmlParserTestCase" );
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// tests themselves
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// Test that parsing invalid HTML simply fails but doesn't crash for example.
|
||||
void HtmlParserTestCase::Invalid()
|
||||
{
|
||||
class NullParser : public wxHtmlParser
|
||||
{
|
||||
public:
|
||||
virtual wxObject *GetProduct() { return NULL; }
|
||||
|
||||
protected:
|
||||
virtual void AddText(const wxString& WXUNUSED(txt)) { }
|
||||
};
|
||||
|
||||
NullParser p;
|
||||
p.Parse("<");
|
||||
p.Parse("<foo");
|
||||
p.Parse("<!--");
|
||||
p.Parse("<!---");
|
||||
}
|
||||
|
||||
#endif //wxUSE_HTML
|
@@ -187,6 +187,7 @@ TEST_GUI_OBJECTS = \
|
||||
$(OBJS)\test_gui_fonttest.obj \
|
||||
$(OBJS)\test_gui_image.obj \
|
||||
$(OBJS)\test_gui_rawbmp.obj \
|
||||
$(OBJS)\test_gui_htmlparser.obj \
|
||||
$(OBJS)\test_gui_htmlwindow.obj \
|
||||
$(OBJS)\test_gui_accelentry.obj \
|
||||
$(OBJS)\test_gui_menu.obj \
|
||||
@@ -877,6 +878,9 @@ $(OBJS)\test_gui_image.obj: .\image\image.cpp
|
||||
$(OBJS)\test_gui_rawbmp.obj: .\image\rawbmp.cpp
|
||||
$(CXX) -q -c -P -o$@ $(TEST_GUI_CXXFLAGS) .\image\rawbmp.cpp
|
||||
|
||||
$(OBJS)\test_gui_htmlparser.obj: .\html\htmlparser.cpp
|
||||
$(CXX) -q -c -P -o$@ $(TEST_GUI_CXXFLAGS) .\html\htmlparser.cpp
|
||||
|
||||
$(OBJS)\test_gui_htmlwindow.obj: .\html\htmlwindow.cpp
|
||||
$(CXX) -q -c -P -o$@ $(TEST_GUI_CXXFLAGS) .\html\htmlwindow.cpp
|
||||
|
||||
|
@@ -180,6 +180,7 @@ TEST_GUI_OBJECTS = \
|
||||
$(OBJS)\test_gui_fonttest.o \
|
||||
$(OBJS)\test_gui_image.o \
|
||||
$(OBJS)\test_gui_rawbmp.o \
|
||||
$(OBJS)\test_gui_htmlparser.o \
|
||||
$(OBJS)\test_gui_htmlwindow.o \
|
||||
$(OBJS)\test_gui_accelentry.o \
|
||||
$(OBJS)\test_gui_menu.o \
|
||||
@@ -858,6 +859,9 @@ $(OBJS)\test_gui_image.o: ./image/image.cpp
|
||||
$(OBJS)\test_gui_rawbmp.o: ./image/rawbmp.cpp
|
||||
$(CXX) -c -o $@ $(TEST_GUI_CXXFLAGS) $(CPPDEPS) $<
|
||||
|
||||
$(OBJS)\test_gui_htmlparser.o: ./html/htmlparser.cpp
|
||||
$(CXX) -c -o $@ $(TEST_GUI_CXXFLAGS) $(CPPDEPS) $<
|
||||
|
||||
$(OBJS)\test_gui_htmlwindow.o: ./html/htmlwindow.cpp
|
||||
$(CXX) -c -o $@ $(TEST_GUI_CXXFLAGS) $(CPPDEPS) $<
|
||||
|
||||
|
@@ -182,6 +182,7 @@ TEST_GUI_OBJECTS = \
|
||||
$(OBJS)\test_gui_fonttest.obj \
|
||||
$(OBJS)\test_gui_image.obj \
|
||||
$(OBJS)\test_gui_rawbmp.obj \
|
||||
$(OBJS)\test_gui_htmlparser.obj \
|
||||
$(OBJS)\test_gui_htmlwindow.obj \
|
||||
$(OBJS)\test_gui_accelentry.obj \
|
||||
$(OBJS)\test_gui_menu.obj \
|
||||
@@ -1003,6 +1004,9 @@ $(OBJS)\test_gui_image.obj: .\image\image.cpp
|
||||
$(OBJS)\test_gui_rawbmp.obj: .\image\rawbmp.cpp
|
||||
$(CXX) /c /nologo /TP /Fo$@ $(TEST_GUI_CXXFLAGS) .\image\rawbmp.cpp
|
||||
|
||||
$(OBJS)\test_gui_htmlparser.obj: .\html\htmlparser.cpp
|
||||
$(CXX) /c /nologo /TP /Fo$@ $(TEST_GUI_CXXFLAGS) .\html\htmlparser.cpp
|
||||
|
||||
$(OBJS)\test_gui_htmlwindow.obj: .\html\htmlwindow.cpp
|
||||
$(CXX) /c /nologo /TP /Fo$@ $(TEST_GUI_CXXFLAGS) .\html\htmlwindow.cpp
|
||||
|
||||
|
@@ -422,6 +422,7 @@ TEST_GUI_OBJECTS = &
|
||||
$(OBJS)\test_gui_fonttest.obj &
|
||||
$(OBJS)\test_gui_image.obj &
|
||||
$(OBJS)\test_gui_rawbmp.obj &
|
||||
$(OBJS)\test_gui_htmlparser.obj &
|
||||
$(OBJS)\test_gui_htmlwindow.obj &
|
||||
$(OBJS)\test_gui_accelentry.obj &
|
||||
$(OBJS)\test_gui_menu.obj &
|
||||
@@ -916,6 +917,9 @@ $(OBJS)\test_gui_image.obj : .AUTODEPEND .\image\image.cpp
|
||||
$(OBJS)\test_gui_rawbmp.obj : .AUTODEPEND .\image\rawbmp.cpp
|
||||
$(CXX) -bt=nt -zq -fo=$^@ $(TEST_GUI_CXXFLAGS) $<
|
||||
|
||||
$(OBJS)\test_gui_htmlparser.obj : .AUTODEPEND .\html\htmlparser.cpp
|
||||
$(CXX) -bt=nt -zq -fo=$^@ $(TEST_GUI_CXXFLAGS) $<
|
||||
|
||||
$(OBJS)\test_gui_htmlwindow.obj : .AUTODEPEND .\html\htmlwindow.cpp
|
||||
$(CXX) -bt=nt -zq -fo=$^@ $(TEST_GUI_CXXFLAGS) $<
|
||||
|
||||
|
@@ -183,6 +183,7 @@
|
||||
font/fonttest.cpp
|
||||
image/image.cpp
|
||||
image/rawbmp.cpp
|
||||
html/htmlparser.cpp
|
||||
html/htmlwindow.cpp
|
||||
menu/accelentry.cpp
|
||||
menu/menu.cpp
|
||||
|
@@ -345,6 +345,10 @@ SOURCE=.\controls\htmllboxtest.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\html\htmlparser.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\html\htmlwindow.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
@@ -694,6 +694,9 @@
|
||||
<File
|
||||
RelativePath=".\controls\htmllboxtest.cpp">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\html\htmlparser.cpp">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\html\htmlwindow.cpp">
|
||||
</File>
|
||||
|
@@ -999,6 +999,10 @@
|
||||
RelativePath=".\controls\htmllboxtest.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\html\htmlparser.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\html\htmlwindow.cpp"
|
||||
>
|
||||
|
@@ -971,6 +971,10 @@
|
||||
RelativePath=".\controls\htmllboxtest.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\html\htmlparser.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\html\htmlwindow.cpp"
|
||||
>
|
||||
|
Reference in New Issue
Block a user