Fixes for parsing invalid HTML without tag ends.

The code in wxHtmlParser supposed in many places that a '<' character must be
always followed by a '>' one and could create (and sometimes dereference)
invalid iterators if this wasn't the case resulting in asserts from MSVC debug
CRT and possibly crashes.

Fix this by ensuring that only valid iterators are used and add a trivial unit
test for wxHtmlParser which checks that it can parse invalid HTML without
crashing.

Closes #12869.

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@66678 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2011-01-13 14:49:55 +00:00
parent 48d8ea6d93
commit 3625820490
13 changed files with 137 additions and 9 deletions

View File

@@ -227,7 +227,7 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
else else
{ {
while (i < end_pos && *i != wxT('>')) ++i; while (i < end_pos && *i != wxT('>')) ++i;
textBeginning = i+1; textBeginning = i < end_pos ? i+1 : i;
} }
} }
else ++i; else ++i;
@@ -958,7 +958,7 @@ wxHtmlParser::SkipCommentTag(wxString::const_iterator& start,
wxString::const_iterator p = start; wxString::const_iterator p = start;
// comments begin with "<!--" in HTML 4.0 // comments begin with "<!--" in HTML 4.0
if ( p > end - 3 || *++p != '!' || *++p != '-' || *++p != '-' ) if ( end - start < 4 || *++p != '!' || *++p != '-' || *++p != '-' )
{ {
// not a comment at all // not a comment at all
return false; return false;

View File

@@ -94,12 +94,10 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
if ( wxHtmlParser::SkipCommentTag(pos, end) ) if ( wxHtmlParser::SkipCommentTag(pos, end) )
continue; continue;
size_t tg = Cache().size(); // Remember the starting tag position.
Cache().push_back(wxHtmlCacheItem());
wxString::const_iterator stpos = pos++; wxString::const_iterator stpos = pos++;
Cache()[tg].Key = stpos;
// And look for the ending one.
int i; int i;
for ( i = 0; for ( i = 0;
pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 && pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
@@ -110,12 +108,26 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
} }
tagBuffer[i] = wxT('\0'); tagBuffer[i] = wxT('\0');
Cache()[tg].Name = new wxChar[i+1];
memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
while (pos < end && *pos != wxT('>')) while (pos < end && *pos != wxT('>'))
++pos; ++pos;
if ( pos == end )
{
// We didn't find a closing bracket, this is not a valid tag after
// all. Notice that we need to roll back pos to avoid creating an
// invalid iterator when "++pos" is done in the loop statement.
--pos;
continue;
}
// We have a valid tag, add it to the cache.
size_t tg = Cache().size();
Cache().push_back(wxHtmlCacheItem());
Cache()[tg].Key = stpos;
Cache()[tg].Name = new wxChar[i+1];
memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag: if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
{ {
Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag; Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
@@ -223,7 +235,12 @@ void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
bool *hasEnding) bool *hasEnding)
{ {
if (Cache().empty()) if (Cache().empty())
{
*end1 =
*end2 = inputEnd;
*hasEnding = true;
return; return;
}
if (Cache()[m_CachePos].Key != at) if (Cache()[m_CachePos].Key != at)
{ {

View File

@@ -202,6 +202,7 @@ TEST_GUI_OBJECTS = \
test_gui_fonttest.o \ test_gui_fonttest.o \
test_gui_image.o \ test_gui_image.o \
test_gui_rawbmp.o \ test_gui_rawbmp.o \
test_gui_htmlparser.o \
test_gui_htmlwindow.o \ test_gui_htmlwindow.o \
test_gui_accelentry.o \ test_gui_accelentry.o \
test_gui_menu.o \ test_gui_menu.o \
@@ -831,6 +832,9 @@ test_gui_image.o: $(srcdir)/image/image.cpp $(TEST_GUI_ODEP)
test_gui_rawbmp.o: $(srcdir)/image/rawbmp.cpp $(TEST_GUI_ODEP) test_gui_rawbmp.o: $(srcdir)/image/rawbmp.cpp $(TEST_GUI_ODEP)
$(CXXC) -c -o $@ $(TEST_GUI_CXXFLAGS) $(srcdir)/image/rawbmp.cpp $(CXXC) -c -o $@ $(TEST_GUI_CXXFLAGS) $(srcdir)/image/rawbmp.cpp
test_gui_htmlparser.o: $(srcdir)/html/htmlparser.cpp $(TEST_GUI_ODEP)
$(CXXC) -c -o $@ $(TEST_GUI_CXXFLAGS) $(srcdir)/html/htmlparser.cpp
test_gui_htmlwindow.o: $(srcdir)/html/htmlwindow.cpp $(TEST_GUI_ODEP) test_gui_htmlwindow.o: $(srcdir)/html/htmlwindow.cpp $(TEST_GUI_ODEP)
$(CXXC) -c -o $@ $(TEST_GUI_CXXFLAGS) $(srcdir)/html/htmlwindow.cpp $(CXXC) -c -o $@ $(TEST_GUI_CXXFLAGS) $(srcdir)/html/htmlwindow.cpp

75
tests/html/htmlparser.cpp Normal file
View File

@@ -0,0 +1,75 @@
///////////////////////////////////////////////////////////////////////////////
// Name: tests/html/htmlparser.cpp
// Purpose: wxHtmlParser tests
// Author: Vadim Zeitlin
// Created: 2011-01-13
// RCS-ID: $Id$
// Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// headers
// ----------------------------------------------------------------------------
#include "testprec.h"
#if wxUSE_HTML
#ifdef __BORLANDC__
#pragma hdrstop
#endif
#ifndef WX_PRECOMP
#endif // WX_PRECOMP
#include "wx/html/htmlpars.h"
// ----------------------------------------------------------------------------
// test class
// ----------------------------------------------------------------------------
class HtmlParserTestCase : public CppUnit::TestCase
{
public:
HtmlParserTestCase() { }
private:
CPPUNIT_TEST_SUITE( HtmlParserTestCase );
CPPUNIT_TEST( Invalid );
CPPUNIT_TEST_SUITE_END();
void Invalid();
wxDECLARE_NO_COPY_CLASS(HtmlParserTestCase);
};
// register in the unnamed registry so that these tests are run by default
CPPUNIT_TEST_SUITE_REGISTRATION( HtmlParserTestCase );
// also include in it's own registry so that these tests can be run alone
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( HtmlParserTestCase, "HtmlParserTestCase" );
// ----------------------------------------------------------------------------
// tests themselves
// ----------------------------------------------------------------------------
// Test that parsing invalid HTML simply fails but doesn't crash for example.
void HtmlParserTestCase::Invalid()
{
class NullParser : public wxHtmlParser
{
public:
virtual wxObject *GetProduct() { return NULL; }
protected:
virtual void AddText(const wxString& WXUNUSED(txt)) { }
};
NullParser p;
p.Parse("<");
p.Parse("<foo");
p.Parse("<!--");
p.Parse("<!---");
}
#endif //wxUSE_HTML

View File

@@ -187,6 +187,7 @@ TEST_GUI_OBJECTS = \
$(OBJS)\test_gui_fonttest.obj \ $(OBJS)\test_gui_fonttest.obj \
$(OBJS)\test_gui_image.obj \ $(OBJS)\test_gui_image.obj \
$(OBJS)\test_gui_rawbmp.obj \ $(OBJS)\test_gui_rawbmp.obj \
$(OBJS)\test_gui_htmlparser.obj \
$(OBJS)\test_gui_htmlwindow.obj \ $(OBJS)\test_gui_htmlwindow.obj \
$(OBJS)\test_gui_accelentry.obj \ $(OBJS)\test_gui_accelentry.obj \
$(OBJS)\test_gui_menu.obj \ $(OBJS)\test_gui_menu.obj \
@@ -877,6 +878,9 @@ $(OBJS)\test_gui_image.obj: .\image\image.cpp
$(OBJS)\test_gui_rawbmp.obj: .\image\rawbmp.cpp $(OBJS)\test_gui_rawbmp.obj: .\image\rawbmp.cpp
$(CXX) -q -c -P -o$@ $(TEST_GUI_CXXFLAGS) .\image\rawbmp.cpp $(CXX) -q -c -P -o$@ $(TEST_GUI_CXXFLAGS) .\image\rawbmp.cpp
$(OBJS)\test_gui_htmlparser.obj: .\html\htmlparser.cpp
$(CXX) -q -c -P -o$@ $(TEST_GUI_CXXFLAGS) .\html\htmlparser.cpp
$(OBJS)\test_gui_htmlwindow.obj: .\html\htmlwindow.cpp $(OBJS)\test_gui_htmlwindow.obj: .\html\htmlwindow.cpp
$(CXX) -q -c -P -o$@ $(TEST_GUI_CXXFLAGS) .\html\htmlwindow.cpp $(CXX) -q -c -P -o$@ $(TEST_GUI_CXXFLAGS) .\html\htmlwindow.cpp

View File

@@ -180,6 +180,7 @@ TEST_GUI_OBJECTS = \
$(OBJS)\test_gui_fonttest.o \ $(OBJS)\test_gui_fonttest.o \
$(OBJS)\test_gui_image.o \ $(OBJS)\test_gui_image.o \
$(OBJS)\test_gui_rawbmp.o \ $(OBJS)\test_gui_rawbmp.o \
$(OBJS)\test_gui_htmlparser.o \
$(OBJS)\test_gui_htmlwindow.o \ $(OBJS)\test_gui_htmlwindow.o \
$(OBJS)\test_gui_accelentry.o \ $(OBJS)\test_gui_accelentry.o \
$(OBJS)\test_gui_menu.o \ $(OBJS)\test_gui_menu.o \
@@ -858,6 +859,9 @@ $(OBJS)\test_gui_image.o: ./image/image.cpp
$(OBJS)\test_gui_rawbmp.o: ./image/rawbmp.cpp $(OBJS)\test_gui_rawbmp.o: ./image/rawbmp.cpp
$(CXX) -c -o $@ $(TEST_GUI_CXXFLAGS) $(CPPDEPS) $< $(CXX) -c -o $@ $(TEST_GUI_CXXFLAGS) $(CPPDEPS) $<
$(OBJS)\test_gui_htmlparser.o: ./html/htmlparser.cpp
$(CXX) -c -o $@ $(TEST_GUI_CXXFLAGS) $(CPPDEPS) $<
$(OBJS)\test_gui_htmlwindow.o: ./html/htmlwindow.cpp $(OBJS)\test_gui_htmlwindow.o: ./html/htmlwindow.cpp
$(CXX) -c -o $@ $(TEST_GUI_CXXFLAGS) $(CPPDEPS) $< $(CXX) -c -o $@ $(TEST_GUI_CXXFLAGS) $(CPPDEPS) $<

View File

@@ -182,6 +182,7 @@ TEST_GUI_OBJECTS = \
$(OBJS)\test_gui_fonttest.obj \ $(OBJS)\test_gui_fonttest.obj \
$(OBJS)\test_gui_image.obj \ $(OBJS)\test_gui_image.obj \
$(OBJS)\test_gui_rawbmp.obj \ $(OBJS)\test_gui_rawbmp.obj \
$(OBJS)\test_gui_htmlparser.obj \
$(OBJS)\test_gui_htmlwindow.obj \ $(OBJS)\test_gui_htmlwindow.obj \
$(OBJS)\test_gui_accelentry.obj \ $(OBJS)\test_gui_accelentry.obj \
$(OBJS)\test_gui_menu.obj \ $(OBJS)\test_gui_menu.obj \
@@ -1003,6 +1004,9 @@ $(OBJS)\test_gui_image.obj: .\image\image.cpp
$(OBJS)\test_gui_rawbmp.obj: .\image\rawbmp.cpp $(OBJS)\test_gui_rawbmp.obj: .\image\rawbmp.cpp
$(CXX) /c /nologo /TP /Fo$@ $(TEST_GUI_CXXFLAGS) .\image\rawbmp.cpp $(CXX) /c /nologo /TP /Fo$@ $(TEST_GUI_CXXFLAGS) .\image\rawbmp.cpp
$(OBJS)\test_gui_htmlparser.obj: .\html\htmlparser.cpp
$(CXX) /c /nologo /TP /Fo$@ $(TEST_GUI_CXXFLAGS) .\html\htmlparser.cpp
$(OBJS)\test_gui_htmlwindow.obj: .\html\htmlwindow.cpp $(OBJS)\test_gui_htmlwindow.obj: .\html\htmlwindow.cpp
$(CXX) /c /nologo /TP /Fo$@ $(TEST_GUI_CXXFLAGS) .\html\htmlwindow.cpp $(CXX) /c /nologo /TP /Fo$@ $(TEST_GUI_CXXFLAGS) .\html\htmlwindow.cpp

View File

@@ -422,6 +422,7 @@ TEST_GUI_OBJECTS = &
$(OBJS)\test_gui_fonttest.obj & $(OBJS)\test_gui_fonttest.obj &
$(OBJS)\test_gui_image.obj & $(OBJS)\test_gui_image.obj &
$(OBJS)\test_gui_rawbmp.obj & $(OBJS)\test_gui_rawbmp.obj &
$(OBJS)\test_gui_htmlparser.obj &
$(OBJS)\test_gui_htmlwindow.obj & $(OBJS)\test_gui_htmlwindow.obj &
$(OBJS)\test_gui_accelentry.obj & $(OBJS)\test_gui_accelentry.obj &
$(OBJS)\test_gui_menu.obj & $(OBJS)\test_gui_menu.obj &
@@ -916,6 +917,9 @@ $(OBJS)\test_gui_image.obj : .AUTODEPEND .\image\image.cpp
$(OBJS)\test_gui_rawbmp.obj : .AUTODEPEND .\image\rawbmp.cpp $(OBJS)\test_gui_rawbmp.obj : .AUTODEPEND .\image\rawbmp.cpp
$(CXX) -bt=nt -zq -fo=$^@ $(TEST_GUI_CXXFLAGS) $< $(CXX) -bt=nt -zq -fo=$^@ $(TEST_GUI_CXXFLAGS) $<
$(OBJS)\test_gui_htmlparser.obj : .AUTODEPEND .\html\htmlparser.cpp
$(CXX) -bt=nt -zq -fo=$^@ $(TEST_GUI_CXXFLAGS) $<
$(OBJS)\test_gui_htmlwindow.obj : .AUTODEPEND .\html\htmlwindow.cpp $(OBJS)\test_gui_htmlwindow.obj : .AUTODEPEND .\html\htmlwindow.cpp
$(CXX) -bt=nt -zq -fo=$^@ $(TEST_GUI_CXXFLAGS) $< $(CXX) -bt=nt -zq -fo=$^@ $(TEST_GUI_CXXFLAGS) $<

View File

@@ -183,6 +183,7 @@
font/fonttest.cpp font/fonttest.cpp
image/image.cpp image/image.cpp
image/rawbmp.cpp image/rawbmp.cpp
html/htmlparser.cpp
html/htmlwindow.cpp html/htmlwindow.cpp
menu/accelentry.cpp menu/accelentry.cpp
menu/menu.cpp menu/menu.cpp

View File

@@ -345,6 +345,10 @@ SOURCE=.\controls\htmllboxtest.cpp
# End Source File # End Source File
# Begin Source File # Begin Source File
SOURCE=.\html\htmlparser.cpp
# End Source File
# Begin Source File
SOURCE=.\html\htmlwindow.cpp SOURCE=.\html\htmlwindow.cpp
# End Source File # End Source File
# Begin Source File # Begin Source File

View File

@@ -694,6 +694,9 @@
<File <File
RelativePath=".\controls\htmllboxtest.cpp"> RelativePath=".\controls\htmllboxtest.cpp">
</File> </File>
<File
RelativePath=".\html\htmlparser.cpp">
</File>
<File <File
RelativePath=".\html\htmlwindow.cpp"> RelativePath=".\html\htmlwindow.cpp">
</File> </File>

View File

@@ -999,6 +999,10 @@
RelativePath=".\controls\htmllboxtest.cpp" RelativePath=".\controls\htmllboxtest.cpp"
> >
</File> </File>
<File
RelativePath=".\html\htmlparser.cpp"
>
</File>
<File <File
RelativePath=".\html\htmlwindow.cpp" RelativePath=".\html\htmlwindow.cpp"
> >

View File

@@ -971,6 +971,10 @@
RelativePath=".\controls\htmllboxtest.cpp" RelativePath=".\controls\htmllboxtest.cpp"
> >
</File> </File>
<File
RelativePath=".\html\htmlparser.cpp"
>
</File>
<File <File
RelativePath=".\html\htmlwindow.cpp" RelativePath=".\html\htmlwindow.cpp"
> >