Fixes for parsing invalid HTML without tag ends.

The code in wxHtmlParser assumed in many places that a '<' character is
always followed by a '>' one and could create (and sometimes dereference)
invalid iterators if this wasn't the case, resulting in asserts from the MSVC
debug CRT and possibly crashes.

Fix this by ensuring that only valid iterators are used and add a trivial unit
test for wxHtmlParser which checks that it can parse invalid HTML without
crashing.

Closes #12869.

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@66678 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2011-01-13 14:49:55 +00:00
parent 48d8ea6d93
commit 3625820490
13 changed files with 137 additions and 9 deletions

View File

@@ -94,12 +94,10 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
if ( wxHtmlParser::SkipCommentTag(pos, end) )
continue;
size_t tg = Cache().size();
Cache().push_back(wxHtmlCacheItem());
// Remember the starting tag position.
wxString::const_iterator stpos = pos++;
Cache()[tg].Key = stpos;
// And look for the ending one.
int i;
for ( i = 0;
pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
@@ -110,12 +108,26 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
}
tagBuffer[i] = wxT('\0');
Cache()[tg].Name = new wxChar[i+1];
memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
while (pos < end && *pos != wxT('>'))
++pos;
if ( pos == end )
{
// We didn't find a closing bracket, this is not a valid tag after
// all. Notice that we need to roll back pos to avoid creating an
// invalid iterator when "++pos" is done in the loop statement.
--pos;
continue;
}
// We have a valid tag, add it to the cache.
size_t tg = Cache().size();
Cache().push_back(wxHtmlCacheItem());
Cache()[tg].Key = stpos;
Cache()[tg].Name = new wxChar[i+1];
memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
{
Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
@@ -223,7 +235,12 @@ void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
bool *hasEnding)
{
if (Cache().empty())
{
*end1 =
*end2 = inputEnd;
*hasEnding = true;
return;
}
if (Cache()[m_CachePos].Key != at)
{