fix the problem with parsing HTML comments (closes bug 1116708; based on patch 1168583)
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@45336 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -128,6 +128,13 @@ public:
|
||||
// Returns entity parser object, used to substitute HTML &entities;
|
||||
wxHtmlEntitiesParser *GetEntitiesParser() const { return m_entitiesParser; }
|
||||
|
||||
// Returns true if the tag starting at the given position is a comment tag
|
||||
//
|
||||
// p should point to '<' character and is modified to point to the closing
|
||||
// '>' of the end comment tag if this is indeed a comment; if the comment is
// never closed, p is left on the last '-' of the opening "<!--" instead
|
||||
static bool
|
||||
SkipCommentTag(wxString::const_iterator& p, wxString::const_iterator end);
|
||||
|
||||
protected:
|
||||
// DOM structure
|
||||
void CreateDOMTree();
|
||||
|
@@ -171,29 +171,11 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
|
||||
wxHtmlTextPiece(textBeginning, i - textBeginning));
|
||||
|
||||
// if it is a comment, skip it:
|
||||
if (i < end_pos-6 && m_Source.GetChar(i+1) == wxT('!') &&
|
||||
m_Source.GetChar(i+2) == wxT('-') &&
|
||||
m_Source.GetChar(i+3) == wxT('-'))
|
||||
wxString::const_iterator iter = m_Source.begin() + i;
|
||||
if ( SkipCommentTag(iter, m_Source.end()) )
|
||||
{
|
||||
// Comments begin with "<!--" and end with "--[ \t\r\n]*>"
|
||||
// according to HTML 4.0
|
||||
int dashes = 0;
|
||||
i += 4;
|
||||
while (i < end_pos)
|
||||
{
|
||||
c = m_Source.GetChar(i++);
|
||||
if ((c == wxT(' ') || c == wxT('\n') ||
|
||||
c == wxT('\r') || c == wxT('\t')) && dashes >= 2) {}
|
||||
else if (c == wxT('>') && dashes >= 2)
|
||||
{
|
||||
textBeginning = i;
|
||||
break;
|
||||
}
|
||||
else if (c == wxT('-'))
|
||||
dashes++;
|
||||
else
|
||||
dashes = 0;
|
||||
}
|
||||
textBeginning =
|
||||
i = iter - m_Source.begin() + 1; // skip closing '>' too
|
||||
}
|
||||
|
||||
// add another tag to the tree:
|
||||
@@ -951,4 +933,55 @@ wxString wxHtmlParser::ExtractCharsetInformation(const wxString& markup)
|
||||
return charset;
|
||||
}
|
||||
|
||||
#endif
|
||||
/* static */
|
||||
bool
|
||||
wxHtmlParser::SkipCommentTag(wxString::const_iterator& start,
|
||||
wxString::const_iterator end)
|
||||
{
|
||||
wxASSERT_MSG( *start == '<', _T("should be called on the tag start") );
|
||||
|
||||
wxString::const_iterator p = start;
|
||||
|
||||
// comments begin with "<!--" in HTML 4.0
|
||||
if ( end - p < 3 || *++p != '!' || *++p != '-' || *++p != '-' )
|
||||
{
|
||||
// not a comment at all
|
||||
return false;
|
||||
}
|
||||
|
||||
// skip the start of the comment tag in any case, if we don't find the
|
||||
// closing tag we should ignore broken markup
|
||||
start = p;
|
||||
|
||||
// comments end with "--[ \t\r\n]*>", i.e. white space is allowed between
|
||||
// comment delimiter and the closing tag character (section 3.2.4 of
|
||||
// http://www.w3.org/TR/html401/)
|
||||
int dashes = 0;
|
||||
while ( ++p < end )
|
||||
{
|
||||
const wxChar c = *p;
|
||||
|
||||
if ( (c == wxT(' ') || c == wxT('\n') ||
|
||||
c == wxT('\r') || c == wxT('\t')) && dashes >= 2 )
|
||||
{
|
||||
// ignore white space before potential tag end
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( c == wxT('>') && dashes >= 2 )
|
||||
{
|
||||
// found end of comment
|
||||
start = p;
|
||||
break;
|
||||
}
|
||||
|
||||
if ( c == wxT('-') )
|
||||
dashes++;
|
||||
else
|
||||
dashes = 0;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif // wxUSE_HTML
|
||||
|
@@ -68,11 +68,18 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
|
||||
m_CacheSize = 0;
|
||||
m_CachePos = 0;
|
||||
|
||||
int pos = 0;
|
||||
while (pos < lng)
|
||||
for ( int pos = 0; pos < lng; pos++ )
|
||||
{
|
||||
if (src[pos] == wxT('<')) // tag found:
|
||||
{
|
||||
// don't cache comment tags
|
||||
wxString::const_iterator iter = source.begin() + pos;
|
||||
if ( wxHtmlParser::SkipCommentTag(iter, source.end()) )
|
||||
{
|
||||
pos = iter - source.begin();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (m_CacheSize % CACHE_INCREMENT == 0)
|
||||
m_Cache = (wxHtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wxHtmlCacheItem));
|
||||
int tg = m_CacheSize++;
|
||||
@@ -169,8 +176,6 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pos++;
|
||||
}
|
||||
|
||||
// ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
|
||||
|
Reference in New Issue
Block a user