fix the problem with parsing HTML comments (closes bug 1116708; based on patch 1168583)
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@45336 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -128,6 +128,13 @@ public:
|
|||||||
// Returns entity parser object, used to substitute HTML &entities;
|
// Returns entity parser object, used to substitute HTML &entities;
|
||||||
wxHtmlEntitiesParser *GetEntitiesParser() const { return m_entitiesParser; }
|
wxHtmlEntitiesParser *GetEntitiesParser() const { return m_entitiesParser; }
|
||||||
|
|
||||||
|
// Returns true if the tag starting at the given position is a comment tag
|
||||||
|
//
|
||||||
|
// p should point to '<' character and is modified to point to the closing
|
||||||
|
// '>' of the end comment tag if this is indeed a comment; if the comment is
// never closed, p is instead left on the last character of the opening
// "<!--" and true is still returned (p is unchanged when returning false)
|
||||||
|
static bool
|
||||||
|
SkipCommentTag(wxString::const_iterator& p, wxString::const_iterator end);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// DOM structure
|
// DOM structure
|
||||||
void CreateDOMTree();
|
void CreateDOMTree();
|
||||||
|
@@ -171,29 +171,11 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
|
|||||||
wxHtmlTextPiece(textBeginning, i - textBeginning));
|
wxHtmlTextPiece(textBeginning, i - textBeginning));
|
||||||
|
|
||||||
// if it is a comment, skip it:
|
// if it is a comment, skip it:
|
||||||
if (i < end_pos-6 && m_Source.GetChar(i+1) == wxT('!') &&
|
wxString::const_iterator iter = m_Source.begin() + i;
|
||||||
m_Source.GetChar(i+2) == wxT('-') &&
|
if ( SkipCommentTag(iter, m_Source.end()) )
|
||||||
m_Source.GetChar(i+3) == wxT('-'))
|
|
||||||
{
|
{
|
||||||
// Comments begin with "<!--" and end with "--[ \t\r\n]*>"
|
textBeginning =
|
||||||
// according to HTML 4.0
|
i = iter - m_Source.begin() + 1; // skip closing '>' too
|
||||||
int dashes = 0;
|
|
||||||
i += 4;
|
|
||||||
while (i < end_pos)
|
|
||||||
{
|
|
||||||
c = m_Source.GetChar(i++);
|
|
||||||
if ((c == wxT(' ') || c == wxT('\n') ||
|
|
||||||
c == wxT('\r') || c == wxT('\t')) && dashes >= 2) {}
|
|
||||||
else if (c == wxT('>') && dashes >= 2)
|
|
||||||
{
|
|
||||||
textBeginning = i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else if (c == wxT('-'))
|
|
||||||
dashes++;
|
|
||||||
else
|
|
||||||
dashes = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// add another tag to the tree:
|
// add another tag to the tree:
|
||||||
@@ -951,4 +933,55 @@ wxString wxHtmlParser::ExtractCharsetInformation(const wxString& markup)
|
|||||||
return charset;
|
return charset;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
/* static */
|
||||||
|
bool
|
||||||
|
wxHtmlParser::SkipCommentTag(wxString::const_iterator& start,
|
||||||
|
wxString::const_iterator end)
|
||||||
|
{
|
||||||
|
wxASSERT_MSG( *start == '<', _T("should be called on the tag start") );
|
||||||
|
|
||||||
|
wxString::const_iterator p = start;
|
||||||
|
|
||||||
|
// comments begin with "<!--" in HTML 4.0
|
||||||
|
if ( end - p < 3 || *++p != '!' || *++p != '-' || *++p != '-' )
|
||||||
|
{
|
||||||
|
// not a comment at all
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// skip the start of the comment tag in any case, if we don't find the
|
||||||
|
// closing tag we should ignore broken markup
|
||||||
|
start = p;
|
||||||
|
|
||||||
|
// comments end with "--[ \t\r\n]*>", i.e. white space is allowed between
|
||||||
|
// comment delimiter and the closing tag character (section 3.2.4 of
|
||||||
|
// http://www.w3.org/TR/html401/)
|
||||||
|
int dashes = 0;
|
||||||
|
while ( ++p < end )
|
||||||
|
{
|
||||||
|
const wxChar c = *p;
|
||||||
|
|
||||||
|
if ( (c == wxT(' ') || c == wxT('\n') ||
|
||||||
|
c == wxT('\r') || c == wxT('\t')) && dashes >= 2 )
|
||||||
|
{
|
||||||
|
// ignore white space before potential tag end
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( c == wxT('>') && dashes >= 2 )
|
||||||
|
{
|
||||||
|
// found end of comment
|
||||||
|
start = p;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( c == wxT('-') )
|
||||||
|
dashes++;
|
||||||
|
else
|
||||||
|
dashes = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // wxUSE_HTML
|
||||||
|
@@ -68,11 +68,18 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
|
|||||||
m_CacheSize = 0;
|
m_CacheSize = 0;
|
||||||
m_CachePos = 0;
|
m_CachePos = 0;
|
||||||
|
|
||||||
int pos = 0;
|
for ( int pos = 0; pos < lng; pos++ )
|
||||||
while (pos < lng)
|
|
||||||
{
|
{
|
||||||
if (src[pos] == wxT('<')) // tag found:
|
if (src[pos] == wxT('<')) // tag found:
|
||||||
{
|
{
|
||||||
|
// don't cache comment tags
|
||||||
|
wxString::const_iterator iter = source.begin() + pos;
|
||||||
|
if ( wxHtmlParser::SkipCommentTag(iter, source.end()) )
|
||||||
|
{
|
||||||
|
pos = iter - source.begin();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (m_CacheSize % CACHE_INCREMENT == 0)
|
if (m_CacheSize % CACHE_INCREMENT == 0)
|
||||||
m_Cache = (wxHtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wxHtmlCacheItem));
|
m_Cache = (wxHtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wxHtmlCacheItem));
|
||||||
int tg = m_CacheSize++;
|
int tg = m_CacheSize++;
|
||||||
@@ -169,8 +176,6 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pos++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
|
// ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
|
||||||
|
Reference in New Issue
Block a user