Treat the contents of SCRIPT and STYLE elements as CDATA in wxHTML.

wxHtmlTagsCache scans forward for the matching closing tag when it meets a
SCRIPT or STYLE opening tag, and wxHtmlParser::CreateDOMSubTree() does not
parse the text between the two tags for child tags.

wxIsCDATAElement() is defined out of line (not inline) because htmlpars.cpp
calls it through an extern declaration only.

Note: the line structure and indentation of this patch were reconstructed;
use "git apply --ignore-whitespace" (or "patch -l") if context lines do not
match. The blob ids on the "index" lines are the ones recorded with the
original patch and were not recomputed.

diff --git a/src/html/htmlpars.cpp b/src/html/htmlpars.cpp
index 9ed716fe81..ac2fc44880 100644
--- a/src/html/htmlpars.cpp
+++ b/src/html/htmlpars.cpp
@@ -127,6 +127,9 @@ void wxHtmlParser::CreateDOMTree()
     m_CurTextPiece = 0;
 }
 
+// Defined in htmltag.cpp.
+extern bool wxIsCDATAElement(const wxChar *tag);
+
 void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
                                     int begin_pos, int end_pos,
                                     wxHtmlTagsCache *cache)
@@ -137,6 +140,15 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
     int i = begin_pos;
     int textBeginning = begin_pos;
 
+    // If the tag contains CDATA text, we include the text between beginning
+    // and ending tag verbatim. Setting i=end_pos will skip to the very
+    // end of this function where text piece is added, bypassing any child
+    // tags parsing (CDATA element can't have child elements by definition):
+    if (cur != NULL && wxIsCDATAElement(cur->GetName().c_str()))
+    {
+        i = end_pos;
+    }
+
     while (i < end_pos)
     {
         c = m_Source.GetChar(i);
@@ -209,6 +221,7 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
                 }
                 else
                     i = chd->GetBeginPos();
+
                 textBeginning = i;
             }
 
diff --git a/src/html/htmltag.cpp b/src/html/htmltag.cpp
index 47ac16a2e4..e8d244a2ee 100644
--- a/src/html/htmltag.cpp
+++ b/src/html/htmltag.cpp
@@ -57,6 +57,15 @@ IMPLEMENT_CLASS(wxHtmlTagsCache,wxObject)
 
 #define CACHE_INCREMENT 64
 
+// Returns true if "tag" names an element whose content is CDATA, i.e. SCRIPT
+// or STYLE. Deliberately not inline: htmlpars.cpp calls it through an extern
+// declaration, so an out-of-line definition must exist in this file.
+bool wxIsCDATAElement(const wxChar *tag)
+{
+    return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
+           (wxStrcmp(tag, _T("STYLE")) == 0);
+}
+
 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
 {
     const wxChar *src = source.c_str();
@@ -108,6 +117,47 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
             else
             {
                 m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
+
+                if (wxIsCDATAElement(tagBuffer))
+                {
+                    // find next matching tag
+                    int tag_len = wxStrlen(tagBuffer);
+                    while (pos < lng)
+                    {
+                        // find the ending tag
+                        while (pos + 1 < lng &&
+                               (src[pos] != '<' || src[pos+1] != '/'))
+                            ++pos;
+                        if (src[pos] == '<')
+                            ++pos;
+
+                        // see if it matches
+                        int match_pos = 0;
+                        while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
+                            if (wxToupper(src[pos]) == tagBuffer[match_pos]) {
+                                ++match_pos;
+                            }
+                            else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
+                                src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
+                                // need to skip over these
+                            }
+                            else {
+                                match_pos = 0;
+                            }
+                            ++pos;
+                        }
+
+                        // found a match
+                        if (match_pos == tag_len) {
+                            pos = pos - tag_len - 3;
+                            stpos = pos;
+                            break;
+                        }
+                        else {
+                            ++pos;
+                        }
+                    }
+                }
             }
         }
 