strip HTML tags when searching for keywords

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@39817 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2006-06-22 01:12:06 +00:00
parent a0d43d79c2
commit 21dc463833

View File

@@ -981,47 +981,94 @@ static inline bool WHITESPACE(wxChar c)
return c == _T(' ') || c == _T('\n') || c == _T('\r') || c == _T('\t'); return c == _T(' ') || c == _T('\n') || c == _T('\r') || c == _T('\t');
} }
// Collapse every run of whitespace characters (as classified by WHITESPACE())
// into one blank; all other characters are copied through unchanged.
// Scanning stops at the first NUL character of the string.
static inline wxString CompressSpaces(const wxString & str)
{
    const wxChar blank = _T(' ');

    wxString result;
    result.reserve( str.size() );

    // true while the previously examined character was whitespace
    bool prevWasSpace = false;

    const wxChar *cur = str.c_str();
    while ( *cur )
    {
        const wxChar c = *cur++;
        const bool isSpace = WHITESPACE( c );

        if ( !isSpace )
            result += c;
        else if ( !prevWasSpace )
            result += blank;    // first whitespace of a run becomes a blank
        // else: later whitespace of the same run is dropped

        prevWasSpace = isSpace;
    }

    return result;
}
// NOTE(review): the lines below appear to be a side-by-side diff rendering of
// wxHtmlSearchEngine::Scan(), so many lines carry the old and the new text
// next to each other -- confirm against the real source file.
// In the new text, Scan() reads `file` through wxHtmlFilterHTML, lower-cases
// the buffer unless m_CaseSensitive is set, drops HTML tags, and reports
// whether the keyword occurs in what remains.
bool wxHtmlSearchEngine::Scan(const wxFSFile& file) bool wxHtmlSearchEngine::Scan(const wxFSFile& file)
{ {
wxASSERT_MSG(!m_Keyword.empty(), wxT("wxHtmlSearchEngine::LookFor must be called before scanning!")); wxASSERT_MSG(!m_Keyword.empty(), wxT("wxHtmlSearchEngine::LookFor must be called before scanning!"));
int i, j;
int wrd = m_Keyword.length();
bool found = false;
wxHtmlFilterHTML filter; wxHtmlFilterHTML filter;
wxString tmp = filter.ReadFile(file); wxString bufStr = filter.ReadFile(file);
int lng = tmp.length();
const wxChar *buf = tmp.c_str();
if (!m_CaseSensitive) if (!m_CaseSensitive)
tmp.LowerCase(); bufStr.LowerCase();
const wxChar *kwd = m_Keyword.c_str(); { // remove html tags
wxString bufStrCopy;
bufStrCopy.reserve( bufStr.size() );
bool insideTag = false;
for (const wxChar * pBufStr = bufStr.c_str(); *pBufStr; ++pBufStr)
{
wxChar c = *pBufStr;
// while inside a tag every character is dropped until the closing '>'
if (insideTag)
{
if (c == _T('>'))
{
insideTag = false;
// replace the tag by an empty space
c = _T(' ');
}
else
continue;
}
// '<' opens a tag only when it is followed by '/' or by a non-whitespace
// character; otherwise it falls through and is kept as ordinary text
else if (c == _T('<'))
{
// the loop condition guarantees *pBufStr is not NUL, so looking one
// character ahead reads at most the terminating NUL
wxChar nextCh = *(pBufStr + 1);
if (nextCh == _T('/') || !WHITESPACE(nextCh))
{
insideTag = true;
continue;
}
}
bufStrCopy += c;
}
// replace the original buffer with the tag-free copy
bufStr.swap( bufStrCopy );
}
wxString keyword = m_Keyword;
if (m_WholeWords) if (m_WholeWords)
{ {
for (i = 0; i < lng - wrd; i++) // insert ' ' at the beginning and at the end
{ keyword.insert( 0, _T(" ") );
if (WHITESPACE(buf[i])) continue; keyword.append( _T(" ") );
j = 0; bufStr.insert( 0, _T(" ") );
while ((j < wrd) && (buf[i + j] == kwd[j])) j++; bufStr.append( _T(" ") );
if (j == wrd && WHITESPACE(buf[i + j])) { found = true; break; }
}
} }
else // remove continuous spaces
{ keyword = CompressSpaces( keyword );
for (i = 0; i < lng - wrd; i++) bufStr = CompressSpaces( bufStr );
{
j = 0;
while ((j < wrd) && (buf[i + j] == kwd[j])) j++;
if (j == wrd) { found = true; break; }
}
}
return found; // finally do the search
return bufStr.find( keyword ) != wxString::npos;
} }
#endif #endif