strip HTML tags when searching for keywords
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@39817 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -981,47 +981,94 @@ static inline bool WHITESPACE(wxChar c)
|
|||||||
return c == _T(' ') || c == _T('\n') || c == _T('\r') || c == _T('\t');
|
return c == _T(' ') || c == _T('\n') || c == _T('\r') || c == _T('\t');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// replace continuous spaces by one single space
|
||||||
|
static inline wxString CompressSpaces(const wxString & str)
|
||||||
|
{
|
||||||
|
wxString buf;
|
||||||
|
buf.reserve( str.size() );
|
||||||
|
|
||||||
|
bool space_counted = false;
|
||||||
|
for( const wxChar * pstr = str.c_str(); *pstr; ++pstr )
|
||||||
|
{
|
||||||
|
wxChar ch = *pstr;
|
||||||
|
if( WHITESPACE( ch ) )
|
||||||
|
{
|
||||||
|
if( space_counted )
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
ch = _T(' ');
|
||||||
|
space_counted = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
space_counted = false;
|
||||||
|
}
|
||||||
|
buf += ch;
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
bool wxHtmlSearchEngine::Scan(const wxFSFile& file)
|
bool wxHtmlSearchEngine::Scan(const wxFSFile& file)
|
||||||
{
|
{
|
||||||
wxASSERT_MSG(!m_Keyword.empty(), wxT("wxHtmlSearchEngine::LookFor must be called before scanning!"));
|
wxASSERT_MSG(!m_Keyword.empty(), wxT("wxHtmlSearchEngine::LookFor must be called before scanning!"));
|
||||||
|
|
||||||
int i, j;
|
|
||||||
int wrd = m_Keyword.length();
|
|
||||||
bool found = false;
|
|
||||||
wxHtmlFilterHTML filter;
|
wxHtmlFilterHTML filter;
|
||||||
wxString tmp = filter.ReadFile(file);
|
wxString bufStr = filter.ReadFile(file);
|
||||||
int lng = tmp.length();
|
|
||||||
const wxChar *buf = tmp.c_str();
|
|
||||||
|
|
||||||
if (!m_CaseSensitive)
|
if (!m_CaseSensitive)
|
||||||
tmp.LowerCase();
|
bufStr.LowerCase();
|
||||||
|
|
||||||
const wxChar *kwd = m_Keyword.c_str();
|
{ // remove html tags
|
||||||
|
wxString bufStrCopy;
|
||||||
|
bufStrCopy.reserve( bufStr.size() );
|
||||||
|
bool insideTag = false;
|
||||||
|
for (const wxChar * pBufStr = bufStr.c_str(); *pBufStr; ++pBufStr)
|
||||||
|
{
|
||||||
|
wxChar c = *pBufStr;
|
||||||
|
if (insideTag)
|
||||||
|
{
|
||||||
|
if (c == _T('>'))
|
||||||
|
{
|
||||||
|
insideTag = false;
|
||||||
|
// replace the tag by an empty space
|
||||||
|
c = _T(' ');
|
||||||
|
}
|
||||||
|
else
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if (c == _T('<'))
|
||||||
|
{
|
||||||
|
wxChar nextCh = *(pBufStr + 1);
|
||||||
|
if (nextCh == _T('/') || !WHITESPACE(nextCh))
|
||||||
|
{
|
||||||
|
insideTag = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bufStrCopy += c;
|
||||||
|
}
|
||||||
|
bufStr.swap( bufStrCopy );
|
||||||
|
}
|
||||||
|
|
||||||
|
wxString keyword = m_Keyword;
|
||||||
|
|
||||||
if (m_WholeWords)
|
if (m_WholeWords)
|
||||||
{
|
{
|
||||||
for (i = 0; i < lng - wrd; i++)
|
// insert ' ' at the beginning and at the end
|
||||||
{
|
keyword.insert( 0, _T(" ") );
|
||||||
if (WHITESPACE(buf[i])) continue;
|
keyword.append( _T(" ") );
|
||||||
j = 0;
|
bufStr.insert( 0, _T(" ") );
|
||||||
while ((j < wrd) && (buf[i + j] == kwd[j])) j++;
|
bufStr.append( _T(" ") );
|
||||||
if (j == wrd && WHITESPACE(buf[i + j])) { found = true; break; }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
else
|
// remove continuous spaces
|
||||||
{
|
keyword = CompressSpaces( keyword );
|
||||||
for (i = 0; i < lng - wrd; i++)
|
bufStr = CompressSpaces( bufStr );
|
||||||
{
|
|
||||||
j = 0;
|
|
||||||
while ((j < wrd) && (buf[i + j] == kwd[j])) j++;
|
|
||||||
if (j == wrd) { found = true; break; }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return found;
|
// finally do the search
|
||||||
|
return bufStr.find( keyword ) != wxString::npos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user