Files
wxWidgets/src/html/helpdata.cpp

1001 lines
26 KiB
C++

/////////////////////////////////////////////////////////////////////////////
// Name: src/html/helpdata.cpp
// Purpose: wxHtmlHelpData
// Notes: Based on htmlhelp.cpp, implementing a monolithic
// HTML Help controller class, by Vaclav Slavik
// Author: Harm van der Heijden and Vaclav Slavik
// Copyright: (c) Harm van der Heijden and Vaclav Slavik
// Licence: wxWindows licence
/////////////////////////////////////////////////////////////////////////////
// For compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"
#ifdef __BORLANDC__
#pragma hdrstop
#endif
#if wxUSE_HTML && wxUSE_STREAMS
#ifndef WX_PRECOMP
#include "wx/intl.h"
#include "wx/log.h"
#endif
#include <ctype.h>
#include "wx/html/helpdata.h"
#include "wx/tokenzr.h"
#include "wx/wfstream.h"
#include "wx/busyinfo.h"
#include "wx/encconv.h"
#include "wx/fontmap.h"
#include "wx/html/htmlpars.h"
#include "wx/html/htmldefs.h"
#include "wx/html/htmlfilt.h"
#include "wx/filename.h"
#include "wx/arrimpl.cpp"
WX_DEFINE_OBJARRAY(wxHtmlBookRecArray)
WX_DEFINE_OBJARRAY(wxHtmlHelpDataItems)
//-----------------------------------------------------------------------------
// static helper functions
//-----------------------------------------------------------------------------
// Reads one line, stores it into buf and returns pointer to new line or NULL.
static const wxChar* ReadLine(const wxChar *line, wxChar *buf, size_t bufsize)
{
wxChar *writeptr = buf;
wxChar *endptr = buf + bufsize - 1;
const wxChar *readptr = line;
while (*readptr != 0 && *readptr != wxT('\r') && *readptr != wxT('\n') &&
writeptr != endptr)
*(writeptr++) = *(readptr++);
*writeptr = 0;
while (*readptr == wxT('\r') || *readptr == wxT('\n'))
readptr++;
if (*readptr == 0)
return NULL;
else
return readptr;
}
static int
wxHtmlHelpIndexCompareFunc(wxHtmlHelpDataItem **a, wxHtmlHelpDataItem **b)
{
wxHtmlHelpDataItem *ia = *a;
wxHtmlHelpDataItem *ib = *b;
if (ia == NULL)
return -1;
if (ib == NULL)
return 1;
if (ia->parent == ib->parent)
{
return ia->name.CmpNoCase(ib->name);
}
else if (ia->level == ib->level)
{
return wxHtmlHelpIndexCompareFunc(&ia->parent, &ib->parent);
}
else
{
wxHtmlHelpDataItem *ia2 = ia;
wxHtmlHelpDataItem *ib2 = ib;
while (ia2->level > ib2->level)
{
ia2 = ia2->parent;
}
while (ib2->level > ia2->level)
{
ib2 = ib2->parent;
}
wxASSERT(ia2);
wxASSERT(ib2);
int res = wxHtmlHelpIndexCompareFunc(&ia2, &ib2);
if (res != 0)
return res;
else if (ia->level > ib->level)
return 1;
else
return -1;
}
}
//-----------------------------------------------------------------------------
// HP_Parser
//-----------------------------------------------------------------------------
class HP_Parser : public wxHtmlParser
{
public:
HP_Parser()
{
GetEntitiesParser()->SetEncoding(wxFONTENCODING_ISO8859_1);
}
wxObject* GetProduct() wxOVERRIDE { return NULL; }
protected:
virtual void AddText(const wxString& WXUNUSED(txt)) wxOVERRIDE {}
wxDECLARE_NO_COPY_CLASS(HP_Parser);
};
//-----------------------------------------------------------------------------
// HP_TagHandler
//-----------------------------------------------------------------------------
class HP_TagHandler : public wxHtmlTagHandler
{
private:
wxString m_name, m_page;
int m_level;
int m_id;
int m_count;
wxHtmlHelpDataItem *m_parentItem;
wxHtmlBookRecord *m_book;
wxHtmlHelpDataItems *m_data;
public:
HP_TagHandler(wxHtmlBookRecord *b) : wxHtmlTagHandler()
{
m_data = NULL;
m_book = b;
m_name = m_page = wxEmptyString;
m_level = 0;
m_id = wxID_ANY;
m_count = 0;
m_parentItem = NULL;
}
wxString GetSupportedTags() wxOVERRIDE { return wxT("UL,OBJECT,PARAM"); }
bool HandleTag(const wxHtmlTag& tag) wxOVERRIDE;
void Reset(wxHtmlHelpDataItems& data)
{
m_data = &data;
m_count = 0;
m_level = 0;
m_parentItem = NULL;
}
wxDECLARE_NO_COPY_CLASS(HP_TagHandler);
};
bool HP_TagHandler::HandleTag(const wxHtmlTag& tag)
{
if (tag.GetName() == wxT("UL"))
{
wxHtmlHelpDataItem *oldparent = m_parentItem;
m_level++;
m_parentItem = (m_count > 0) ? &(*m_data)[m_data->size()-1] : NULL;
ParseInner(tag);
m_level--;
m_parentItem = oldparent;
return true;
}
else if (tag.GetName() == wxT("OBJECT"))
{
m_name = m_page = wxEmptyString;
ParseInner(tag);
#if 0
if (!page.IsEmpty())
/* Valid HHW's file may contain only two object tags:
<OBJECT type="text/site properties">
<param name="ImageType" value="Folder">
</OBJECT>
or
<OBJECT type="text/sitemap">
<param name="Name" value="main page">
<param name="Local" value="another.htm">
</OBJECT>
We're interested in the latter. !page.IsEmpty() is valid
condition because text/site properties does not contain Local param
*/
#endif
if (tag.GetParam(wxT("TYPE")) == wxT("text/sitemap"))
{
wxHtmlHelpDataItem *item = new wxHtmlHelpDataItem();
item->parent = m_parentItem;
item->level = m_level;
item->id = m_id;
item->page = m_page;
item->name = m_name;
item->book = m_book;
m_data->Add(item);
m_count++;
}
return true;
}
else
{ // "PARAM"
if (m_name.empty() && tag.GetParam(wxT("NAME")) == wxT("Name"))
m_name = tag.GetParam(wxT("VALUE"));
if (tag.GetParam(wxT("NAME")) == wxT("Local"))
{
m_page = tag.GetParam(wxT("VALUE"));
m_page.Replace("\\", "/");
}
if (tag.GetParam(wxT("NAME")) == wxT("ID"))
tag.GetParamAsInt(wxT("VALUE"), &m_id);
return false;
}
}
//-----------------------------------------------------------------------------
// wxHtmlHelpData
//-----------------------------------------------------------------------------
wxString wxHtmlBookRecord::GetFullPath(const wxString &page) const
{
if (wxIsAbsolutePath(page) || page.Find(wxT("file:")) == 0)
return page;
else
return m_BasePath + page;
}
wxString wxHtmlHelpDataItem::GetIndentedName() const
{
wxString s;
for (int i = 1; i < level; i++)
s << wxT(" ");
s << name;
return s;
}
wxIMPLEMENT_DYNAMIC_CLASS(wxHtmlHelpData, wxObject);
wxHtmlHelpData::wxHtmlHelpData()
{
}
wxHtmlHelpData::~wxHtmlHelpData()
{
}
bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys,
const wxString& indexfile,
const wxString& contentsfile)
{
wxFSFile *f;
wxHtmlFilterHTML filter;
wxString buf;
wxString string;
HP_Parser parser;
HP_TagHandler *handler = new HP_TagHandler(book);
parser.AddTagHandler(handler);
f = ( contentsfile.empty() ? NULL : fsys.OpenFile(contentsfile) );
if (f)
{
buf.clear();
buf = filter.ReadFile(*f);
delete f;
handler->Reset(m_contents);
parser.Parse(buf);
}
else
{
wxLogError(_("Cannot open contents file: %s"), contentsfile.c_str());
}
f = ( indexfile.empty() ? NULL : fsys.OpenFile(indexfile) );
if (f)
{
buf.clear();
buf = filter.ReadFile(*f);
delete f;
handler->Reset(m_index);
parser.Parse(buf);
}
else if (!indexfile.empty())
{
wxLogError(_("Cannot open index file: %s"), indexfile.c_str());
}
return true;
}
inline static void CacheWriteInt32(wxOutputStream *f, wxInt32 value)
{
wxInt32 x = wxINT32_SWAP_ON_BE(value);
f->Write(&x, sizeof(x));
}
inline static wxInt32 CacheReadInt32(wxInputStream *f)
{
wxInt32 x;
f->Read(&x, sizeof(x));
return wxINT32_SWAP_ON_BE(x);
}
inline static void CacheWriteString(wxOutputStream *f, const wxString& str)
{
const wxWX2MBbuf mbstr = str.mb_str(wxConvUTF8);
size_t len = strlen((const char*)mbstr)+1;
CacheWriteInt32(f, len);
f->Write((const char*)mbstr, len);
}
inline static wxString CacheReadString(wxInputStream *f)
{
size_t len = (size_t)CacheReadInt32(f);
wxCharBuffer str(len-1);
f->Read(str.data(), len);
return wxString(str, wxConvUTF8);
}
#define CURRENT_CACHED_BOOK_VERSION 5
// Additional flags to detect incompatibilities of the runtime environment:
#define CACHED_BOOK_FORMAT_FLAGS \
(wxUSE_UNICODE << 0)
bool wxHtmlHelpData::LoadCachedBook(wxHtmlBookRecord *book, wxInputStream *f)
{
int i, st, newsize;
wxInt32 version;
/* load header - version info : */
version = CacheReadInt32(f);
if (version != CURRENT_CACHED_BOOK_VERSION)
{
// NB: We can just silently return false here and don't worry about
// it anymore, because AddBookParam will load the MS project in
// absence of (properly versioned) .cached file and automatically
// create new .cached file immediately afterward.
return false;
}
if (CacheReadInt32(f) != CACHED_BOOK_FORMAT_FLAGS)
return false;
/* load contents : */
st = m_contents.size();
newsize = st + CacheReadInt32(f);
m_contents.Alloc(newsize);
for (i = st; i < newsize; i++)
{
wxHtmlHelpDataItem *item = new wxHtmlHelpDataItem;
item->level = CacheReadInt32(f);
item->id = CacheReadInt32(f);
item->name = CacheReadString(f);
item->page = CacheReadString(f);
item->book = book;
m_contents.Add(item);
}
/* load index : */
st = m_index.size();
newsize = st + CacheReadInt32(f);
m_index.Alloc(newsize);
for (i = st; i < newsize; i++)
{
wxHtmlHelpDataItem *item = new wxHtmlHelpDataItem;
item->name = CacheReadString(f);
item->page = CacheReadString(f);
item->level = CacheReadInt32(f);
item->book = book;
int parentShift = CacheReadInt32(f);
if (parentShift != 0)
item->parent = &m_index[m_index.size() - parentShift];
m_index.Add(item);
}
return true;
}
bool wxHtmlHelpData::SaveCachedBook(wxHtmlBookRecord *book, wxOutputStream *f)
{
int i;
wxInt32 cnt;
/* save header - version info : */
CacheWriteInt32(f, CURRENT_CACHED_BOOK_VERSION);
CacheWriteInt32(f, CACHED_BOOK_FORMAT_FLAGS);
/* save contents : */
int len = m_contents.size();
for (cnt = 0, i = 0; i < len; i++)
if (m_contents[i].book == book && m_contents[i].level > 0)
cnt++;
CacheWriteInt32(f, cnt);
for (i = 0; i < len; i++)
{
if (m_contents[i].book != book || m_contents[i].level == 0)
continue;
CacheWriteInt32(f, m_contents[i].level);
CacheWriteInt32(f, m_contents[i].id);
CacheWriteString(f, m_contents[i].name);
CacheWriteString(f, m_contents[i].page);
}
/* save index : */
len = m_index.size();
for (cnt = 0, i = 0; i < len; i++)
if (m_index[i].book == book && m_index[i].level > 0)
cnt++;
CacheWriteInt32(f, cnt);
for (i = 0; i < len; i++)
{
if (m_index[i].book != book || m_index[i].level == 0)
continue;
CacheWriteString(f, m_index[i].name);
CacheWriteString(f, m_index[i].page);
CacheWriteInt32(f, m_index[i].level);
// save distance to parent item, if any:
if (m_index[i].parent == NULL)
{
CacheWriteInt32(f, 0);
}
else
{
int cnt2 = 0;
wxHtmlHelpDataItem *parent = m_index[i].parent;
for (int j = i-1; j >= 0; j--)
{
if (m_index[j].book == book && m_index[j].level > 0)
cnt2++;
if (&m_index[j] == parent)
break;
}
wxASSERT(cnt2 > 0);
CacheWriteInt32(f, cnt2);
}
}
return true;
}
void wxHtmlHelpData::SetTempDir(const wxString& path)
{
if (path.empty())
m_tempPath = path;
else
{
wxFileName fn;
fn.AssignDir(path);
fn.MakeAbsolute();
m_tempPath = fn.GetPath(wxPATH_GET_VOLUME | wxPATH_GET_SEPARATOR);
}
}
static wxString SafeFileName(const wxString& s)
{
wxString res(s);
res.Replace(wxT("#"), wxT("_"));
res.Replace(wxT(":"), wxT("_"));
res.Replace(wxT("\\"), wxT("_"));
res.Replace(wxT("/"), wxT("_"));
return res;
}
bool wxHtmlHelpData::AddBookParam(const wxFSFile& bookfile,
wxFontEncoding encoding,
const wxString& title, const wxString& contfile,
const wxString& indexfile, const wxString& deftopic,
const wxString& path)
{
#if wxUSE_WCHAR_T
#if wxUSE_UNICODE
#define CORRECT_STR(str, conv) \
str = wxString((str).mb_str(wxConvISO8859_1), conv)
#else
#define CORRECT_STR(str, conv) \
str = wxString((str).wc_str(conv), wxConvLocal)
#endif
#else
#define CORRECT_STR(str, conv)
#endif
wxFileSystem fsys;
wxFSFile *fi;
wxHtmlBookRecord *bookr;
int IndexOld = m_index.size(),
ContentsOld = m_contents.size();
if (!path.empty())
fsys.ChangePathTo(path, true);
size_t booksCnt = m_bookRecords.GetCount();
for (size_t i = 0; i < booksCnt; i++)
{
if ( m_bookRecords[i].GetBookFile() == bookfile.GetLocation() )
return true; // book is (was) loaded
}
wxString title1 = title;
if (encoding != wxFONTENCODING_SYSTEM)
{
wxCSConv conv(encoding);
CORRECT_STR(title1, conv);
if (title1.IsEmpty() && !title.IsEmpty())
title1 = title;
}
bookr = new wxHtmlBookRecord(bookfile.GetLocation(), fsys.GetPath(), title1, deftopic);
wxHtmlHelpDataItem *bookitem = new wxHtmlHelpDataItem;
bookitem->level = 0;
bookitem->id = 0;
bookitem->page = deftopic;
bookitem->name = title;
bookitem->book = bookr;
// store the contents index for later
int cont_start = m_contents.size();
m_contents.Add(bookitem);
// Try to find cached binary versions:
// 1. save file as book, but with .hhp.cached extension
// 2. same as 1. but in temp path
// 3. otherwise or if cache load failed, load it from MS.
fi = fsys.OpenFile(bookfile.GetLocation() + wxT(".cached"));
if (fi == NULL ||
#if wxUSE_DATETIME
fi->GetModificationTime() < bookfile.GetModificationTime() ||
#endif // wxUSE_DATETIME
!LoadCachedBook(bookr, fi->GetStream()))
{
if (fi != NULL) delete fi;
fi = fsys.OpenFile(m_tempPath + wxFileNameFromPath(bookfile.GetLocation()) + wxT(".cached"));
if (m_tempPath.empty() || fi == NULL ||
#if wxUSE_DATETIME
fi->GetModificationTime() < bookfile.GetModificationTime() ||
#endif // wxUSE_DATETIME
!LoadCachedBook(bookr, fi->GetStream()))
{
LoadMSProject(bookr, fsys, indexfile, contfile);
if (!m_tempPath.empty())
{
wxFileOutputStream *outs = new wxFileOutputStream(m_tempPath +
SafeFileName(wxFileNameFromPath(bookfile.GetLocation())) + wxT(".cached"));
SaveCachedBook(bookr, outs);
delete outs;
}
}
}
if (fi != NULL) delete fi;
// Now store the contents range
bookr->SetContentsRange(cont_start, m_contents.size());
// MS HTML Help files [written by MS HTML Help Workshop] are broken
// in that the data are iso-8859-1 (including HTML entities), but must
// be interpreted as being in language's windows charset. Correct the
// differences here and also convert to wxConvLocal in ANSI build
if (encoding != wxFONTENCODING_SYSTEM)
{
wxCSConv conv(encoding);
size_t IndexCnt = m_index.size();
size_t ContentsCnt = m_contents.size();
size_t i;
for (i = IndexOld; i < IndexCnt; i++)
{
CORRECT_STR(m_index[i].name, conv);
}
for (i = ContentsOld; i < ContentsCnt; i++)
{
CORRECT_STR(m_contents[i].name, conv);
}
#undef CORRECT_STR
}
m_bookRecords.Add(bookr);
if (!m_index.empty())
{
m_index.Sort(wxHtmlHelpIndexCompareFunc);
}
return true;
}
bool wxHtmlHelpData::AddBook(const wxString& book)
{
wxString extension(book.Right(4).Lower());
if (extension == wxT(".zip") ||
#if wxUSE_LIBMSPACK
extension == wxT(".chm") /*compressed html help book*/ ||
#endif
extension == wxT(".htb") /*html book*/)
{
wxFileSystem fsys;
wxString s;
bool rt = false;
#if wxUSE_LIBMSPACK
if (extension == wxT(".chm"))
s = fsys.FindFirst(book + wxT("#chm:*.hhp"), wxFILE);
else
#endif
s = fsys.FindFirst(book + wxT("#zip:*.hhp"), wxFILE);
while (!s.empty())
{
if (AddBook(s)) rt = true;
s = fsys.FindNext();
}
return rt;
}
wxFSFile *fi;
wxFileSystem fsys;
wxString title = _("noname"),
safetitle,
start = wxEmptyString,
contents = wxEmptyString,
index = wxEmptyString,
charset = wxEmptyString;
fi = fsys.OpenFile(book);
if (fi == NULL)
{
wxLogError(_("Cannot open HTML help book: %s"), book.c_str());
return false;
}
fsys.ChangePathTo(book);
const wxChar *lineptr;
wxChar linebuf[300];
wxString tmp;
wxHtmlFilterPlainText filter;
tmp = filter.ReadFile(*fi);
lineptr = tmp.c_str();
do
{
lineptr = ReadLine(lineptr, linebuf, 300);
for (wxChar *ch = linebuf; *ch != wxT('\0') && *ch != wxT('='); ch++)
*ch = (wxChar)wxTolower(*ch);
if (wxStrstr(linebuf, wxT("title=")) == linebuf)
title = linebuf + wxStrlen(wxT("title="));
if (wxStrstr(linebuf, wxT("default topic=")) == linebuf)
{
start = linebuf + wxStrlen(wxT("default topic="));
start.Replace("\\", "/");
}
if (wxStrstr(linebuf, wxT("index file=")) == linebuf)
index = linebuf + wxStrlen(wxT("index file="));
if (wxStrstr(linebuf, wxT("contents file=")) == linebuf)
contents = linebuf + wxStrlen(wxT("contents file="));
if (wxStrstr(linebuf, wxT("charset=")) == linebuf)
charset = linebuf + wxStrlen(wxT("charset="));
} while (lineptr != NULL);
wxFontEncoding enc = wxFONTENCODING_SYSTEM;
#if wxUSE_FONTMAP
if (!charset.empty())
enc = wxFontMapper::Get()->CharsetToEncoding(charset);
#endif
bool rtval = AddBookParam(*fi, enc,
title, contents, index, start, fsys.GetPath());
delete fi;
return rtval;
}
wxString wxHtmlHelpData::FindPageByName(const wxString& x)
{
int i;
bool has_non_ascii = false;
wxString::const_iterator it;
for (it = x.begin(); it != x.end(); ++it)
{
wxUniChar ch = *it;
if (!ch.IsAscii())
{
has_non_ascii = true;
break;
}
}
int cnt = m_bookRecords.GetCount();
if (!has_non_ascii)
{
wxFileSystem fsys;
wxFSFile *f;
// 1. try to open given file:
for (i = 0; i < cnt; i++)
{
f = fsys.OpenFile(m_bookRecords[i].GetFullPath(x));
if (f)
{
wxString url = m_bookRecords[i].GetFullPath(x);
delete f;
return url;
}
}
}
// 2. try to find a book:
for (i = 0; i < cnt; i++)
{
if (m_bookRecords[i].GetTitle() == x)
return m_bookRecords[i].GetFullPath(m_bookRecords[i].GetStart());
}
// 3. try to find in contents:
cnt = m_contents.size();
for (i = 0; i < cnt; i++)
{
if (m_contents[i].name == x)
return m_contents[i].GetFullPath();
}
// 4. try to find in index:
cnt = m_index.size();
for (i = 0; i < cnt; i++)
{
if (m_index[i].name == x)
return m_index[i].GetFullPath();
}
// 4b. if still not found, try case-insensitive comparison
for (i = 0; i < cnt; i++)
{
if (m_index[i].name.CmpNoCase(x) == 0)
return m_index[i].GetFullPath();
}
return wxEmptyString;
}
wxString wxHtmlHelpData::FindPageById(int id)
{
size_t cnt = m_contents.size();
for (size_t i = 0; i < cnt; i++)
{
if (m_contents[i].id == id)
{
return m_contents[i].GetFullPath();
}
}
return wxEmptyString;
}
//----------------------------------------------------------------------------------
// wxHtmlSearchStatus functions
//----------------------------------------------------------------------------------
wxHtmlSearchStatus::wxHtmlSearchStatus(wxHtmlHelpData* data, const wxString& keyword,
bool case_sensitive, bool whole_words_only,
const wxString& book)
{
m_Data = data;
m_Keyword = keyword;
wxHtmlBookRecord* bookr = NULL;
if (!book.empty())
{
// we have to search in a specific book. Find it first
int i, cnt = data->m_bookRecords.GetCount();
for (i = 0; i < cnt; i++)
if (data->m_bookRecords[i].GetTitle() == book)
{
bookr = &(data->m_bookRecords[i]);
m_CurIndex = bookr->GetContentsStart();
m_MaxIndex = bookr->GetContentsEnd();
break;
}
// check; we won't crash if the book doesn't exist, but it's Bad Anyway.
wxASSERT(bookr);
}
if (! bookr)
{
// no book specified; search all books
m_CurIndex = 0;
m_MaxIndex = m_Data->m_contents.size();
}
m_Engine.LookFor(keyword, case_sensitive, whole_words_only);
m_Active = (m_CurIndex < m_MaxIndex);
}
bool wxHtmlSearchStatus::Search()
{
wxFSFile *file;
int i = m_CurIndex; // shortcut
bool found = false;
wxString thepage;
if (!m_Active)
{
// sanity check. Illegal use, but we'll try to prevent a crash anyway
wxASSERT(m_Active);
return false;
}
m_Name = wxEmptyString;
m_CurItem = NULL;
thepage = m_Data->m_contents[i].page;
m_Active = (++m_CurIndex < m_MaxIndex);
// check if it is same page with different anchor:
if (!m_LastPage.empty())
{
const wxChar *p1, *p2;
for (p1 = thepage.c_str(), p2 = m_LastPage.c_str();
*p1 != 0 && *p1 != wxT('#') && *p1 == *p2; p1++, p2++) {}
m_LastPage = thepage;
if (*p1 == 0 || *p1 == wxT('#'))
return false;
}
else m_LastPage = thepage;
wxFileSystem fsys;
file = fsys.OpenFile(m_Data->m_contents[i].book->GetFullPath(thepage));
if (file)
{
if (m_Engine.Scan(*file))
{
m_Name = m_Data->m_contents[i].name;
m_CurItem = &m_Data->m_contents[i];
found = true;
}
delete file;
}
return found;
}
//--------------------------------------------------------------------------------
// wxHtmlSearchEngine
//--------------------------------------------------------------------------------
void wxHtmlSearchEngine::LookFor(const wxString& keyword, bool case_sensitive, bool whole_words_only)
{
m_CaseSensitive = case_sensitive;
m_WholeWords = whole_words_only;
m_Keyword = keyword;
if (!m_CaseSensitive)
m_Keyword.LowerCase();
}
static inline bool WHITESPACE(wxChar c)
{
return c == wxT(' ') || c == wxT('\n') || c == wxT('\r') || c == wxT('\t');
}
// replace continuous spaces by one single space
static inline wxString CompressSpaces(const wxString & str)
{
wxString buf;
buf.reserve( str.size() );
bool space_counted = false;
for( const wxChar * pstr = str.c_str(); *pstr; ++pstr )
{
wxChar ch = *pstr;
if( WHITESPACE( ch ) )
{
if( space_counted )
{
continue;
}
ch = wxT(' ');
space_counted = true;
}
else
{
space_counted = false;
}
buf += ch;
}
return buf;
}
bool wxHtmlSearchEngine::Scan(const wxFSFile& file)
{
wxASSERT_MSG(!m_Keyword.empty(), wxT("wxHtmlSearchEngine::LookFor must be called before scanning!"));
wxHtmlFilterHTML filter;
wxString bufStr = filter.ReadFile(file);
if (!m_CaseSensitive)
bufStr.LowerCase();
{ // remove html tags
wxString bufStrCopy;
bufStrCopy.reserve( bufStr.size() );
bool insideTag = false;
for (const wxChar * pBufStr = bufStr.c_str(); *pBufStr; ++pBufStr)
{
wxChar c = *pBufStr;
if (insideTag)
{
if (c == wxT('>'))
{
insideTag = false;
// replace the tag by an empty space
c = wxT(' ');
}
else
continue;
}
else if (c == wxT('<'))
{
wxChar nextCh = *(pBufStr + 1);
if (nextCh == wxT('/') || !WHITESPACE(nextCh))
{
insideTag = true;
continue;
}
}
bufStrCopy += c;
}
bufStr.swap( bufStrCopy );
}
wxString keyword = m_Keyword;
if (m_WholeWords)
{
// insert ' ' at the beginning and at the end
keyword.insert( 0, wxT(" ") );
keyword.append( wxT(" ") );
bufStr.insert( 0, wxT(" ") );
bufStr.append( wxT(" ") );
}
// remove continuous spaces
keyword = CompressSpaces( keyword );
bufStr = CompressSpaces( bufStr );
// finally do the search
return bufStr.find( keyword ) != wxString::npos;
}
#endif